mdb-engine 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. mdb_engine/__init__.py +104 -11
  2. mdb_engine/auth/ARCHITECTURE.md +112 -0
  3. mdb_engine/auth/README.md +648 -11
  4. mdb_engine/auth/__init__.py +136 -29
  5. mdb_engine/auth/audit.py +592 -0
  6. mdb_engine/auth/base.py +252 -0
  7. mdb_engine/auth/casbin_factory.py +264 -69
  8. mdb_engine/auth/config_helpers.py +7 -6
  9. mdb_engine/auth/cookie_utils.py +3 -7
  10. mdb_engine/auth/csrf.py +373 -0
  11. mdb_engine/auth/decorators.py +3 -10
  12. mdb_engine/auth/dependencies.py +47 -50
  13. mdb_engine/auth/helpers.py +3 -3
  14. mdb_engine/auth/integration.py +53 -80
  15. mdb_engine/auth/jwt.py +2 -6
  16. mdb_engine/auth/middleware.py +77 -34
  17. mdb_engine/auth/oso_factory.py +18 -38
  18. mdb_engine/auth/provider.py +270 -171
  19. mdb_engine/auth/rate_limiter.py +504 -0
  20. mdb_engine/auth/restrictions.py +8 -24
  21. mdb_engine/auth/session_manager.py +14 -29
  22. mdb_engine/auth/shared_middleware.py +600 -0
  23. mdb_engine/auth/shared_users.py +759 -0
  24. mdb_engine/auth/token_store.py +14 -28
  25. mdb_engine/auth/users.py +54 -113
  26. mdb_engine/auth/utils.py +213 -15
  27. mdb_engine/cli/commands/generate.py +545 -9
  28. mdb_engine/cli/commands/validate.py +3 -7
  29. mdb_engine/cli/utils.py +3 -3
  30. mdb_engine/config.py +7 -21
  31. mdb_engine/constants.py +65 -0
  32. mdb_engine/core/README.md +117 -6
  33. mdb_engine/core/__init__.py +39 -7
  34. mdb_engine/core/app_registration.py +22 -41
  35. mdb_engine/core/app_secrets.py +290 -0
  36. mdb_engine/core/connection.py +18 -9
  37. mdb_engine/core/encryption.py +223 -0
  38. mdb_engine/core/engine.py +1057 -93
  39. mdb_engine/core/index_management.py +12 -16
  40. mdb_engine/core/manifest.py +459 -150
  41. mdb_engine/core/ray_integration.py +435 -0
  42. mdb_engine/core/seeding.py +10 -18
  43. mdb_engine/core/service_initialization.py +12 -23
  44. mdb_engine/core/types.py +2 -5
  45. mdb_engine/database/README.md +140 -17
  46. mdb_engine/database/__init__.py +17 -6
  47. mdb_engine/database/abstraction.py +25 -37
  48. mdb_engine/database/connection.py +11 -18
  49. mdb_engine/database/query_validator.py +367 -0
  50. mdb_engine/database/resource_limiter.py +204 -0
  51. mdb_engine/database/scoped_wrapper.py +713 -196
  52. mdb_engine/dependencies.py +426 -0
  53. mdb_engine/di/__init__.py +34 -0
  54. mdb_engine/di/container.py +248 -0
  55. mdb_engine/di/providers.py +205 -0
  56. mdb_engine/di/scopes.py +139 -0
  57. mdb_engine/embeddings/README.md +54 -24
  58. mdb_engine/embeddings/__init__.py +31 -24
  59. mdb_engine/embeddings/dependencies.py +37 -154
  60. mdb_engine/embeddings/service.py +11 -25
  61. mdb_engine/exceptions.py +92 -0
  62. mdb_engine/indexes/README.md +30 -13
  63. mdb_engine/indexes/__init__.py +1 -0
  64. mdb_engine/indexes/helpers.py +1 -1
  65. mdb_engine/indexes/manager.py +50 -114
  66. mdb_engine/memory/README.md +2 -2
  67. mdb_engine/memory/__init__.py +1 -2
  68. mdb_engine/memory/service.py +30 -87
  69. mdb_engine/observability/README.md +4 -2
  70. mdb_engine/observability/__init__.py +26 -9
  71. mdb_engine/observability/health.py +8 -9
  72. mdb_engine/observability/metrics.py +32 -12
  73. mdb_engine/repositories/__init__.py +34 -0
  74. mdb_engine/repositories/base.py +325 -0
  75. mdb_engine/repositories/mongo.py +233 -0
  76. mdb_engine/repositories/unit_of_work.py +166 -0
  77. mdb_engine/routing/README.md +1 -1
  78. mdb_engine/routing/__init__.py +1 -3
  79. mdb_engine/routing/websockets.py +25 -60
  80. mdb_engine-0.2.0.dist-info/METADATA +313 -0
  81. mdb_engine-0.2.0.dist-info/RECORD +96 -0
  82. mdb_engine-0.1.6.dist-info/METADATA +0 -213
  83. mdb_engine-0.1.6.dist-info/RECORD +0 -75
  84. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/WHEEL +0 -0
  85. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/entry_points.txt +0 -0
  86. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/licenses/LICENSE +0 -0
  87. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/top_level.txt +0 -0
@@ -26,27 +26,65 @@ a familiar (Motor-like) developer experience with automatic index optimization.
26
26
 
27
27
  import asyncio
28
28
  import logging
29
+ import re
29
30
  import time
30
- from typing import (Any, ClassVar, Coroutine, Dict, List, Mapping, Optional,
31
- Tuple, Union)
32
-
33
- from motor.motor_asyncio import (AsyncIOMotorCollection, AsyncIOMotorCursor,
34
- AsyncIOMotorDatabase)
31
+ from typing import (
32
+ TYPE_CHECKING,
33
+ Any,
34
+ ClassVar,
35
+ Coroutine,
36
+ Dict,
37
+ List,
38
+ Mapping,
39
+ Optional,
40
+ Tuple,
41
+ Union,
42
+ )
43
+
44
+ if TYPE_CHECKING:
45
+ from ..core.app_secrets import AppSecretsManager
46
+
47
+ from motor.motor_asyncio import (
48
+ AsyncIOMotorCollection,
49
+ AsyncIOMotorCursor,
50
+ AsyncIOMotorDatabase,
51
+ )
35
52
  from pymongo import ASCENDING, DESCENDING, TEXT
36
- from pymongo.errors import (AutoReconnect, CollectionInvalid,
37
- ConnectionFailure, InvalidOperation,
38
- OperationFailure, ServerSelectionTimeoutError)
53
+ from pymongo.errors import (
54
+ AutoReconnect,
55
+ CollectionInvalid,
56
+ ConnectionFailure,
57
+ InvalidOperation,
58
+ OperationFailure,
59
+ PyMongoError,
60
+ ServerSelectionTimeoutError,
61
+ )
39
62
  from pymongo.operations import SearchIndexModel
40
- from pymongo.results import (DeleteResult, InsertManyResult, InsertOneResult,
41
- UpdateResult)
63
+ from pymongo.results import (
64
+ DeleteResult,
65
+ InsertManyResult,
66
+ InsertOneResult,
67
+ UpdateResult,
68
+ )
42
69
 
43
70
  # Import constants
44
- from ..constants import (AUTO_INDEX_HINT_THRESHOLD, DEFAULT_DROP_TIMEOUT,
45
- DEFAULT_POLL_INTERVAL, DEFAULT_SEARCH_TIMEOUT,
46
- MAX_INDEX_FIELDS)
71
+ from ..constants import (
72
+ AUTO_INDEX_HINT_THRESHOLD,
73
+ DEFAULT_DROP_TIMEOUT,
74
+ DEFAULT_POLL_INTERVAL,
75
+ DEFAULT_SEARCH_TIMEOUT,
76
+ MAX_COLLECTION_NAME_LENGTH,
77
+ MAX_INDEX_FIELDS,
78
+ MIN_COLLECTION_NAME_LENGTH,
79
+ RESERVED_COLLECTION_NAMES,
80
+ RESERVED_COLLECTION_PREFIXES,
81
+ )
47
82
  from ..exceptions import MongoDBEngineError
83
+
48
84
  # Import observability
49
85
  from ..observability import record_operation
86
+ from .query_validator import QueryValidator
87
+ from .resource_limiter import ResourceLimiter
50
88
 
51
89
  # --- FIX: Configure logger *before* first use ---
52
90
  logger = logging.getLogger(__name__)
@@ -60,9 +98,7 @@ GEO2DSPHERE = "2dsphere"
60
98
 
61
99
 
62
100
  # --- HELPER FUNCTION FOR MANAGED TASK CREATION ---
63
- def _create_managed_task(
64
- coro: Coroutine[Any, Any, Any], task_name: Optional[str] = None
65
- ) -> None:
101
+ def _create_managed_task(coro: Coroutine[Any, Any, Any], task_name: Optional[str] = None) -> None:
66
102
  """
67
103
  Creates a background task using asyncio.create_task().
68
104
 
@@ -86,6 +122,149 @@ def _create_managed_task(
86
122
  # --- END HELPER FUNCTION ---
87
123
 
88
124
 
125
+ # ##########################################################################
126
+ # SECURITY VALIDATION FUNCTIONS
127
+ # ##########################################################################
128
+
129
+ # Collection name pattern: alphanumeric, underscore, dot, hyphen
130
+ # Must start with alphanumeric or underscore
131
+ # MongoDB allows: [a-zA-Z0-9_.-] but cannot start with number or special char
132
+ COLLECTION_NAME_PATTERN: re.Pattern = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.-]*$")
133
+ """Regex pattern for valid MongoDB collection names."""
134
+
135
+
136
+ def _validate_collection_name(name: str, allow_prefixed: bool = False) -> None:
137
+ """
138
+ Validate collection name for security.
139
+
140
+ Validates that collection names:
141
+ - Meet MongoDB naming requirements
142
+ - Are not reserved system names
143
+ - Do not use reserved prefixes
144
+ - Are within length limits
145
+
146
+ Args:
147
+ name: Collection name to validate
148
+ allow_prefixed: If True, allows prefixed names (e.g., "app_collection")
149
+ for cross-app access validation
150
+
151
+ Raises:
152
+ ValueError: If collection name is invalid, reserved, or uses reserved prefix
153
+ """
154
+ if not name:
155
+ raise ValueError("Collection name cannot be empty")
156
+
157
+ # Check length
158
+ if len(name) < MIN_COLLECTION_NAME_LENGTH:
159
+ raise ValueError(
160
+ f"Collection name too short (minimum {MIN_COLLECTION_NAME_LENGTH} character): {name}"
161
+ )
162
+ if len(name) > MAX_COLLECTION_NAME_LENGTH:
163
+ raise ValueError(
164
+ f"Collection name too long (maximum {MAX_COLLECTION_NAME_LENGTH} characters): {name}"
165
+ )
166
+
167
+ # Check pattern (MongoDB naming rules)
168
+ if not COLLECTION_NAME_PATTERN.match(name):
169
+ raise ValueError(
170
+ f"Invalid collection name format: '{name}'. "
171
+ "Collection names must start with a letter or underscore and "
172
+ "contain only alphanumeric characters, underscores, dots, or hyphens."
173
+ )
174
+
175
+ # MongoDB doesn't allow collection names to end with a dot
176
+ if name.endswith("."):
177
+ raise ValueError(
178
+ f"Invalid collection name format: '{name}'. " "Collection names cannot end with a dot."
179
+ )
180
+
181
+ # Check for path traversal attempts
182
+ if ".." in name or "/" in name or "\\" in name:
183
+ raise ValueError(
184
+ f"Invalid collection name format: '{name}'. "
185
+ f"Collection names must start with a letter or underscore and contain "
186
+ f"only alphanumeric characters, underscores, dots, or hyphens."
187
+ )
188
+
189
+ # Check reserved names (exact match)
190
+ if name in RESERVED_COLLECTION_NAMES:
191
+ logger.warning(f"Security: Attempted access to reserved collection name: {name}")
192
+ raise ValueError(
193
+ f"Collection name '{name}' is reserved and cannot be accessed through scoped database."
194
+ )
195
+
196
+ # Check reserved prefixes
197
+ name_lower = name.lower()
198
+ for prefix in RESERVED_COLLECTION_PREFIXES:
199
+ if name_lower.startswith(prefix):
200
+ logger.warning(
201
+ f"Security: Attempted access to collection with reserved prefix '{prefix}': {name}"
202
+ )
203
+ raise ValueError(
204
+ f"Collection name '{name}' uses reserved prefix '{prefix}' and cannot be accessed."
205
+ )
206
+
207
+
208
+ def _extract_app_slug_from_prefixed_name(prefixed_name: str) -> Optional[str]:
209
+ """
210
+ Extract app slug from a prefixed collection name.
211
+
212
+ Args:
213
+ prefixed_name: Collection name that may be prefixed (e.g., "app_slug_collection")
214
+
215
+ Returns:
216
+ App slug if name is prefixed, None otherwise
217
+ """
218
+ if "_" not in prefixed_name:
219
+ return None
220
+
221
+ # Split on first underscore
222
+ parts = prefixed_name.split("_", 1)
223
+ if len(parts) != 2:
224
+ return None
225
+
226
+ app_slug = parts[0]
227
+ # Basic validation - app slug should be non-empty
228
+ if app_slug:
229
+ return app_slug
230
+ return None
231
+
232
+
233
+ class _SecureCollectionProxy:
234
+ """
235
+ Proxy wrapper that blocks access to dangerous attributes on collections.
236
+
237
+ Prevents access to database/client attributes that could be used to bypass scoping.
238
+ """
239
+
240
+ __slots__ = ("_collection",)
241
+
242
+ def __init__(self, collection: AsyncIOMotorCollection):
243
+ self._collection = collection
244
+
245
+ def __getattr__(self, name: str) -> Any:
246
+ """Block access to database/client attributes."""
247
+ if name in ("database", "client", "db"):
248
+ logger.warning(
249
+ f"Security: Attempted access to '{name}' attribute on collection. "
250
+ "This is blocked to prevent bypassing scoping."
251
+ )
252
+ raise AttributeError(
253
+ f"Access to '{name}' is blocked for security. "
254
+ "Use collection.index_manager for index operations. "
255
+ "All data access must go through scoped collections."
256
+ )
257
+ return getattr(self._collection, name)
258
+
259
+ def __setattr__(self, name: str, value: Any) -> None:
260
+ """Allow setting _collection, delegate other attributes to underlying collection."""
261
+ if name == "_collection":
262
+ super().__setattr__(name, value)
263
+ else:
264
+ # Delegate to underlying collection for other attributes
265
+ setattr(self._collection, name, value)
266
+
267
+
89
268
  # ##########################################################################
90
269
  # ASYNCHRONOUS ATLAS INDEX MANAGER
91
270
  # ##########################################################################
@@ -115,10 +294,11 @@ class AsyncAtlasIndexManager:
115
294
  Initializes the manager with a direct reference to a
116
295
  motor.motor_asyncio.AsyncIOMotorCollection.
117
296
  """
297
+ # Unwrap _SecureCollectionProxy if present to get the real collection
298
+ if isinstance(real_collection, _SecureCollectionProxy):
299
+ real_collection = real_collection._collection
118
300
  if not isinstance(real_collection, AsyncIOMotorCollection):
119
- raise TypeError(
120
- f"Expected AsyncIOMotorCollection, got {type(real_collection)}"
121
- )
301
+ raise TypeError(f"Expected AsyncIOMotorCollection, got {type(real_collection)}")
122
302
  self._collection = real_collection
123
303
 
124
304
  async def _ensure_collection_exists(self) -> None:
@@ -134,9 +314,7 @@ class AsyncAtlasIndexManager:
134
314
  f"Continuing index creation."
135
315
  )
136
316
  else:
137
- logger.exception(
138
- "Failed to ensure collection exists - CollectionInvalid error"
139
- )
317
+ logger.exception("Failed to ensure collection exists - CollectionInvalid error")
140
318
  raise MongoDBEngineError(
141
319
  f"Failed to create prerequisite collection '{self._collection.name}'",
142
320
  context={"collection_name": self._collection.name},
@@ -208,9 +386,7 @@ class AsyncAtlasIndexManager:
208
386
  )
209
387
  return False # Will wait below
210
388
  elif existing_index.get("queryable"):
211
- logger.info(
212
- f"Search index '{name}' is already queryable and definition is up-to-date."
213
- )
389
+ logger.info(f"Search index '{name}' is already queryable and definition is up-to-date.")
214
390
  return True
215
391
  elif existing_index.get("status") == "FAILED":
216
392
  logger.error(
@@ -231,22 +407,17 @@ class AsyncAtlasIndexManager:
231
407
  """Create a new search index."""
232
408
  try:
233
409
  logger.info(f"Creating new search index '{name}' of type '{index_type}'...")
234
- search_index_model = SearchIndexModel(
235
- definition=definition, name=name, type=index_type
236
- )
410
+ search_index_model = SearchIndexModel(definition=definition, name=name, type=index_type)
237
411
  await self._collection.create_search_index(model=search_index_model)
238
412
  logger.info(f"Search index '{name}' build has been submitted.")
239
413
  except OperationFailure as e:
240
414
  if "IndexAlreadyExists" in str(e) or "DuplicateIndexName" in str(e):
241
- logger.warning(
242
- f"Race condition: Index '{name}' was created by another process."
243
- )
415
+ logger.warning(f"Race condition: Index '{name}' was created by another process.")
244
416
  else:
245
- logger.error(
246
- f"OperationFailure during search index creation "
247
- f"for '{name}': {e.details}"
417
+ logger.exception(
418
+ f"OperationFailure during search index creation " f"for '{name}': {e.details}"
248
419
  )
249
- raise e
420
+ raise
250
421
 
251
422
  async def create_search_index(
252
423
  self,
@@ -283,17 +454,13 @@ class AsyncAtlasIndexManager:
283
454
  return True
284
455
 
285
456
  except OperationFailure as e:
286
- logger.exception(
287
- f"OperationFailure during search index creation/check for '{name}'"
288
- )
457
+ logger.exception(f"OperationFailure during search index creation/check for '{name}'")
289
458
  raise MongoDBEngineError(
290
459
  f"Failed to create/check search index '{name}'",
291
460
  context={"index_name": name, "operation": "create_search_index"},
292
461
  ) from e
293
462
  except (ConnectionFailure, ServerSelectionTimeoutError) as e:
294
- logger.exception(
295
- f"Connection error during search index creation/check for '{name}'"
296
- )
463
+ logger.exception(f"Connection error during search index creation/check for '{name}'")
297
464
  raise MongoDBEngineError(
298
465
  f"Connection failed while creating/checking search index '{name}'",
299
466
  context={"index_name": name, "operation": "create_search_index"},
@@ -362,9 +529,7 @@ class AsyncAtlasIndexManager:
362
529
  except OperationFailure as e:
363
530
  # Handle race condition where index was already dropped
364
531
  if "IndexNotFound" in str(e):
365
- logger.info(
366
- f"Search index '{name}' was already deleted (race condition)."
367
- )
532
+ logger.info(f"Search index '{name}' was already deleted (race condition).")
368
533
  return True
369
534
  logger.exception(f"OperationFailure dropping search index '{name}'")
370
535
  raise MongoDBEngineError(
@@ -427,19 +592,13 @@ class AsyncAtlasIndexManager:
427
592
  queryable or fails.
428
593
  """
429
594
  start_time = time.time()
430
- logger.info(
431
- f"Waiting up to {timeout}s for search index '{name}' to become queryable..."
432
- )
595
+ logger.info(f"Waiting up to {timeout}s for search index '{name}' to become queryable...")
433
596
 
434
597
  while True:
435
598
  elapsed = time.time() - start_time
436
599
  if elapsed > timeout:
437
- logger.error(
438
- f"Timeout: Index '{name}' did not become queryable within {timeout}s."
439
- )
440
- raise TimeoutError(
441
- f"Index '{name}' did not become queryable within {timeout}s."
442
- )
600
+ logger.error(f"Timeout: Index '{name}' did not become queryable within {timeout}s.")
601
+ raise TimeoutError(f"Index '{name}' did not become queryable within {timeout}s.")
443
602
 
444
603
  index_info = None
445
604
  try:
@@ -471,9 +630,7 @@ class AsyncAtlasIndexManager:
471
630
  queryable = index_info.get("queryable")
472
631
  if queryable:
473
632
  # Success!
474
- logger.info(
475
- f"Search index '{name}' is queryable (Status: {status})."
476
- )
633
+ logger.info(f"Search index '{name}' is queryable (Status: {status}).")
477
634
  return True
478
635
 
479
636
  # Not ready yet, log and wait
@@ -495,14 +652,10 @@ class AsyncAtlasIndexManager:
495
652
  Private helper to poll until an index is successfully dropped.
496
653
  """
497
654
  start_time = time.time()
498
- logger.info(
499
- f"Waiting up to {timeout}s for search index '{name}' to be dropped..."
500
- )
655
+ logger.info(f"Waiting up to {timeout}s for search index '{name}' to be dropped...")
501
656
  while True:
502
657
  if time.time() - start_time > timeout:
503
- logger.error(
504
- f"Timeout: Index '{name}' was not dropped within {timeout}s."
505
- )
658
+ logger.error(f"Timeout: Index '{name}' was not dropped within {timeout}s.")
506
659
  raise TimeoutError(f"Index '{name}' was not dropped within {timeout}s.")
507
660
 
508
661
  index_info = await self.get_search_index(name)
@@ -588,9 +741,7 @@ class AsyncAtlasIndexManager:
588
741
  # Wait for index to be ready (MongoDB indexes are usually immediate, but we verify)
589
742
  if wait_for_ready:
590
743
  try:
591
- is_ready = await self._wait_for_regular_index_ready(
592
- name, timeout=30
593
- )
744
+ is_ready = await self._wait_for_regular_index_ready(name, timeout=30)
594
745
  if not is_ready:
595
746
  logger.warning(
596
747
  f"Regular index '{name}' may not be fully ready yet, "
@@ -606,11 +757,7 @@ class AsyncAtlasIndexManager:
606
757
  return name
607
758
  except OperationFailure as e:
608
759
  # Handle index build aborted (e.g., database being dropped during teardown)
609
- if (
610
- e.code == 276
611
- or "IndexBuildAborted" in str(e)
612
- or "dropDatabase" in str(e)
613
- ):
760
+ if e.code == 276 or "IndexBuildAborted" in str(e) or "dropDatabase" in str(e):
614
761
  logger.debug(
615
762
  f"Skipping regular index creation '{index_name}': "
616
763
  f"index build aborted (likely during database drop/teardown): {e}"
@@ -650,9 +797,7 @@ class AsyncAtlasIndexManager:
650
797
  kwargs["name"] = name
651
798
  return await self.create_index(keys, **kwargs)
652
799
 
653
- async def create_geo_index(
654
- self, field: str, name: Optional[str] = None, **kwargs: Any
655
- ) -> str:
800
+ async def create_geo_index(self, field: str, name: Optional[str] = None, **kwargs: Any) -> str:
656
801
  """Helper to create a standard 2dsphere index."""
657
802
  keys = [(field, GEO2DSPHERE)]
658
803
  if name:
@@ -681,9 +826,7 @@ class AsyncAtlasIndexManager:
681
826
  context={"index_name": name, "operation": "drop_index"},
682
827
  ) from e
683
828
  except InvalidOperation as e:
684
- logger.debug(
685
- f"Cannot drop regular index '{name}': MongoDB client is closed"
686
- )
829
+ logger.debug(f"Cannot drop regular index '{name}': MongoDB client is closed")
687
830
  raise MongoDBEngineError(
688
831
  f"Cannot drop regular index '{name}': MongoDB client is closed",
689
832
  context={"index_name": name, "operation": "drop_index"},
@@ -698,9 +841,7 @@ class AsyncAtlasIndexManager:
698
841
  return []
699
842
  except InvalidOperation:
700
843
  # Client is closed (e.g., during shutdown/teardown)
701
- logger.debug(
702
- "Skipping list_indexes: MongoDB client is closed (likely during shutdown)"
703
- )
844
+ logger.debug("Skipping list_indexes: MongoDB client is closed (likely during shutdown)")
704
845
  return []
705
846
 
706
847
  async def get_index(self, name: str) -> Optional[Dict[str, Any]]:
@@ -774,9 +915,7 @@ class AutoIndexManager:
774
915
  "_pending_tasks",
775
916
  )
776
917
 
777
- def __init__(
778
- self, collection: AsyncIOMotorCollection, index_manager: AsyncAtlasIndexManager
779
- ):
918
+ def __init__(self, collection: AsyncIOMotorCollection, index_manager: AsyncAtlasIndexManager):
780
919
  self._collection = collection
781
920
  self._index_manager = index_manager
782
921
  # Cache of index creation decisions (index_name -> bool)
@@ -812,8 +951,7 @@ class AutoIndexManager:
812
951
  if isinstance(value, dict):
813
952
  # Handle operators like $gt, $gte, $lt, $lte, $ne, $in, $exists
814
953
  if any(
815
- op in value
816
- for op in ["$gt", "$gte", "$lt", "$lte", "$ne", "$in", "$exists"]
954
+ op in value for op in ["$gt", "$gte", "$lt", "$lte", "$ne", "$in", "$exists"]
817
955
  ):
818
956
  # These operators benefit from indexes
819
957
  index_fields.append((field_name, ASCENDING))
@@ -888,9 +1026,7 @@ class AutoIndexManager:
888
1026
 
889
1027
  # Create the index
890
1028
  keys = all_fields
891
- await self._index_manager.create_index(
892
- keys, name=index_name, background=True
893
- )
1029
+ await self._index_manager.create_index(keys, name=index_name, background=True)
894
1030
  async with self._lock:
895
1031
  self._creation_cache[index_name] = True
896
1032
  logger.info(
@@ -986,9 +1122,7 @@ class AutoIndexManager:
986
1122
 
987
1123
  # Create task and track it
988
1124
  # Cleanup happens in _create_index_safely's finally block
989
- task = asyncio.create_task(
990
- self._create_index_safely(index_name, all_fields)
991
- )
1125
+ task = asyncio.create_task(self._create_index_safely(index_name, all_fields))
992
1126
  self._pending_tasks[index_name] = task
993
1127
 
994
1128
 
@@ -1028,6 +1162,9 @@ class ScopedCollectionWrapper:
1028
1162
  "_index_manager",
1029
1163
  "_auto_index_manager",
1030
1164
  "_auto_index_enabled",
1165
+ "_query_validator",
1166
+ "_resource_limiter",
1167
+ "_parent_wrapper",
1031
1168
  )
1032
1169
 
1033
1170
  def __init__(
@@ -1036,6 +1173,9 @@ class ScopedCollectionWrapper:
1036
1173
  read_scopes: List[str],
1037
1174
  write_scope: str,
1038
1175
  auto_index: bool = True,
1176
+ query_validator: Optional[QueryValidator] = None,
1177
+ resource_limiter: Optional[ResourceLimiter] = None,
1178
+ parent_wrapper: Optional["ScopedMongoWrapper"] = None,
1039
1179
  ):
1040
1180
  self._collection = real_collection
1041
1181
  self._read_scopes = read_scopes
@@ -1044,6 +1184,11 @@ class ScopedCollectionWrapper:
1044
1184
  # Lazily instantiated and cached
1045
1185
  self._index_manager: Optional[AsyncAtlasIndexManager] = None
1046
1186
  self._auto_index_manager: Optional[AutoIndexManager] = None
1187
+ # Query security and resource limits
1188
+ self._query_validator = query_validator or QueryValidator()
1189
+ self._resource_limiter = resource_limiter or ResourceLimiter()
1190
+ # Reference to parent wrapper for token verification
1191
+ self._parent_wrapper = parent_wrapper
1047
1192
 
1048
1193
  @property
1049
1194
  def index_manager(self) -> AsyncAtlasIndexManager:
@@ -1060,7 +1205,9 @@ class ScopedCollectionWrapper:
1060
1205
  # Create and cache it.
1061
1206
  # Pass the *real* collection, not 'self', as indexes
1062
1207
  # are not scoped by app_id.
1063
- self._index_manager = AsyncAtlasIndexManager(self._collection)
1208
+ # Access the real collection directly, bypassing the proxy
1209
+ real_collection = super().__getattribute__("_collection")
1210
+ self._index_manager = AsyncAtlasIndexManager(real_collection)
1064
1211
  return self._index_manager
1065
1212
 
1066
1213
  @property
@@ -1075,15 +1222,52 @@ class ScopedCollectionWrapper:
1075
1222
 
1076
1223
  if self._auto_index_manager is None:
1077
1224
  # Lazily instantiate auto-index manager
1225
+ # Access the real collection directly, bypassing the proxy
1226
+ real_collection = super().__getattribute__("_collection")
1078
1227
  self._auto_index_manager = AutoIndexManager(
1079
- self._collection,
1228
+ real_collection,
1080
1229
  self.index_manager, # This will create index_manager if needed
1081
1230
  )
1082
1231
  return self._auto_index_manager
1083
1232
 
1084
- def _inject_read_filter(
1085
- self, filter: Optional[Mapping[str, Any]] = None
1086
- ) -> Dict[str, Any]:
1233
+ def __getattribute__(self, name: str) -> Any:
1234
+ """
1235
+ Override to prevent access to dangerous attributes on _collection.
1236
+
1237
+ Blocks access to _collection.database and _collection.client to prevent
1238
+ bypassing scoping.
1239
+ """
1240
+ # Allow access to our own attributes
1241
+ if name.startswith("_") and name not in (
1242
+ "_collection",
1243
+ "_read_scopes",
1244
+ "_write_scope",
1245
+ "_index_manager",
1246
+ "_auto_index_manager",
1247
+ "_auto_index_enabled",
1248
+ "_query_validator",
1249
+ "_resource_limiter",
1250
+ ):
1251
+ return super().__getattribute__(name)
1252
+
1253
+ # If accessing _collection, wrap it to block database/client access
1254
+ if name == "_collection":
1255
+ collection = super().__getattribute__(name)
1256
+ # Return a proxy that blocks dangerous attributes
1257
+ return _SecureCollectionProxy(collection)
1258
+
1259
+ return super().__getattribute__(name)
1260
+
1261
+ def __setattr__(self, name: str, value: Any) -> None:
1262
+ """Override to prevent modification of _collection."""
1263
+ if name == "_collection" and hasattr(self, "_collection"):
1264
+ raise AttributeError(
1265
+ "Cannot modify '_collection' attribute. "
1266
+ "Collection wrappers are immutable for security."
1267
+ )
1268
+ super().__setattr__(name, value)
1269
+
1270
+ def _inject_read_filter(self, filter: Optional[Mapping[str, Any]] = None) -> Dict[str, Any]:
1087
1271
  """
1088
1272
  Combines the user's filter with our mandatory scope filter.
1089
1273
 
@@ -1099,9 +1283,7 @@ class ScopedCollectionWrapper:
1099
1283
  # If filter exists, combine them robustly with $and
1100
1284
  return {"$and": [filter, scope_filter]}
1101
1285
 
1102
- async def insert_one(
1103
- self, document: Mapping[str, Any], *args, **kwargs
1104
- ) -> InsertOneResult:
1286
+ async def insert_one(self, document: Mapping[str, Any], *args, **kwargs) -> InsertOneResult:
1105
1287
  """
1106
1288
  Injects the app_id before writing.
1107
1289
 
@@ -1110,12 +1292,31 @@ class ScopedCollectionWrapper:
1110
1292
  import time
1111
1293
 
1112
1294
  start_time = time.time()
1113
- collection_name = self._collection.name
1295
+ # Get collection name safely (may not exist for new collections)
1296
+ try:
1297
+ collection_name = self._collection.name
1298
+ except (AttributeError, TypeError):
1299
+ # Fallback if name is not accessible
1300
+ collection_name = "unknown"
1114
1301
 
1115
1302
  try:
1303
+ # Verify token if needed (lazy verification for async contexts)
1304
+ if self._parent_wrapper:
1305
+ await self._parent_wrapper._verify_token_if_needed()
1306
+
1307
+ # Validate document size before insert
1308
+ self._resource_limiter.validate_document_size(document)
1309
+
1116
1310
  # Use dictionary spread to create a non-mutating copy
1117
1311
  doc_to_insert = {**document, "app_id": self._write_scope}
1118
- result = await self._collection.insert_one(doc_to_insert, *args, **kwargs)
1312
+
1313
+ # Enforce query timeout
1314
+ kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
1315
+ # Remove maxTimeMS - insert_one doesn't accept it
1316
+ kwargs_for_insert = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
1317
+
1318
+ # Use self._collection.insert_one() - proxy delegates correctly
1319
+ result = await self._collection.insert_one(doc_to_insert, *args, **kwargs_for_insert)
1119
1320
  duration_ms = (time.time() - start_time) * 1000
1120
1321
  record_operation(
1121
1322
  "database.insert_one",
@@ -1164,8 +1365,17 @@ class ScopedCollectionWrapper:
1164
1365
  Safety: Uses a list comprehension to create copies of all documents,
1165
1366
  avoiding in-place mutation of the original list.
1166
1367
  """
1368
+ # Validate all document sizes before insert
1369
+ self._resource_limiter.validate_documents_size(documents)
1370
+
1371
+ # Enforce query timeout
1372
+ kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
1373
+ # Remove maxTimeMS - insert_many doesn't accept it
1374
+ kwargs_for_insert = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
1375
+
1167
1376
  docs_to_insert = [{**doc, "app_id": self._write_scope} for doc in documents]
1168
- return await self._collection.insert_many(docs_to_insert, *args, **kwargs)
1377
+ # Use self._collection.insert_many() - proxy delegates correctly
1378
+ return await self._collection.insert_many(docs_to_insert, *args, **kwargs_for_insert)
1169
1379
 
1170
1380
  async def find_one(
1171
1381
  self, filter: Optional[Mapping[str, Any]] = None, *args, **kwargs
@@ -1177,20 +1387,36 @@ class ScopedCollectionWrapper:
1177
1387
  import time
1178
1388
 
1179
1389
  start_time = time.time()
1180
- collection_name = self._collection.name
1390
+ # Access real collection directly (bypass proxy) for name attribute
1391
+ # Use object.__getattribute__ to bypass our custom __getattribute__ that wraps in proxy
1392
+ real_collection = object.__getattribute__(self, "_collection")
1393
+ collection_name = real_collection.name
1181
1394
 
1182
1395
  try:
1396
+ # Verify token if needed (lazy verification for async contexts)
1397
+ if self._parent_wrapper:
1398
+ await self._parent_wrapper._verify_token_if_needed()
1399
+
1400
+ # Validate query filter for security
1401
+ self._query_validator.validate_filter(filter)
1402
+ self._query_validator.validate_sort(kwargs.get("sort"))
1403
+
1404
+ # Enforce query timeout - but remove maxTimeMS for find_one
1405
+ # because Motor's find_one internally creates a cursor and some versions
1406
+ # don't handle maxTimeMS correctly when passed to find_one
1407
+ kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
1408
+ # Remove maxTimeMS to avoid cursor creation errors in find_one
1409
+ kwargs_for_find_one = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
1410
+
1183
1411
  # Magical auto-indexing: ensure indexes exist before querying
1184
1412
  # Note: We analyze the user's filter, not the scoped filter, since
1185
1413
  # app_id index is always ensured separately
1186
1414
  if self.auto_index_manager:
1187
1415
  sort = kwargs.get("sort")
1188
- await self.auto_index_manager.ensure_index_for_query(
1189
- filter=filter, sort=sort
1190
- )
1416
+ await self.auto_index_manager.ensure_index_for_query(filter=filter, sort=sort)
1191
1417
 
1192
1418
  scoped_filter = self._inject_read_filter(filter)
1193
- result = await self._collection.find_one(scoped_filter, *args, **kwargs)
1419
+ result = await self._collection.find_one(scoped_filter, *args, **kwargs_for_find_one)
1194
1420
  duration_ms = (time.time() - start_time) * 1000
1195
1421
  record_operation(
1196
1422
  "database.find_one",
@@ -1200,7 +1426,7 @@ class ScopedCollectionWrapper:
1200
1426
  app_slug=self._write_scope,
1201
1427
  )
1202
1428
  return result
1203
- except Exception:
1429
+ except (PyMongoError, ValueError, TypeError, KeyError, AttributeError):
1204
1430
  duration_ms = (time.time() - start_time) * 1000
1205
1431
  record_operation(
1206
1432
  "database.find_one",
@@ -1219,6 +1445,25 @@ class ScopedCollectionWrapper:
1219
1445
  Returns an async cursor, just like motor.
1220
1446
  Automatically ensures appropriate indexes exist for the query.
1221
1447
  """
1448
+ # Validate query filter for security
1449
+ self._query_validator.validate_filter(filter)
1450
+ self._query_validator.validate_sort(kwargs.get("sort"))
1451
+
1452
+ # Enforce result limit
1453
+ limit = kwargs.get("limit")
1454
+ if limit is not None:
1455
+ kwargs["limit"] = self._resource_limiter.enforce_result_limit(limit)
1456
+
1457
+ # Enforce batch size
1458
+ batch_size = kwargs.get("batch_size")
1459
+ if batch_size is not None:
1460
+ kwargs["batch_size"] = self._resource_limiter.enforce_batch_size(batch_size)
1461
+
1462
+ # Enforce query timeout - but remove maxTimeMS before passing to find()
1463
+ # because Cursor constructor doesn't accept maxTimeMS
1464
+ kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
1465
+ kwargs_for_find = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
1466
+
1222
1467
  # Magical auto-indexing: ensure indexes exist before querying
1223
1468
  # Note: This is fire-and-forget, doesn't block cursor creation
1224
1469
  if self.auto_index_manager:
@@ -1227,23 +1472,20 @@ class ScopedCollectionWrapper:
1227
1472
  # Create a task to ensure index (fire and forget, managed to prevent accumulation)
1228
1473
  async def _safe_index_task():
1229
1474
  try:
1230
- await self.auto_index_manager.ensure_index_for_query(
1231
- filter=filter, sort=sort
1232
- )
1475
+ await self.auto_index_manager.ensure_index_for_query(filter=filter, sort=sort)
1233
1476
  except (
1234
1477
  OperationFailure,
1235
1478
  ConnectionFailure,
1236
1479
  ServerSelectionTimeoutError,
1237
1480
  InvalidOperation,
1238
1481
  ) as e:
1239
- logger.debug(
1240
- f"Auto-index creation failed for query (non-critical): {e}"
1241
- )
1482
+ logger.debug(f"Auto-index creation failed for query (non-critical): {e}")
1483
+ # Let other exceptions bubble up - they are non-recoverable (Type 4)
1242
1484
 
1243
1485
  _create_managed_task(_safe_index_task(), task_name="auto_index_check")
1244
1486
 
1245
1487
  scoped_filter = self._inject_read_filter(filter)
1246
- return self._collection.find(scoped_filter, *args, **kwargs)
1488
+ return self._collection.find(scoped_filter, *args, **kwargs_for_find)
1247
1489
 
1248
1490
  async def update_one(
1249
1491
  self, filter: Mapping[str, Any], update: Mapping[str, Any], *args, **kwargs
@@ -1252,8 +1494,16 @@ class ScopedCollectionWrapper:
1252
1494
  Applies the read scope to the filter.
1253
1495
  Note: This only scopes the *filter*, not the update operation.
1254
1496
  """
1497
+ # Validate query filter for security
1498
+ self._query_validator.validate_filter(filter)
1499
+
1500
+ # Enforce query timeout
1501
+ kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
1502
+ # Remove maxTimeMS - update_one doesn't accept it
1503
+ kwargs_for_update = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
1504
+
1255
1505
  scoped_filter = self._inject_read_filter(filter)
1256
- return await self._collection.update_one(scoped_filter, update, *args, **kwargs)
1506
+ return await self._collection.update_one(scoped_filter, update, *args, **kwargs_for_update)
1257
1507
 
1258
1508
  async def update_many(
1259
1509
  self, filter: Mapping[str, Any], update: Mapping[str, Any], *args, **kwargs
@@ -1262,24 +1512,42 @@ class ScopedCollectionWrapper:
1262
1512
  Applies the read scope to the filter.
1263
1513
  Note: This only scopes the *filter*, not the update operation.
1264
1514
  """
1515
+ # Validate query filter for security
1516
+ self._query_validator.validate_filter(filter)
1517
+
1518
+ # Enforce query timeout
1519
+ kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
1520
+ # Remove maxTimeMS - update_many doesn't accept it
1521
+ kwargs_for_update = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
1522
+
1265
1523
  scoped_filter = self._inject_read_filter(filter)
1266
- return await self._collection.update_many(
1267
- scoped_filter, update, *args, **kwargs
1268
- )
1524
+ return await self._collection.update_many(scoped_filter, update, *args, **kwargs_for_update)
1269
1525
 
1270
- async def delete_one(
1271
- self, filter: Mapping[str, Any], *args, **kwargs
1272
- ) -> DeleteResult:
1526
+ async def delete_one(self, filter: Mapping[str, Any], *args, **kwargs) -> DeleteResult:
1273
1527
  """Applies the read scope to the filter."""
1528
+ # Validate query filter for security
1529
+ self._query_validator.validate_filter(filter)
1530
+
1531
+ # Enforce query timeout
1532
+ kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
1533
+ # Remove maxTimeMS - delete_one doesn't accept it
1534
+ kwargs_for_delete = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
1535
+
1274
1536
  scoped_filter = self._inject_read_filter(filter)
1275
- return await self._collection.delete_one(scoped_filter, *args, **kwargs)
1537
+ return await self._collection.delete_one(scoped_filter, *args, **kwargs_for_delete)
1276
1538
 
1277
- async def delete_many(
1278
- self, filter: Mapping[str, Any], *args, **kwargs
1279
- ) -> DeleteResult:
1539
+ async def delete_many(self, filter: Mapping[str, Any], *args, **kwargs) -> DeleteResult:
1280
1540
  """Applies the read scope to the filter."""
1541
+ # Validate query filter for security
1542
+ self._query_validator.validate_filter(filter)
1543
+
1544
+ # Enforce query timeout
1545
+ kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
1546
+ # Remove maxTimeMS - delete_many doesn't accept it
1547
+ kwargs_for_delete = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
1548
+
1281
1549
  scoped_filter = self._inject_read_filter(filter)
1282
- return await self._collection.delete_many(scoped_filter, *args, **kwargs)
1550
+ return await self._collection.delete_many(scoped_filter, *args, **kwargs_for_delete)
1283
1551
 
1284
1552
  async def count_documents(
1285
1553
  self, filter: Optional[Mapping[str, Any]] = None, *args, **kwargs
@@ -1288,22 +1556,34 @@ class ScopedCollectionWrapper:
1288
1556
  Applies the read scope to the filter for counting.
1289
1557
  Automatically ensures appropriate indexes exist for the query.
1290
1558
  """
1559
+ # Validate query filter for security
1560
+ self._query_validator.validate_filter(filter)
1561
+
1562
+ # Note: count_documents doesn't reliably support maxTimeMS in all Motor versions
1563
+ # Remove it to avoid cursor creation errors when auto-indexing triggers list_indexes()
1564
+ kwargs_for_count = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
1565
+ # Don't enforce timeout for count_documents to avoid issues with cursor operations
1566
+
1291
1567
  # Magical auto-indexing: ensure indexes exist before querying
1292
1568
  if self.auto_index_manager:
1293
1569
  await self.auto_index_manager.ensure_index_for_query(filter=filter)
1294
1570
 
1295
1571
  scoped_filter = self._inject_read_filter(filter)
1296
- return await self._collection.count_documents(scoped_filter, *args, **kwargs)
1572
+ return await self._collection.count_documents(scoped_filter, *args, **kwargs_for_count)
1297
1573
 
1298
- def aggregate(
1299
- self, pipeline: List[Dict[str, Any]], *args, **kwargs
1300
- ) -> AsyncIOMotorCursor:
1574
+ def aggregate(self, pipeline: List[Dict[str, Any]], *args, **kwargs) -> AsyncIOMotorCursor:
1301
1575
  """
1302
1576
  Injects a scope filter into the pipeline. For normal pipelines, we prepend
1303
1577
  a $match stage. However, if the first stage is $vectorSearch, we embed
1304
1578
  the read_scope filter into its 'filter' property, because $vectorSearch must
1305
1579
  remain the very first stage in Atlas.
1306
1580
  """
1581
+ # Validate aggregation pipeline for security
1582
+ self._query_validator.validate_pipeline(pipeline)
1583
+
1584
+ # Enforce query timeout - Motor's aggregate() accepts maxTimeMS
1585
+ kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
1586
+
1307
1587
  if not pipeline:
1308
1588
  # No stages given, just prepend our $match
1309
1589
  scope_match_stage = {"$match": {"app_id": {"$in": self._read_scopes}}}
@@ -1363,7 +1643,20 @@ class ScopedMongoWrapper:
1363
1643
  # Lock to prevent race conditions when multiple requests try to create the same index
1364
1644
  _app_id_index_lock: ClassVar[asyncio.Lock] = asyncio.Lock()
1365
1645
 
1366
- __slots__ = ("_db", "_read_scopes", "_write_scope", "_wrapper_cache", "_auto_index")
1646
+ __slots__ = (
1647
+ "_db",
1648
+ "_read_scopes",
1649
+ "_write_scope",
1650
+ "_wrapper_cache",
1651
+ "_auto_index",
1652
+ "_query_validator",
1653
+ "_resource_limiter",
1654
+ "_app_slug",
1655
+ "_app_token",
1656
+ "_app_secrets_manager",
1657
+ "_token_verified",
1658
+ "_token_verification_lock",
1659
+ )
1367
1660
 
1368
1661
  def __init__(
1369
1662
  self,
@@ -1371,33 +1664,150 @@ class ScopedMongoWrapper:
1371
1664
  read_scopes: List[str],
1372
1665
  write_scope: str,
1373
1666
  auto_index: bool = True,
1667
+ query_validator: Optional[QueryValidator] = None,
1668
+ resource_limiter: Optional[ResourceLimiter] = None,
1669
+ app_slug: Optional[str] = None,
1670
+ app_token: Optional[str] = None,
1671
+ app_secrets_manager: Optional["AppSecretsManager"] = None,
1374
1672
  ):
1375
1673
  self._db = real_db
1376
1674
  self._read_scopes = read_scopes
1377
1675
  self._write_scope = write_scope
1378
1676
  self._auto_index = auto_index
1379
1677
 
1678
+ # Query security and resource limits (shared across all collections)
1679
+ self._query_validator = query_validator or QueryValidator()
1680
+ self._resource_limiter = resource_limiter or ResourceLimiter()
1681
+
1682
+ # Token verification for app authentication
1683
+ self._app_slug = app_slug
1684
+ self._app_token = app_token
1685
+ self._app_secrets_manager = app_secrets_manager
1686
+ self._token_verified = False
1687
+ self._token_verification_lock = asyncio.Lock()
1688
+
1380
1689
  # Cache for created collection wrappers.
1381
1690
  self._wrapper_cache: Dict[str, ScopedCollectionWrapper] = {}
1382
1691
 
1383
- @property
1384
- def database(self) -> AsyncIOMotorDatabase:
1692
+ async def _verify_token_if_needed(self) -> None:
1385
1693
  """
1386
- Access the underlying AsyncIOMotorDatabase (unscoped).
1694
+ Verify app token lazily on first database operation.
1387
1695
 
1388
- This is useful for advanced operations that need direct access to the
1389
- real database without scoping, such as index management.
1696
+ This method ensures token verification happens even when get_scoped_db()
1697
+ is called from an async context where sync verification was skipped.
1390
1698
 
1391
- Returns:
1392
- The underlying AsyncIOMotorDatabase instance
1699
+ Raises:
1700
+ ValueError: If token verification fails
1701
+ """
1702
+ # If already verified, skip
1703
+ if self._token_verified:
1704
+ return
1393
1705
 
1394
- Example:
1395
- # Access underlying database for index management
1396
- real_db = db.raw.database
1397
- collection = real_db["my_collection"]
1398
- index_manager = AsyncAtlasIndexManager(collection)
1706
+ # If no token or secrets manager, skip verification
1707
+ if not self._app_token or not self._app_secrets_manager or not self._app_slug:
1708
+ self._token_verified = True
1709
+ return
1710
+
1711
+ # Use lock to prevent race conditions
1712
+ async with self._token_verification_lock:
1713
+ # Double-check after acquiring lock
1714
+ if self._token_verified:
1715
+ return
1716
+
1717
+ # Verify token
1718
+ is_valid = await self._app_secrets_manager.verify_app_secret(
1719
+ self._app_slug, self._app_token
1720
+ )
1721
+
1722
+ if not is_valid:
1723
+ logger.warning(f"Security: Invalid app token for '{self._app_slug}'")
1724
+ raise ValueError("Invalid app token")
1725
+
1726
+ # Mark as verified
1727
+ self._token_verified = True
1728
+ logger.debug(f"Token verified for app '{self._app_slug}'")
1729
+
1730
+ def _validate_cross_app_access(self, prefixed_name: str) -> None:
1731
+ """
1732
+ Validate that cross-app collection access is authorized.
1733
+
1734
+ Args:
1735
+ prefixed_name: Prefixed collection name (e.g., "other_app_collection")
1736
+
1737
+ Raises:
1738
+ ValueError: If cross-app access is not authorized
1739
+ """
1740
+ # Extract app slug from prefixed name
1741
+ target_app = _extract_app_slug_from_prefixed_name(prefixed_name)
1742
+ if target_app is None:
1743
+ return # Same-app access or not a valid prefixed name
1744
+
1745
+ # Check if target app is in read_scopes
1746
+ if target_app not in self._read_scopes:
1747
+ logger.warning(
1748
+ f"Security: Unauthorized cross-app access attempt. "
1749
+ f"Collection: '{prefixed_name}', Target app: '{target_app}', "
1750
+ f"Read scopes: {self._read_scopes}, Write scope: {self._write_scope}"
1751
+ )
1752
+ raise ValueError(
1753
+ f"Access to collection '{prefixed_name}' not authorized. "
1754
+ f"App '{target_app}' is not in read_scopes {self._read_scopes}. "
1755
+ "Cross-app access must be explicitly granted via read_scopes."
1756
+ )
1757
+
1758
+ # Log authorized cross-app access for audit trail
1759
+ logger.info(
1760
+ f"Cross-app access authorized. "
1761
+ f"Collection: '{prefixed_name}', From app: '{self._write_scope}', "
1762
+ f"To app: '{target_app}'"
1763
+ )
1764
+
1765
+ def __getattribute__(self, name: str) -> Any:
1766
+ """
1767
+ Override to validate collection names before attribute access.
1768
+ This ensures validation happens even if MagicMock creates attributes dynamically.
1399
1769
  """
1400
- return self._db
1770
+ # Handle our own attributes first (use super() to avoid recursion)
1771
+ if name.startswith("_") or name in ("get_collection",):
1772
+ return super().__getattribute__(name)
1773
+
1774
+ # Validate collection name for security BEFORE checking if attribute exists
1775
+ # This ensures ValueError is raised even if MagicMock would create the attribute
1776
+ validation_error = None
1777
+ if not name.startswith("_"):
1778
+ try:
1779
+ _validate_collection_name(name, allow_prefixed=False)
1780
+ except ValueError as e:
1781
+ # Log the warning without accessing object attributes to avoid recursion
1782
+ # The validation error itself is what matters, not the logging details
1783
+ try:
1784
+ logger.warning(
1785
+ f"Security: Invalid collection name attempted. "
1786
+ f"Name: '{name}', Error: {e}"
1787
+ )
1788
+ except (AttributeError, RuntimeError):
1789
+ # If logging fails due to logger issues, continue -
1790
+ # validation error is what matters
1791
+ # Type 2: Recoverable - we can continue without logging
1792
+ pass
1793
+ # Store the error to raise after checking attribute existence
1794
+ # This ensures we raise ValueError even if MagicMock creates the attribute
1795
+ validation_error = ValueError(str(e))
1796
+
1797
+ # Continue with normal attribute access
1798
+ try:
1799
+ attr = super().__getattribute__(name)
1800
+ # If validation failed, raise ValueError now (even if attribute exists)
1801
+ if validation_error is not None:
1802
+ raise validation_error
1803
+ return attr
1804
+ except AttributeError:
1805
+ # Attribute doesn't exist
1806
+ # If validation failed, raise ValueError (from None: unrelated to AttributeError)
1807
+ if validation_error is not None:
1808
+ raise validation_error from None
1809
+ # Delegate to __getattr__ for collection creation
1810
+ return self.__getattr__(name)
1401
1811
 
1402
1812
  def __getattr__(self, name: str) -> ScopedCollectionWrapper:
1403
1813
  """
@@ -1406,6 +1816,17 @@ class ScopedMongoWrapper:
1406
1816
  If `name` is a collection, returns a `ScopedCollectionWrapper`.
1407
1817
  """
1408
1818
 
1819
+ # Explicitly block access to 'database' property (removed for security)
1820
+ if name == "database":
1821
+ logger.warning(
1822
+ f"Security: Attempted access to 'database' property. " f"App: {self._write_scope}"
1823
+ )
1824
+ raise AttributeError(
1825
+ "'database' property has been removed for security. "
1826
+ "Use collection.index_manager for index operations. "
1827
+ "All data access must go through scoped collections."
1828
+ )
1829
+
1409
1830
  # Prevent proxying private/special attributes
1410
1831
  if name.startswith("_"):
1411
1832
  raise AttributeError(
@@ -1413,11 +1834,33 @@ class ScopedMongoWrapper:
1413
1834
  "Access to private attributes is blocked."
1414
1835
  )
1415
1836
 
1837
+ # Note: Validation already happened in __getattribute__, but we validate again
1838
+ # for safety in case __getattr__ is called directly
1839
+ try:
1840
+ _validate_collection_name(name, allow_prefixed=False)
1841
+ except ValueError as e:
1842
+ logger.warning(
1843
+ f"Security: Invalid collection name attempted. "
1844
+ f"Name: '{name}', App: {self._write_scope}, Error: {e}"
1845
+ )
1846
+ raise
1847
+
1416
1848
  # Construct the prefixed collection name, e.g., "data_imaging_workouts"
1417
1849
  # `self._write_scope` holds the slug (e.g., "data_imaging")
1418
1850
  # `name` holds the base name (e.g., "workouts")
1419
1851
  prefixed_name = f"{self._write_scope}_{name}"
1420
1852
 
1853
+ # Validate prefixed name as well (for reserved names check)
1854
+ try:
1855
+ _validate_collection_name(prefixed_name, allow_prefixed=True)
1856
+ except ValueError as e:
1857
+ logger.warning(
1858
+ f"Security: Invalid prefixed collection name. "
1859
+ f"Base name: '{name}', Prefixed: '{prefixed_name}', "
1860
+ f"App: {self._write_scope}, Error: {e}"
1861
+ )
1862
+ raise
1863
+
1421
1864
  # Check cache first using the *prefixed_name*
1422
1865
  if prefixed_name in self._wrapper_cache:
1423
1866
  return self._wrapper_cache[prefixed_name]
@@ -1439,6 +1882,8 @@ class ScopedMongoWrapper:
1439
1882
  read_scopes=self._read_scopes,
1440
1883
  write_scope=self._write_scope,
1441
1884
  auto_index=self._auto_index,
1885
+ query_validator=self._query_validator,
1886
+ resource_limiter=self._resource_limiter,
1442
1887
  )
1443
1888
 
1444
1889
  # Magically ensure app_id index exists (it's always used in queries)
@@ -1476,17 +1921,13 @@ class ScopedMongoWrapper:
1476
1921
  f"connection is closed (likely during shutdown)"
1477
1922
  )
1478
1923
  async with ScopedMongoWrapper._app_id_index_lock:
1479
- ScopedMongoWrapper._app_id_index_cache.pop(
1480
- collection_name, None
1481
- )
1924
+ ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
1482
1925
  return
1483
1926
 
1484
1927
  has_index = await self._ensure_app_id_index(real_collection)
1485
1928
  # Update cache with result (inside lock for thread-safety)
1486
1929
  async with ScopedMongoWrapper._app_id_index_lock:
1487
- ScopedMongoWrapper._app_id_index_cache[collection_name] = (
1488
- has_index
1489
- )
1930
+ ScopedMongoWrapper._app_id_index_cache[collection_name] = has_index
1490
1931
  except (
1491
1932
  ConnectionFailure,
1492
1933
  ServerSelectionTimeoutError,
@@ -1499,30 +1940,82 @@ class ScopedMongoWrapper:
1499
1940
  )
1500
1941
  # Remove from cache on error so we can retry later
1501
1942
  async with ScopedMongoWrapper._app_id_index_lock:
1502
- ScopedMongoWrapper._app_id_index_cache.pop(
1503
- collection_name, None
1504
- )
1943
+ ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
1505
1944
  except OperationFailure as e:
1506
1945
  # Index creation failed for other reasons (non-critical)
1507
1946
  logger.debug(f"App_id index creation failed (non-critical): {e}")
1508
1947
  # Remove from cache on error so we can retry later
1509
1948
  async with ScopedMongoWrapper._app_id_index_lock:
1510
- ScopedMongoWrapper._app_id_index_cache.pop(
1511
- collection_name, None
1512
- )
1949
+ ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
1950
+ # Let other exceptions bubble up - they are non-recoverable (Type 4)
1513
1951
 
1514
1952
  # Check cache first (quick check before lock)
1515
1953
  if collection_name not in ScopedMongoWrapper._app_id_index_cache:
1516
1954
  # Fire and forget - task will check lock internally
1517
1955
  # (managed to prevent accumulation)
1518
- _create_managed_task(
1519
- _safe_app_id_index_check(), task_name="app_id_index_check"
1520
- )
1956
+ _create_managed_task(_safe_app_id_index_check(), task_name="app_id_index_check")
1521
1957
 
1522
1958
  # Store it in the cache for this instance using the *prefixed_name*
1523
1959
  self._wrapper_cache[prefixed_name] = wrapper
1524
1960
  return wrapper
1525
1961
 
1962
+ def _find_matched_app_for_collection(self, name: str) -> str | None:
1963
+ """
1964
+ Check if collection name matches any app slug in read_scopes (cross-app access).
1965
+
1966
+ Args:
1967
+ name: Collection name to check
1968
+
1969
+ Returns:
1970
+ Matched app slug if found, None otherwise
1971
+ """
1972
+ if "_" not in name:
1973
+ return None
1974
+
1975
+ # Check if any app slug in read_scopes matches the beginning of the name
1976
+ for app_slug in self._read_scopes:
1977
+ if name.startswith(f"{app_slug}_") and app_slug != self._write_scope:
1978
+ return app_slug
1979
+ return None
1980
+
1981
+ def _resolve_prefixed_collection_name(self, name: str, matched_app: str | None) -> str:
1982
+ """
1983
+ Resolve the prefixed collection name based on matched app or write scope.
1984
+
1985
+ Args:
1986
+ name: Collection name (base or prefixed)
1987
+ matched_app: Matched app slug if cross-app access, None otherwise
1988
+
1989
+ Returns:
1990
+ Prefixed collection name
1991
+
1992
+ Raises:
1993
+ ValueError: If prefixed name is invalid
1994
+ """
1995
+ if matched_app:
1996
+ # This is authorized cross-app access
1997
+ prefixed_name = name
1998
+ # Log authorized cross-app access for audit trail
1999
+ logger.info(
2000
+ f"Cross-app access authorized. "
2001
+ f"Collection: '{prefixed_name}', From app: '{self._write_scope}', "
2002
+ f"To app: '{matched_app}'"
2003
+ )
2004
+ else:
2005
+ # Regular collection name - prefix with write_scope
2006
+ prefixed_name = f"{self._write_scope}_{name}"
2007
+ # Validate prefixed name
2008
+ try:
2009
+ _validate_collection_name(prefixed_name, allow_prefixed=True)
2010
+ except ValueError as e:
2011
+ logger.warning(
2012
+ f"Security: Invalid prefixed collection name in get_collection(). "
2013
+ f"Base name: '{name}', Prefixed: '{prefixed_name}', "
2014
+ f"App: {self._write_scope}, Error: {e}"
2015
+ )
2016
+ raise
2017
+ return prefixed_name
2018
+
1526
2019
  def get_collection(self, name: str) -> ScopedCollectionWrapper:
1527
2020
  """
1528
2021
  Get a collection by name (Motor-like API).
@@ -1539,6 +2032,9 @@ class ScopedMongoWrapper:
1539
2032
  Returns:
1540
2033
  ScopedCollectionWrapper instance
1541
2034
 
2035
+ Raises:
2036
+ ValueError: If collection name is invalid or cross-app access is not authorized
2037
+
1542
2038
  Example:
1543
2039
  # Same-app collection (base name)
1544
2040
  collection = db.get_collection("my_collection")
@@ -1546,15 +2042,21 @@ class ScopedMongoWrapper:
1546
2042
  # Cross-app collection (fully prefixed)
1547
2043
  collection = db.get_collection("click_tracker_clicks")
1548
2044
  """
1549
- # Check if name is already fully prefixed (contains underscore and is longer)
1550
- # We use a heuristic: if name contains underscore and doesn't start with write_scope,
1551
- # assume it's already fully prefixed
1552
- if "_" in name and not name.startswith(f"{self._write_scope}_"):
1553
- # Assume it's already fully prefixed (cross-app access)
1554
- prefixed_name = name
1555
- else:
1556
- # Standard case: prefix with write_scope
1557
- prefixed_name = f"{self._write_scope}_{name}"
2045
+ # Validate collection name for security
2046
+ try:
2047
+ _validate_collection_name(name, allow_prefixed=True)
2048
+ except ValueError as e:
2049
+ logger.warning(
2050
+ f"Security: Invalid collection name in get_collection(). "
2051
+ f"Name: '{name}', App: {self._write_scope}, Error: {e}"
2052
+ )
2053
+ raise
2054
+
2055
+ # Check if name is already fully prefixed (cross-app access)
2056
+ matched_app = self._find_matched_app_for_collection(name)
2057
+
2058
+ # Resolve prefixed name based on matched app or write scope
2059
+ prefixed_name = self._resolve_prefixed_collection_name(name, matched_app)
1558
2060
 
1559
2061
  # Check cache first
1560
2062
  if prefixed_name in self._wrapper_cache:
@@ -1576,6 +2078,9 @@ class ScopedMongoWrapper:
1576
2078
  read_scopes=self._read_scopes,
1577
2079
  write_scope=self._write_scope,
1578
2080
  auto_index=self._auto_index,
2081
+ query_validator=self._query_validator,
2082
+ resource_limiter=self._resource_limiter,
2083
+ parent_wrapper=self,
1579
2084
  )
1580
2085
 
1581
2086
  # Magically ensure app_id index exists (background task)
@@ -1607,16 +2112,12 @@ class ScopedMongoWrapper:
1607
2112
  f"connection is closed (likely during shutdown)"
1608
2113
  )
1609
2114
  async with ScopedMongoWrapper._app_id_index_lock:
1610
- ScopedMongoWrapper._app_id_index_cache.pop(
1611
- collection_name, None
1612
- )
2115
+ ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
1613
2116
  return
1614
2117
 
1615
2118
  has_index = await self._ensure_app_id_index(real_collection)
1616
2119
  async with ScopedMongoWrapper._app_id_index_lock:
1617
- ScopedMongoWrapper._app_id_index_cache[collection_name] = (
1618
- has_index
1619
- )
2120
+ ScopedMongoWrapper._app_id_index_cache[collection_name] = has_index
1620
2121
  except (
1621
2122
  ConnectionFailure,
1622
2123
  ServerSelectionTimeoutError,
@@ -1628,27 +2129,53 @@ class ScopedMongoWrapper:
1628
2129
  f"connection error (likely during shutdown): {e}"
1629
2130
  )
1630
2131
  async with ScopedMongoWrapper._app_id_index_lock:
1631
- ScopedMongoWrapper._app_id_index_cache.pop(
1632
- collection_name, None
1633
- )
2132
+ ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
1634
2133
  except OperationFailure as e:
1635
2134
  # Index creation failed for other reasons (non-critical)
1636
2135
  logger.debug(f"App_id index creation failed (non-critical): {e}")
1637
2136
  async with ScopedMongoWrapper._app_id_index_lock:
1638
- ScopedMongoWrapper._app_id_index_cache.pop(
1639
- collection_name, None
1640
- )
2137
+ ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
2138
+ # Let other exceptions bubble up - they are non-recoverable (Type 4)
1641
2139
 
1642
2140
  if collection_name not in ScopedMongoWrapper._app_id_index_cache:
1643
2141
  # Use managed task creation to prevent accumulation
1644
- _create_managed_task(
1645
- _safe_app_id_index_check(), task_name="app_id_index_check"
1646
- )
2142
+ _create_managed_task(_safe_app_id_index_check(), task_name="app_id_index_check")
1647
2143
 
1648
2144
  # Store it in the cache
1649
2145
  self._wrapper_cache[prefixed_name] = wrapper
1650
2146
  return wrapper
1651
2147
 
2148
+ def __getitem__(self, name: str) -> ScopedCollectionWrapper:
2149
+ """
2150
+ Support bracket notation for collection access (e.g., db["collection_name"]).
2151
+
2152
+ This allows compatibility with code that uses bracket notation instead of
2153
+ attribute access (e.g., TokenBlacklist, SessionManager).
2154
+
2155
+ Args:
2156
+ name: Collection name (base name, will be prefixed with write_scope)
2157
+
2158
+ Returns:
2159
+ ScopedCollectionWrapper instance
2160
+
2161
+ Raises:
2162
+ ValueError: If collection name is invalid
2163
+
2164
+ Example:
2165
+ collection = db["my_collection"] # Same as db.my_collection
2166
+ """
2167
+ # Validate collection name for security (get_collection will do additional validation)
2168
+ try:
2169
+ _validate_collection_name(name, allow_prefixed=False)
2170
+ except ValueError as e:
2171
+ logger.warning(
2172
+ f"Security: Invalid collection name in __getitem__(). "
2173
+ f"Name: '{name}', App: {self._write_scope}, Error: {e}"
2174
+ )
2175
+ raise
2176
+
2177
+ return self.get_collection(name)
2178
+
1652
2179
  async def _ensure_app_id_index(self, collection: AsyncIOMotorCollection) -> bool:
1653
2180
  """
1654
2181
  Ensures app_id index exists on collection.
@@ -1680,11 +2207,7 @@ class ScopedMongoWrapper:
1680
2207
  return True
1681
2208
  except OperationFailure as e:
1682
2209
  # Handle index build aborted (e.g., database being dropped during teardown)
1683
- if (
1684
- e.code == 276
1685
- or "IndexBuildAborted" in str(e)
1686
- or "dropDatabase" in str(e)
1687
- ):
2210
+ if e.code == 276 or "IndexBuildAborted" in str(e) or "dropDatabase" in str(e):
1688
2211
  logger.debug(
1689
2212
  f"Skipping app_id index creation on {collection.name}: "
1690
2213
  f"index build aborted (likely during database drop/teardown): {e}"
@@ -1694,19 +2217,13 @@ class ScopedMongoWrapper:
1694
2217
  return True
1695
2218
  except OperationFailure as e:
1696
2219
  # Handle index build aborted (e.g., database being dropped during teardown)
1697
- if (
1698
- e.code == 276
1699
- or "IndexBuildAborted" in str(e)
1700
- or "dropDatabase" in str(e)
1701
- ):
2220
+ if e.code == 276 or "IndexBuildAborted" in str(e) or "dropDatabase" in str(e):
1702
2221
  logger.debug(
1703
2222
  f"Skipping app_id index creation on {collection.name}: "
1704
2223
  f"index build aborted (likely during database drop/teardown): {e}"
1705
2224
  )
1706
2225
  return False
1707
- logger.debug(
1708
- f"OperationFailure ensuring app_id index on {collection.name}: {e}"
1709
- )
2226
+ logger.debug(f"OperationFailure ensuring app_id index on {collection.name}: {e}")
1710
2227
  return False
1711
2228
  except (ConnectionFailure, ServerSelectionTimeoutError, InvalidOperation) as e:
1712
2229
  # Handle connection errors gracefully (e.g., during shutdown)