mdb-engine 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdb_engine/__init__.py +104 -11
- mdb_engine/auth/ARCHITECTURE.md +112 -0
- mdb_engine/auth/README.md +648 -11
- mdb_engine/auth/__init__.py +136 -29
- mdb_engine/auth/audit.py +592 -0
- mdb_engine/auth/base.py +252 -0
- mdb_engine/auth/casbin_factory.py +264 -69
- mdb_engine/auth/config_helpers.py +7 -6
- mdb_engine/auth/cookie_utils.py +3 -7
- mdb_engine/auth/csrf.py +373 -0
- mdb_engine/auth/decorators.py +3 -10
- mdb_engine/auth/dependencies.py +47 -50
- mdb_engine/auth/helpers.py +3 -3
- mdb_engine/auth/integration.py +53 -80
- mdb_engine/auth/jwt.py +2 -6
- mdb_engine/auth/middleware.py +77 -34
- mdb_engine/auth/oso_factory.py +18 -38
- mdb_engine/auth/provider.py +270 -171
- mdb_engine/auth/rate_limiter.py +504 -0
- mdb_engine/auth/restrictions.py +8 -24
- mdb_engine/auth/session_manager.py +14 -29
- mdb_engine/auth/shared_middleware.py +600 -0
- mdb_engine/auth/shared_users.py +759 -0
- mdb_engine/auth/token_store.py +14 -28
- mdb_engine/auth/users.py +54 -113
- mdb_engine/auth/utils.py +213 -15
- mdb_engine/cli/commands/generate.py +545 -9
- mdb_engine/cli/commands/validate.py +3 -7
- mdb_engine/cli/utils.py +3 -3
- mdb_engine/config.py +7 -21
- mdb_engine/constants.py +65 -0
- mdb_engine/core/README.md +117 -6
- mdb_engine/core/__init__.py +39 -7
- mdb_engine/core/app_registration.py +22 -41
- mdb_engine/core/app_secrets.py +290 -0
- mdb_engine/core/connection.py +18 -9
- mdb_engine/core/encryption.py +223 -0
- mdb_engine/core/engine.py +1057 -93
- mdb_engine/core/index_management.py +12 -16
- mdb_engine/core/manifest.py +459 -150
- mdb_engine/core/ray_integration.py +435 -0
- mdb_engine/core/seeding.py +10 -18
- mdb_engine/core/service_initialization.py +12 -23
- mdb_engine/core/types.py +2 -5
- mdb_engine/database/README.md +140 -17
- mdb_engine/database/__init__.py +17 -6
- mdb_engine/database/abstraction.py +25 -37
- mdb_engine/database/connection.py +11 -18
- mdb_engine/database/query_validator.py +367 -0
- mdb_engine/database/resource_limiter.py +204 -0
- mdb_engine/database/scoped_wrapper.py +713 -196
- mdb_engine/dependencies.py +426 -0
- mdb_engine/di/__init__.py +34 -0
- mdb_engine/di/container.py +248 -0
- mdb_engine/di/providers.py +205 -0
- mdb_engine/di/scopes.py +139 -0
- mdb_engine/embeddings/README.md +54 -24
- mdb_engine/embeddings/__init__.py +31 -24
- mdb_engine/embeddings/dependencies.py +37 -154
- mdb_engine/embeddings/service.py +11 -25
- mdb_engine/exceptions.py +92 -0
- mdb_engine/indexes/README.md +30 -13
- mdb_engine/indexes/__init__.py +1 -0
- mdb_engine/indexes/helpers.py +1 -1
- mdb_engine/indexes/manager.py +50 -114
- mdb_engine/memory/README.md +2 -2
- mdb_engine/memory/__init__.py +1 -2
- mdb_engine/memory/service.py +30 -87
- mdb_engine/observability/README.md +4 -2
- mdb_engine/observability/__init__.py +26 -9
- mdb_engine/observability/health.py +8 -9
- mdb_engine/observability/metrics.py +32 -12
- mdb_engine/repositories/__init__.py +34 -0
- mdb_engine/repositories/base.py +325 -0
- mdb_engine/repositories/mongo.py +233 -0
- mdb_engine/repositories/unit_of_work.py +166 -0
- mdb_engine/routing/README.md +1 -1
- mdb_engine/routing/__init__.py +1 -3
- mdb_engine/routing/websockets.py +25 -60
- mdb_engine-0.2.0.dist-info/METADATA +313 -0
- mdb_engine-0.2.0.dist-info/RECORD +96 -0
- mdb_engine-0.1.6.dist-info/METADATA +0 -213
- mdb_engine-0.1.6.dist-info/RECORD +0 -75
- {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/WHEEL +0 -0
- {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/entry_points.txt +0 -0
- {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -26,27 +26,65 @@ a familiar (Motor-like) developer experience with automatic index optimization.
|
|
|
26
26
|
|
|
27
27
|
import asyncio
|
|
28
28
|
import logging
|
|
29
|
+
import re
|
|
29
30
|
import time
|
|
30
|
-
from typing import (
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
31
|
+
from typing import (
|
|
32
|
+
TYPE_CHECKING,
|
|
33
|
+
Any,
|
|
34
|
+
ClassVar,
|
|
35
|
+
Coroutine,
|
|
36
|
+
Dict,
|
|
37
|
+
List,
|
|
38
|
+
Mapping,
|
|
39
|
+
Optional,
|
|
40
|
+
Tuple,
|
|
41
|
+
Union,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
if TYPE_CHECKING:
|
|
45
|
+
from ..core.app_secrets import AppSecretsManager
|
|
46
|
+
|
|
47
|
+
from motor.motor_asyncio import (
|
|
48
|
+
AsyncIOMotorCollection,
|
|
49
|
+
AsyncIOMotorCursor,
|
|
50
|
+
AsyncIOMotorDatabase,
|
|
51
|
+
)
|
|
35
52
|
from pymongo import ASCENDING, DESCENDING, TEXT
|
|
36
|
-
from pymongo.errors import (
|
|
37
|
-
|
|
38
|
-
|
|
53
|
+
from pymongo.errors import (
|
|
54
|
+
AutoReconnect,
|
|
55
|
+
CollectionInvalid,
|
|
56
|
+
ConnectionFailure,
|
|
57
|
+
InvalidOperation,
|
|
58
|
+
OperationFailure,
|
|
59
|
+
PyMongoError,
|
|
60
|
+
ServerSelectionTimeoutError,
|
|
61
|
+
)
|
|
39
62
|
from pymongo.operations import SearchIndexModel
|
|
40
|
-
from pymongo.results import (
|
|
41
|
-
|
|
63
|
+
from pymongo.results import (
|
|
64
|
+
DeleteResult,
|
|
65
|
+
InsertManyResult,
|
|
66
|
+
InsertOneResult,
|
|
67
|
+
UpdateResult,
|
|
68
|
+
)
|
|
42
69
|
|
|
43
70
|
# Import constants
|
|
44
|
-
from ..constants import (
|
|
45
|
-
|
|
46
|
-
|
|
71
|
+
from ..constants import (
|
|
72
|
+
AUTO_INDEX_HINT_THRESHOLD,
|
|
73
|
+
DEFAULT_DROP_TIMEOUT,
|
|
74
|
+
DEFAULT_POLL_INTERVAL,
|
|
75
|
+
DEFAULT_SEARCH_TIMEOUT,
|
|
76
|
+
MAX_COLLECTION_NAME_LENGTH,
|
|
77
|
+
MAX_INDEX_FIELDS,
|
|
78
|
+
MIN_COLLECTION_NAME_LENGTH,
|
|
79
|
+
RESERVED_COLLECTION_NAMES,
|
|
80
|
+
RESERVED_COLLECTION_PREFIXES,
|
|
81
|
+
)
|
|
47
82
|
from ..exceptions import MongoDBEngineError
|
|
83
|
+
|
|
48
84
|
# Import observability
|
|
49
85
|
from ..observability import record_operation
|
|
86
|
+
from .query_validator import QueryValidator
|
|
87
|
+
from .resource_limiter import ResourceLimiter
|
|
50
88
|
|
|
51
89
|
# --- FIX: Configure logger *before* first use ---
|
|
52
90
|
logger = logging.getLogger(__name__)
|
|
@@ -60,9 +98,7 @@ GEO2DSPHERE = "2dsphere"
|
|
|
60
98
|
|
|
61
99
|
|
|
62
100
|
# --- HELPER FUNCTION FOR MANAGED TASK CREATION ---
|
|
63
|
-
def _create_managed_task(
|
|
64
|
-
coro: Coroutine[Any, Any, Any], task_name: Optional[str] = None
|
|
65
|
-
) -> None:
|
|
101
|
+
def _create_managed_task(coro: Coroutine[Any, Any, Any], task_name: Optional[str] = None) -> None:
|
|
66
102
|
"""
|
|
67
103
|
Creates a background task using asyncio.create_task().
|
|
68
104
|
|
|
@@ -86,6 +122,149 @@ def _create_managed_task(
|
|
|
86
122
|
# --- END HELPER FUNCTION ---
|
|
87
123
|
|
|
88
124
|
|
|
125
|
+
# ##########################################################################
|
|
126
|
+
# SECURITY VALIDATION FUNCTIONS
|
|
127
|
+
# ##########################################################################
|
|
128
|
+
|
|
129
|
+
# Collection name pattern: alphanumeric, underscore, dot, hyphen
|
|
130
|
+
# Must start with a letter or underscore (a leading digit is rejected)
|
|
131
|
+
# MongoDB allows: [a-zA-Z0-9_.-] but cannot start with number or special char
|
|
132
|
+
# The end anchor is \Z rather than $: in Python, $ also matches just before a
# trailing newline, so a name such as "users" + "\n" would pass match().
COLLECTION_NAME_PATTERN: re.Pattern = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.-]*\Z")
"""Regex pattern for valid MongoDB collection names."""
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _validate_collection_name(name: str, allow_prefixed: bool = False) -> None:
    """
    Validate collection name for security.

    Checks, in order, that the name:
    - is not empty
    - is within the configured length limits
    - matches ``COLLECTION_NAME_PATTERN`` over its whole length
    - does not end with a dot
    - contains no path-traversal sequence ("..", "/", "\\")
    - is not a reserved system name (exact match)
    - does not start with a reserved prefix (compared lower-cased)

    Args:
        name: Collection name to validate
        allow_prefixed: If True, allows prefixed names (e.g., "app_collection")
                       for cross-app access validation.
                       NOTE(review): no check below reads this flag, so both
                       values currently validate identically.

    Raises:
        ValueError: If collection name is invalid, reserved, or uses reserved prefix
    """
    if not name:
        raise ValueError("Collection name cannot be empty")

    # Check length
    if len(name) < MIN_COLLECTION_NAME_LENGTH:
        raise ValueError(
            f"Collection name too short (minimum {MIN_COLLECTION_NAME_LENGTH} character): {name}"
        )
    if len(name) > MAX_COLLECTION_NAME_LENGTH:
        raise ValueError(
            f"Collection name too long (maximum {MAX_COLLECTION_NAME_LENGTH} characters): {name}"
        )

    # Check pattern (MongoDB naming rules).
    # fullmatch() is required here: with match(), a pattern ending in "$" also
    # accepts a name followed by a single trailing newline (e.g. "users\n").
    if not COLLECTION_NAME_PATTERN.fullmatch(name):
        raise ValueError(
            f"Invalid collection name format: '{name}'. "
            "Collection names must start with a letter or underscore and "
            "contain only alphanumeric characters, underscores, dots, or hyphens."
        )

    # MongoDB doesn't allow collection names to end with a dot
    if name.endswith("."):
        raise ValueError(
            f"Invalid collection name format: '{name}'. " "Collection names cannot end with a dot."
        )

    # Check for path traversal attempts.
    # ".." can still occur in a name that passed the pattern (dots are allowed);
    # the "/" and "\\" tests are kept as defence in depth.
    if ".." in name or "/" in name or "\\" in name:
        raise ValueError(
            f"Invalid collection name format: '{name}'. "
            "Collection names must start with a letter or underscore and contain "
            "only alphanumeric characters, underscores, dots, or hyphens."
        )

    # Check reserved names (exact, case-sensitive match)
    if name in RESERVED_COLLECTION_NAMES:
        logger.warning(f"Security: Attempted access to reserved collection name: {name}")
        raise ValueError(
            f"Collection name '{name}' is reserved and cannot be accessed through scoped database."
        )

    # Check reserved prefixes (the name is lower-cased before comparing)
    name_lower = name.lower()
    for prefix in RESERVED_COLLECTION_PREFIXES:
        if name_lower.startswith(prefix):
            logger.warning(
                f"Security: Attempted access to collection with reserved prefix '{prefix}': {name}"
            )
            raise ValueError(
                f"Collection name '{name}' uses reserved prefix '{prefix}' and cannot be accessed."
            )
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _extract_app_slug_from_prefixed_name(prefixed_name: str) -> Optional[str]:
    """
    Extract app slug from a prefixed collection name.

    The slug is whatever precedes the FIRST underscore, so a slug that itself
    contains an underscore is cut short ("app_slug_collection" yields "app").

    Args:
        prefixed_name: Collection name that may be prefixed (e.g., "app_slug_collection")

    Returns:
        App slug if name is prefixed, None otherwise (no underscore present,
        or nothing before the first underscore)
    """
    # partition() returns an empty separator when there is no underscore,
    # which covers the "not prefixed" case without a separate membership test.
    app_slug, separator, _ = prefixed_name.partition("_")
    if separator and app_slug:
        return app_slug
    return None
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class _SecureCollectionProxy:
    """
    Proxy wrapper that blocks access to dangerous attributes on collections.

    Prevents access to database/client attributes that could be used to bypass scoping.
    Reads of "database", "client" and "db" raise AttributeError; every other
    attribute read or write is forwarded to the wrapped collection.
    """

    __slots__ = ("_collection",)

    def __init__(self, collection: "AsyncIOMotorCollection"):
        self._collection = collection

    def __getattr__(self, name: str) -> Any:
        """
        Forward attribute reads to the wrapped collection, blocking database/client.

        Raises:
            AttributeError: If ``name`` is one of the blocked attributes, or if
                the proxy has no wrapped collection yet.
        """
        # __getattr__ only runs after normal lookup has failed. Seeing
        # "_collection" here therefore means the slot was never filled (e.g.
        # the object was created with __new__ and __init__ never ran). Fail
        # right away: otherwise the self._collection read at the bottom would
        # re-enter this method and recurse until RecursionError.
        if name == "_collection":
            raise AttributeError(name)
        if name in ("database", "client", "db"):
            logger.warning(
                f"Security: Attempted access to '{name}' attribute on collection. "
                "This is blocked to prevent bypassing scoping."
            )
            raise AttributeError(
                f"Access to '{name}' is blocked for security. "
                "Use collection.index_manager for index operations. "
                "All data access must go through scoped collections."
            )
        return getattr(self._collection, name)

    def __setattr__(self, name: str, value: Any) -> None:
        """Allow setting _collection, delegate other attributes to underlying collection."""
        if name == "_collection":
            super().__setattr__(name, value)
        else:
            # Delegate to underlying collection for other attributes
            setattr(self._collection, name, value)
|
|
266
|
+
|
|
267
|
+
|
|
89
268
|
# ##########################################################################
|
|
90
269
|
# ASYNCHRONOUS ATLAS INDEX MANAGER
|
|
91
270
|
# ##########################################################################
|
|
@@ -115,10 +294,11 @@ class AsyncAtlasIndexManager:
|
|
|
115
294
|
Initializes the manager with a direct reference to a
|
|
116
295
|
motor.motor_asyncio.AsyncIOMotorCollection.
|
|
117
296
|
"""
|
|
297
|
+
# Unwrap _SecureCollectionProxy if present to get the real collection
|
|
298
|
+
if isinstance(real_collection, _SecureCollectionProxy):
|
|
299
|
+
real_collection = real_collection._collection
|
|
118
300
|
if not isinstance(real_collection, AsyncIOMotorCollection):
|
|
119
|
-
raise TypeError(
|
|
120
|
-
f"Expected AsyncIOMotorCollection, got {type(real_collection)}"
|
|
121
|
-
)
|
|
301
|
+
raise TypeError(f"Expected AsyncIOMotorCollection, got {type(real_collection)}")
|
|
122
302
|
self._collection = real_collection
|
|
123
303
|
|
|
124
304
|
async def _ensure_collection_exists(self) -> None:
|
|
@@ -134,9 +314,7 @@ class AsyncAtlasIndexManager:
|
|
|
134
314
|
f"Continuing index creation."
|
|
135
315
|
)
|
|
136
316
|
else:
|
|
137
|
-
logger.exception(
|
|
138
|
-
"Failed to ensure collection exists - CollectionInvalid error"
|
|
139
|
-
)
|
|
317
|
+
logger.exception("Failed to ensure collection exists - CollectionInvalid error")
|
|
140
318
|
raise MongoDBEngineError(
|
|
141
319
|
f"Failed to create prerequisite collection '{self._collection.name}'",
|
|
142
320
|
context={"collection_name": self._collection.name},
|
|
@@ -208,9 +386,7 @@ class AsyncAtlasIndexManager:
|
|
|
208
386
|
)
|
|
209
387
|
return False # Will wait below
|
|
210
388
|
elif existing_index.get("queryable"):
|
|
211
|
-
logger.info(
|
|
212
|
-
f"Search index '{name}' is already queryable and definition is up-to-date."
|
|
213
|
-
)
|
|
389
|
+
logger.info(f"Search index '{name}' is already queryable and definition is up-to-date.")
|
|
214
390
|
return True
|
|
215
391
|
elif existing_index.get("status") == "FAILED":
|
|
216
392
|
logger.error(
|
|
@@ -231,22 +407,17 @@ class AsyncAtlasIndexManager:
|
|
|
231
407
|
"""Create a new search index."""
|
|
232
408
|
try:
|
|
233
409
|
logger.info(f"Creating new search index '{name}' of type '{index_type}'...")
|
|
234
|
-
search_index_model = SearchIndexModel(
|
|
235
|
-
definition=definition, name=name, type=index_type
|
|
236
|
-
)
|
|
410
|
+
search_index_model = SearchIndexModel(definition=definition, name=name, type=index_type)
|
|
237
411
|
await self._collection.create_search_index(model=search_index_model)
|
|
238
412
|
logger.info(f"Search index '{name}' build has been submitted.")
|
|
239
413
|
except OperationFailure as e:
|
|
240
414
|
if "IndexAlreadyExists" in str(e) or "DuplicateIndexName" in str(e):
|
|
241
|
-
logger.warning(
|
|
242
|
-
f"Race condition: Index '{name}' was created by another process."
|
|
243
|
-
)
|
|
415
|
+
logger.warning(f"Race condition: Index '{name}' was created by another process.")
|
|
244
416
|
else:
|
|
245
|
-
logger.
|
|
246
|
-
f"OperationFailure during search index creation "
|
|
247
|
-
f"for '{name}': {e.details}"
|
|
417
|
+
logger.exception(
|
|
418
|
+
f"OperationFailure during search index creation " f"for '{name}': {e.details}"
|
|
248
419
|
)
|
|
249
|
-
raise
|
|
420
|
+
raise
|
|
250
421
|
|
|
251
422
|
async def create_search_index(
|
|
252
423
|
self,
|
|
@@ -283,17 +454,13 @@ class AsyncAtlasIndexManager:
|
|
|
283
454
|
return True
|
|
284
455
|
|
|
285
456
|
except OperationFailure as e:
|
|
286
|
-
logger.exception(
|
|
287
|
-
f"OperationFailure during search index creation/check for '{name}'"
|
|
288
|
-
)
|
|
457
|
+
logger.exception(f"OperationFailure during search index creation/check for '{name}'")
|
|
289
458
|
raise MongoDBEngineError(
|
|
290
459
|
f"Failed to create/check search index '{name}'",
|
|
291
460
|
context={"index_name": name, "operation": "create_search_index"},
|
|
292
461
|
) from e
|
|
293
462
|
except (ConnectionFailure, ServerSelectionTimeoutError) as e:
|
|
294
|
-
logger.exception(
|
|
295
|
-
f"Connection error during search index creation/check for '{name}'"
|
|
296
|
-
)
|
|
463
|
+
logger.exception(f"Connection error during search index creation/check for '{name}'")
|
|
297
464
|
raise MongoDBEngineError(
|
|
298
465
|
f"Connection failed while creating/checking search index '{name}'",
|
|
299
466
|
context={"index_name": name, "operation": "create_search_index"},
|
|
@@ -362,9 +529,7 @@ class AsyncAtlasIndexManager:
|
|
|
362
529
|
except OperationFailure as e:
|
|
363
530
|
# Handle race condition where index was already dropped
|
|
364
531
|
if "IndexNotFound" in str(e):
|
|
365
|
-
logger.info(
|
|
366
|
-
f"Search index '{name}' was already deleted (race condition)."
|
|
367
|
-
)
|
|
532
|
+
logger.info(f"Search index '{name}' was already deleted (race condition).")
|
|
368
533
|
return True
|
|
369
534
|
logger.exception(f"OperationFailure dropping search index '{name}'")
|
|
370
535
|
raise MongoDBEngineError(
|
|
@@ -427,19 +592,13 @@ class AsyncAtlasIndexManager:
|
|
|
427
592
|
queryable or fails.
|
|
428
593
|
"""
|
|
429
594
|
start_time = time.time()
|
|
430
|
-
logger.info(
|
|
431
|
-
f"Waiting up to {timeout}s for search index '{name}' to become queryable..."
|
|
432
|
-
)
|
|
595
|
+
logger.info(f"Waiting up to {timeout}s for search index '{name}' to become queryable...")
|
|
433
596
|
|
|
434
597
|
while True:
|
|
435
598
|
elapsed = time.time() - start_time
|
|
436
599
|
if elapsed > timeout:
|
|
437
|
-
logger.error(
|
|
438
|
-
|
|
439
|
-
)
|
|
440
|
-
raise TimeoutError(
|
|
441
|
-
f"Index '{name}' did not become queryable within {timeout}s."
|
|
442
|
-
)
|
|
600
|
+
logger.error(f"Timeout: Index '{name}' did not become queryable within {timeout}s.")
|
|
601
|
+
raise TimeoutError(f"Index '{name}' did not become queryable within {timeout}s.")
|
|
443
602
|
|
|
444
603
|
index_info = None
|
|
445
604
|
try:
|
|
@@ -471,9 +630,7 @@ class AsyncAtlasIndexManager:
|
|
|
471
630
|
queryable = index_info.get("queryable")
|
|
472
631
|
if queryable:
|
|
473
632
|
# Success!
|
|
474
|
-
logger.info(
|
|
475
|
-
f"Search index '{name}' is queryable (Status: {status})."
|
|
476
|
-
)
|
|
633
|
+
logger.info(f"Search index '{name}' is queryable (Status: {status}).")
|
|
477
634
|
return True
|
|
478
635
|
|
|
479
636
|
# Not ready yet, log and wait
|
|
@@ -495,14 +652,10 @@ class AsyncAtlasIndexManager:
|
|
|
495
652
|
Private helper to poll until an index is successfully dropped.
|
|
496
653
|
"""
|
|
497
654
|
start_time = time.time()
|
|
498
|
-
logger.info(
|
|
499
|
-
f"Waiting up to {timeout}s for search index '{name}' to be dropped..."
|
|
500
|
-
)
|
|
655
|
+
logger.info(f"Waiting up to {timeout}s for search index '{name}' to be dropped...")
|
|
501
656
|
while True:
|
|
502
657
|
if time.time() - start_time > timeout:
|
|
503
|
-
logger.error(
|
|
504
|
-
f"Timeout: Index '{name}' was not dropped within {timeout}s."
|
|
505
|
-
)
|
|
658
|
+
logger.error(f"Timeout: Index '{name}' was not dropped within {timeout}s.")
|
|
506
659
|
raise TimeoutError(f"Index '{name}' was not dropped within {timeout}s.")
|
|
507
660
|
|
|
508
661
|
index_info = await self.get_search_index(name)
|
|
@@ -588,9 +741,7 @@ class AsyncAtlasIndexManager:
|
|
|
588
741
|
# Wait for index to be ready (MongoDB indexes are usually immediate, but we verify)
|
|
589
742
|
if wait_for_ready:
|
|
590
743
|
try:
|
|
591
|
-
is_ready = await self._wait_for_regular_index_ready(
|
|
592
|
-
name, timeout=30
|
|
593
|
-
)
|
|
744
|
+
is_ready = await self._wait_for_regular_index_ready(name, timeout=30)
|
|
594
745
|
if not is_ready:
|
|
595
746
|
logger.warning(
|
|
596
747
|
f"Regular index '{name}' may not be fully ready yet, "
|
|
@@ -606,11 +757,7 @@ class AsyncAtlasIndexManager:
|
|
|
606
757
|
return name
|
|
607
758
|
except OperationFailure as e:
|
|
608
759
|
# Handle index build aborted (e.g., database being dropped during teardown)
|
|
609
|
-
if (
|
|
610
|
-
e.code == 276
|
|
611
|
-
or "IndexBuildAborted" in str(e)
|
|
612
|
-
or "dropDatabase" in str(e)
|
|
613
|
-
):
|
|
760
|
+
if e.code == 276 or "IndexBuildAborted" in str(e) or "dropDatabase" in str(e):
|
|
614
761
|
logger.debug(
|
|
615
762
|
f"Skipping regular index creation '{index_name}': "
|
|
616
763
|
f"index build aborted (likely during database drop/teardown): {e}"
|
|
@@ -650,9 +797,7 @@ class AsyncAtlasIndexManager:
|
|
|
650
797
|
kwargs["name"] = name
|
|
651
798
|
return await self.create_index(keys, **kwargs)
|
|
652
799
|
|
|
653
|
-
async def create_geo_index(
|
|
654
|
-
self, field: str, name: Optional[str] = None, **kwargs: Any
|
|
655
|
-
) -> str:
|
|
800
|
+
async def create_geo_index(self, field: str, name: Optional[str] = None, **kwargs: Any) -> str:
|
|
656
801
|
"""Helper to create a standard 2dsphere index."""
|
|
657
802
|
keys = [(field, GEO2DSPHERE)]
|
|
658
803
|
if name:
|
|
@@ -681,9 +826,7 @@ class AsyncAtlasIndexManager:
|
|
|
681
826
|
context={"index_name": name, "operation": "drop_index"},
|
|
682
827
|
) from e
|
|
683
828
|
except InvalidOperation as e:
|
|
684
|
-
logger.debug(
|
|
685
|
-
f"Cannot drop regular index '{name}': MongoDB client is closed"
|
|
686
|
-
)
|
|
829
|
+
logger.debug(f"Cannot drop regular index '{name}': MongoDB client is closed")
|
|
687
830
|
raise MongoDBEngineError(
|
|
688
831
|
f"Cannot drop regular index '{name}': MongoDB client is closed",
|
|
689
832
|
context={"index_name": name, "operation": "drop_index"},
|
|
@@ -698,9 +841,7 @@ class AsyncAtlasIndexManager:
|
|
|
698
841
|
return []
|
|
699
842
|
except InvalidOperation:
|
|
700
843
|
# Client is closed (e.g., during shutdown/teardown)
|
|
701
|
-
logger.debug(
|
|
702
|
-
"Skipping list_indexes: MongoDB client is closed (likely during shutdown)"
|
|
703
|
-
)
|
|
844
|
+
logger.debug("Skipping list_indexes: MongoDB client is closed (likely during shutdown)")
|
|
704
845
|
return []
|
|
705
846
|
|
|
706
847
|
async def get_index(self, name: str) -> Optional[Dict[str, Any]]:
|
|
@@ -774,9 +915,7 @@ class AutoIndexManager:
|
|
|
774
915
|
"_pending_tasks",
|
|
775
916
|
)
|
|
776
917
|
|
|
777
|
-
def __init__(
|
|
778
|
-
self, collection: AsyncIOMotorCollection, index_manager: AsyncAtlasIndexManager
|
|
779
|
-
):
|
|
918
|
+
def __init__(self, collection: AsyncIOMotorCollection, index_manager: AsyncAtlasIndexManager):
|
|
780
919
|
self._collection = collection
|
|
781
920
|
self._index_manager = index_manager
|
|
782
921
|
# Cache of index creation decisions (index_name -> bool)
|
|
@@ -812,8 +951,7 @@ class AutoIndexManager:
|
|
|
812
951
|
if isinstance(value, dict):
|
|
813
952
|
# Handle operators like $gt, $gte, $lt, $lte, $ne, $in, $exists
|
|
814
953
|
if any(
|
|
815
|
-
op in value
|
|
816
|
-
for op in ["$gt", "$gte", "$lt", "$lte", "$ne", "$in", "$exists"]
|
|
954
|
+
op in value for op in ["$gt", "$gte", "$lt", "$lte", "$ne", "$in", "$exists"]
|
|
817
955
|
):
|
|
818
956
|
# These operators benefit from indexes
|
|
819
957
|
index_fields.append((field_name, ASCENDING))
|
|
@@ -888,9 +1026,7 @@ class AutoIndexManager:
|
|
|
888
1026
|
|
|
889
1027
|
# Create the index
|
|
890
1028
|
keys = all_fields
|
|
891
|
-
await self._index_manager.create_index(
|
|
892
|
-
keys, name=index_name, background=True
|
|
893
|
-
)
|
|
1029
|
+
await self._index_manager.create_index(keys, name=index_name, background=True)
|
|
894
1030
|
async with self._lock:
|
|
895
1031
|
self._creation_cache[index_name] = True
|
|
896
1032
|
logger.info(
|
|
@@ -986,9 +1122,7 @@ class AutoIndexManager:
|
|
|
986
1122
|
|
|
987
1123
|
# Create task and track it
|
|
988
1124
|
# Cleanup happens in _create_index_safely's finally block
|
|
989
|
-
task = asyncio.create_task(
|
|
990
|
-
self._create_index_safely(index_name, all_fields)
|
|
991
|
-
)
|
|
1125
|
+
task = asyncio.create_task(self._create_index_safely(index_name, all_fields))
|
|
992
1126
|
self._pending_tasks[index_name] = task
|
|
993
1127
|
|
|
994
1128
|
|
|
@@ -1028,6 +1162,9 @@ class ScopedCollectionWrapper:
|
|
|
1028
1162
|
"_index_manager",
|
|
1029
1163
|
"_auto_index_manager",
|
|
1030
1164
|
"_auto_index_enabled",
|
|
1165
|
+
"_query_validator",
|
|
1166
|
+
"_resource_limiter",
|
|
1167
|
+
"_parent_wrapper",
|
|
1031
1168
|
)
|
|
1032
1169
|
|
|
1033
1170
|
def __init__(
|
|
@@ -1036,6 +1173,9 @@ class ScopedCollectionWrapper:
|
|
|
1036
1173
|
read_scopes: List[str],
|
|
1037
1174
|
write_scope: str,
|
|
1038
1175
|
auto_index: bool = True,
|
|
1176
|
+
query_validator: Optional[QueryValidator] = None,
|
|
1177
|
+
resource_limiter: Optional[ResourceLimiter] = None,
|
|
1178
|
+
parent_wrapper: Optional["ScopedMongoWrapper"] = None,
|
|
1039
1179
|
):
|
|
1040
1180
|
self._collection = real_collection
|
|
1041
1181
|
self._read_scopes = read_scopes
|
|
@@ -1044,6 +1184,11 @@ class ScopedCollectionWrapper:
|
|
|
1044
1184
|
# Lazily instantiated and cached
|
|
1045
1185
|
self._index_manager: Optional[AsyncAtlasIndexManager] = None
|
|
1046
1186
|
self._auto_index_manager: Optional[AutoIndexManager] = None
|
|
1187
|
+
# Query security and resource limits
|
|
1188
|
+
self._query_validator = query_validator or QueryValidator()
|
|
1189
|
+
self._resource_limiter = resource_limiter or ResourceLimiter()
|
|
1190
|
+
# Reference to parent wrapper for token verification
|
|
1191
|
+
self._parent_wrapper = parent_wrapper
|
|
1047
1192
|
|
|
1048
1193
|
@property
|
|
1049
1194
|
def index_manager(self) -> AsyncAtlasIndexManager:
|
|
@@ -1060,7 +1205,9 @@ class ScopedCollectionWrapper:
|
|
|
1060
1205
|
# Create and cache it.
|
|
1061
1206
|
# Pass the *real* collection, not 'self', as indexes
|
|
1062
1207
|
# are not scoped by app_id.
|
|
1063
|
-
|
|
1208
|
+
# Access the real collection directly, bypassing the proxy
|
|
1209
|
+
real_collection = super().__getattribute__("_collection")
|
|
1210
|
+
self._index_manager = AsyncAtlasIndexManager(real_collection)
|
|
1064
1211
|
return self._index_manager
|
|
1065
1212
|
|
|
1066
1213
|
@property
|
|
@@ -1075,15 +1222,52 @@ class ScopedCollectionWrapper:
|
|
|
1075
1222
|
|
|
1076
1223
|
if self._auto_index_manager is None:
|
|
1077
1224
|
# Lazily instantiate auto-index manager
|
|
1225
|
+
# Access the real collection directly, bypassing the proxy
|
|
1226
|
+
real_collection = super().__getattribute__("_collection")
|
|
1078
1227
|
self._auto_index_manager = AutoIndexManager(
|
|
1079
|
-
|
|
1228
|
+
real_collection,
|
|
1080
1229
|
self.index_manager, # This will create index_manager if needed
|
|
1081
1230
|
)
|
|
1082
1231
|
return self._auto_index_manager
|
|
1083
1232
|
|
|
1084
|
-
def
|
|
1085
|
-
|
|
1086
|
-
|
|
1233
|
+
def __getattribute__(self, name: str) -> Any:
    """
    Override to prevent access to dangerous attributes on _collection.

    Every attribute resolves through the normal lookup. The single special
    case is ``_collection``: it is returned wrapped in a
    ``_SecureCollectionProxy`` so that ``_collection.database`` and
    ``_collection.client`` cannot be used to bypass scoping.

    Raises:
        AttributeError: If the attribute does not exist (or is not set yet).
    """
    value = super().__getattribute__(name)
    if name == "_collection":
        # A new proxy is built on each read. Code in this class that needs the
        # raw collection fetches it via super()/object.__getattribute__ instead.
        return _SecureCollectionProxy(value)
    return value
|
|
1260
|
+
|
|
1261
|
+
def __setattr__(self, name: str, value: Any) -> None:
    """
    Reject re-binding of ``_collection`` once it has been set.

    The first assignment of ``_collection`` goes through, as does any
    assignment to another attribute; only a later write to ``_collection``
    raises.

    Raises:
        AttributeError: If ``_collection`` is assigned while already set.
    """
    rebinding_collection = name == "_collection" and hasattr(self, "_collection")
    if not rebinding_collection:
        super().__setattr__(name, value)
        return
    raise AttributeError(
        "Cannot modify '_collection' attribute. "
        "Collection wrappers are immutable for security."
    )
|
|
1269
|
+
|
|
1270
|
+
def _inject_read_filter(self, filter: Optional[Mapping[str, Any]] = None) -> Dict[str, Any]:
|
|
1087
1271
|
"""
|
|
1088
1272
|
Combines the user's filter with our mandatory scope filter.
|
|
1089
1273
|
|
|
@@ -1099,9 +1283,7 @@ class ScopedCollectionWrapper:
|
|
|
1099
1283
|
# If filter exists, combine them robustly with $and
|
|
1100
1284
|
return {"$and": [filter, scope_filter]}
|
|
1101
1285
|
|
|
1102
|
-
async def insert_one(
|
|
1103
|
-
self, document: Mapping[str, Any], *args, **kwargs
|
|
1104
|
-
) -> InsertOneResult:
|
|
1286
|
+
async def insert_one(self, document: Mapping[str, Any], *args, **kwargs) -> InsertOneResult:
|
|
1105
1287
|
"""
|
|
1106
1288
|
Injects the app_id before writing.
|
|
1107
1289
|
|
|
@@ -1110,12 +1292,31 @@ class ScopedCollectionWrapper:
|
|
|
1110
1292
|
import time
|
|
1111
1293
|
|
|
1112
1294
|
start_time = time.time()
|
|
1113
|
-
|
|
1295
|
+
# Get collection name safely (may not exist for new collections)
|
|
1296
|
+
try:
|
|
1297
|
+
collection_name = self._collection.name
|
|
1298
|
+
except (AttributeError, TypeError):
|
|
1299
|
+
# Fallback if name is not accessible
|
|
1300
|
+
collection_name = "unknown"
|
|
1114
1301
|
|
|
1115
1302
|
try:
|
|
1303
|
+
# Verify token if needed (lazy verification for async contexts)
|
|
1304
|
+
if self._parent_wrapper:
|
|
1305
|
+
await self._parent_wrapper._verify_token_if_needed()
|
|
1306
|
+
|
|
1307
|
+
# Validate document size before insert
|
|
1308
|
+
self._resource_limiter.validate_document_size(document)
|
|
1309
|
+
|
|
1116
1310
|
# Use dictionary spread to create a non-mutating copy
|
|
1117
1311
|
doc_to_insert = {**document, "app_id": self._write_scope}
|
|
1118
|
-
|
|
1312
|
+
|
|
1313
|
+
# Enforce query timeout
|
|
1314
|
+
kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
|
|
1315
|
+
# Remove maxTimeMS - insert_one doesn't accept it
|
|
1316
|
+
kwargs_for_insert = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
|
|
1317
|
+
|
|
1318
|
+
# Use self._collection.insert_one() - proxy delegates correctly
|
|
1319
|
+
result = await self._collection.insert_one(doc_to_insert, *args, **kwargs_for_insert)
|
|
1119
1320
|
duration_ms = (time.time() - start_time) * 1000
|
|
1120
1321
|
record_operation(
|
|
1121
1322
|
"database.insert_one",
|
|
@@ -1164,8 +1365,17 @@ class ScopedCollectionWrapper:
|
|
|
1164
1365
|
Safety: Uses a list comprehension to create copies of all documents,
|
|
1165
1366
|
avoiding in-place mutation of the original list.
|
|
1166
1367
|
"""
|
|
1368
|
+
# Validate all document sizes before insert
|
|
1369
|
+
self._resource_limiter.validate_documents_size(documents)
|
|
1370
|
+
|
|
1371
|
+
# Enforce query timeout
|
|
1372
|
+
kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
|
|
1373
|
+
# Remove maxTimeMS - insert_many doesn't accept it
|
|
1374
|
+
kwargs_for_insert = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
|
|
1375
|
+
|
|
1167
1376
|
docs_to_insert = [{**doc, "app_id": self._write_scope} for doc in documents]
|
|
1168
|
-
|
|
1377
|
+
# Use self._collection.insert_many() - proxy delegates correctly
|
|
1378
|
+
return await self._collection.insert_many(docs_to_insert, *args, **kwargs_for_insert)
|
|
1169
1379
|
|
|
1170
1380
|
async def find_one(
|
|
1171
1381
|
self, filter: Optional[Mapping[str, Any]] = None, *args, **kwargs
|
|
@@ -1177,20 +1387,36 @@ class ScopedCollectionWrapper:
|
|
|
1177
1387
|
import time
|
|
1178
1388
|
|
|
1179
1389
|
start_time = time.time()
|
|
1180
|
-
|
|
1390
|
+
# Access real collection directly (bypass proxy) for name attribute
|
|
1391
|
+
# Use object.__getattribute__ to bypass our custom __getattribute__ that wraps in proxy
|
|
1392
|
+
real_collection = object.__getattribute__(self, "_collection")
|
|
1393
|
+
collection_name = real_collection.name
|
|
1181
1394
|
|
|
1182
1395
|
try:
|
|
1396
|
+
# Verify token if needed (lazy verification for async contexts)
|
|
1397
|
+
if self._parent_wrapper:
|
|
1398
|
+
await self._parent_wrapper._verify_token_if_needed()
|
|
1399
|
+
|
|
1400
|
+
# Validate query filter for security
|
|
1401
|
+
self._query_validator.validate_filter(filter)
|
|
1402
|
+
self._query_validator.validate_sort(kwargs.get("sort"))
|
|
1403
|
+
|
|
1404
|
+
# Enforce query timeout - but remove maxTimeMS for find_one
|
|
1405
|
+
# because Motor's find_one internally creates a cursor and some versions
|
|
1406
|
+
# don't handle maxTimeMS correctly when passed to find_one
|
|
1407
|
+
kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
|
|
1408
|
+
# Remove maxTimeMS to avoid cursor creation errors in find_one
|
|
1409
|
+
kwargs_for_find_one = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
|
|
1410
|
+
|
|
1183
1411
|
# Magical auto-indexing: ensure indexes exist before querying
|
|
1184
1412
|
# Note: We analyze the user's filter, not the scoped filter, since
|
|
1185
1413
|
# app_id index is always ensured separately
|
|
1186
1414
|
if self.auto_index_manager:
|
|
1187
1415
|
sort = kwargs.get("sort")
|
|
1188
|
-
await self.auto_index_manager.ensure_index_for_query(
|
|
1189
|
-
filter=filter, sort=sort
|
|
1190
|
-
)
|
|
1416
|
+
await self.auto_index_manager.ensure_index_for_query(filter=filter, sort=sort)
|
|
1191
1417
|
|
|
1192
1418
|
scoped_filter = self._inject_read_filter(filter)
|
|
1193
|
-
result = await self._collection.find_one(scoped_filter, *args, **
|
|
1419
|
+
result = await self._collection.find_one(scoped_filter, *args, **kwargs_for_find_one)
|
|
1194
1420
|
duration_ms = (time.time() - start_time) * 1000
|
|
1195
1421
|
record_operation(
|
|
1196
1422
|
"database.find_one",
|
|
@@ -1200,7 +1426,7 @@ class ScopedCollectionWrapper:
|
|
|
1200
1426
|
app_slug=self._write_scope,
|
|
1201
1427
|
)
|
|
1202
1428
|
return result
|
|
1203
|
-
except
|
|
1429
|
+
except (PyMongoError, ValueError, TypeError, KeyError, AttributeError):
|
|
1204
1430
|
duration_ms = (time.time() - start_time) * 1000
|
|
1205
1431
|
record_operation(
|
|
1206
1432
|
"database.find_one",
|
|
@@ -1219,6 +1445,25 @@ class ScopedCollectionWrapper:
|
|
|
1219
1445
|
Returns an async cursor, just like motor.
|
|
1220
1446
|
Automatically ensures appropriate indexes exist for the query.
|
|
1221
1447
|
"""
|
|
1448
|
+
# Validate query filter for security
|
|
1449
|
+
self._query_validator.validate_filter(filter)
|
|
1450
|
+
self._query_validator.validate_sort(kwargs.get("sort"))
|
|
1451
|
+
|
|
1452
|
+
# Enforce result limit
|
|
1453
|
+
limit = kwargs.get("limit")
|
|
1454
|
+
if limit is not None:
|
|
1455
|
+
kwargs["limit"] = self._resource_limiter.enforce_result_limit(limit)
|
|
1456
|
+
|
|
1457
|
+
# Enforce batch size
|
|
1458
|
+
batch_size = kwargs.get("batch_size")
|
|
1459
|
+
if batch_size is not None:
|
|
1460
|
+
kwargs["batch_size"] = self._resource_limiter.enforce_batch_size(batch_size)
|
|
1461
|
+
|
|
1462
|
+
# Enforce query timeout - but remove maxTimeMS before passing to find()
|
|
1463
|
+
# because Cursor constructor doesn't accept maxTimeMS
|
|
1464
|
+
kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
|
|
1465
|
+
kwargs_for_find = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
|
|
1466
|
+
|
|
1222
1467
|
# Magical auto-indexing: ensure indexes exist before querying
|
|
1223
1468
|
# Note: This is fire-and-forget, doesn't block cursor creation
|
|
1224
1469
|
if self.auto_index_manager:
|
|
@@ -1227,23 +1472,20 @@ class ScopedCollectionWrapper:
|
|
|
1227
1472
|
# Create a task to ensure index (fire and forget, managed to prevent accumulation)
|
|
1228
1473
|
async def _safe_index_task():
|
|
1229
1474
|
try:
|
|
1230
|
-
await self.auto_index_manager.ensure_index_for_query(
|
|
1231
|
-
filter=filter, sort=sort
|
|
1232
|
-
)
|
|
1475
|
+
await self.auto_index_manager.ensure_index_for_query(filter=filter, sort=sort)
|
|
1233
1476
|
except (
|
|
1234
1477
|
OperationFailure,
|
|
1235
1478
|
ConnectionFailure,
|
|
1236
1479
|
ServerSelectionTimeoutError,
|
|
1237
1480
|
InvalidOperation,
|
|
1238
1481
|
) as e:
|
|
1239
|
-
logger.debug(
|
|
1240
|
-
|
|
1241
|
-
)
|
|
1482
|
+
logger.debug(f"Auto-index creation failed for query (non-critical): {e}")
|
|
1483
|
+
# Let other exceptions bubble up - they are non-recoverable (Type 4)
|
|
1242
1484
|
|
|
1243
1485
|
_create_managed_task(_safe_index_task(), task_name="auto_index_check")
|
|
1244
1486
|
|
|
1245
1487
|
scoped_filter = self._inject_read_filter(filter)
|
|
1246
|
-
return self._collection.find(scoped_filter, *args, **
|
|
1488
|
+
return self._collection.find(scoped_filter, *args, **kwargs_for_find)
|
|
1247
1489
|
|
|
1248
1490
|
async def update_one(
|
|
1249
1491
|
self, filter: Mapping[str, Any], update: Mapping[str, Any], *args, **kwargs
|
|
@@ -1252,8 +1494,16 @@ class ScopedCollectionWrapper:
|
|
|
1252
1494
|
Applies the read scope to the filter.
|
|
1253
1495
|
Note: This only scopes the *filter*, not the update operation.
|
|
1254
1496
|
"""
|
|
1497
|
+
# Validate query filter for security
|
|
1498
|
+
self._query_validator.validate_filter(filter)
|
|
1499
|
+
|
|
1500
|
+
# Enforce query timeout
|
|
1501
|
+
kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
|
|
1502
|
+
# Remove maxTimeMS - update_one doesn't accept it
|
|
1503
|
+
kwargs_for_update = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
|
|
1504
|
+
|
|
1255
1505
|
scoped_filter = self._inject_read_filter(filter)
|
|
1256
|
-
return await self._collection.update_one(scoped_filter, update, *args, **
|
|
1506
|
+
return await self._collection.update_one(scoped_filter, update, *args, **kwargs_for_update)
|
|
1257
1507
|
|
|
1258
1508
|
async def update_many(
|
|
1259
1509
|
self, filter: Mapping[str, Any], update: Mapping[str, Any], *args, **kwargs
|
|
@@ -1262,24 +1512,42 @@ class ScopedCollectionWrapper:
|
|
|
1262
1512
|
Applies the read scope to the filter.
|
|
1263
1513
|
Note: This only scopes the *filter*, not the update operation.
|
|
1264
1514
|
"""
|
|
1515
|
+
# Validate query filter for security
|
|
1516
|
+
self._query_validator.validate_filter(filter)
|
|
1517
|
+
|
|
1518
|
+
# Enforce query timeout
|
|
1519
|
+
kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
|
|
1520
|
+
# Remove maxTimeMS - update_many doesn't accept it
|
|
1521
|
+
kwargs_for_update = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
|
|
1522
|
+
|
|
1265
1523
|
scoped_filter = self._inject_read_filter(filter)
|
|
1266
|
-
return await self._collection.update_many(
|
|
1267
|
-
scoped_filter, update, *args, **kwargs
|
|
1268
|
-
)
|
|
1524
|
+
return await self._collection.update_many(scoped_filter, update, *args, **kwargs_for_update)
|
|
1269
1525
|
|
|
1270
|
-
async def delete_one(
|
|
1271
|
-
self, filter: Mapping[str, Any], *args, **kwargs
|
|
1272
|
-
) -> DeleteResult:
|
|
1526
|
+
async def delete_one(self, filter: Mapping[str, Any], *args, **kwargs) -> DeleteResult:
|
|
1273
1527
|
"""Applies the read scope to the filter."""
|
|
1528
|
+
# Validate query filter for security
|
|
1529
|
+
self._query_validator.validate_filter(filter)
|
|
1530
|
+
|
|
1531
|
+
# Enforce query timeout
|
|
1532
|
+
kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
|
|
1533
|
+
# Remove maxTimeMS - delete_one doesn't accept it
|
|
1534
|
+
kwargs_for_delete = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
|
|
1535
|
+
|
|
1274
1536
|
scoped_filter = self._inject_read_filter(filter)
|
|
1275
|
-
return await self._collection.delete_one(scoped_filter, *args, **
|
|
1537
|
+
return await self._collection.delete_one(scoped_filter, *args, **kwargs_for_delete)
|
|
1276
1538
|
|
|
1277
|
-
async def delete_many(
|
|
1278
|
-
self, filter: Mapping[str, Any], *args, **kwargs
|
|
1279
|
-
) -> DeleteResult:
|
|
1539
|
+
async def delete_many(self, filter: Mapping[str, Any], *args, **kwargs) -> DeleteResult:
|
|
1280
1540
|
"""Applies the read scope to the filter."""
|
|
1541
|
+
# Validate query filter for security
|
|
1542
|
+
self._query_validator.validate_filter(filter)
|
|
1543
|
+
|
|
1544
|
+
# Enforce query timeout
|
|
1545
|
+
kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
|
|
1546
|
+
# Remove maxTimeMS - delete_many doesn't accept it
|
|
1547
|
+
kwargs_for_delete = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
|
|
1548
|
+
|
|
1281
1549
|
scoped_filter = self._inject_read_filter(filter)
|
|
1282
|
-
return await self._collection.delete_many(scoped_filter, *args, **
|
|
1550
|
+
return await self._collection.delete_many(scoped_filter, *args, **kwargs_for_delete)
|
|
1283
1551
|
|
|
1284
1552
|
async def count_documents(
|
|
1285
1553
|
self, filter: Optional[Mapping[str, Any]] = None, *args, **kwargs
|
|
@@ -1288,22 +1556,34 @@ class ScopedCollectionWrapper:
|
|
|
1288
1556
|
Applies the read scope to the filter for counting.
|
|
1289
1557
|
Automatically ensures appropriate indexes exist for the query.
|
|
1290
1558
|
"""
|
|
1559
|
+
# Validate query filter for security
|
|
1560
|
+
self._query_validator.validate_filter(filter)
|
|
1561
|
+
|
|
1562
|
+
# Note: count_documents doesn't reliably support maxTimeMS in all Motor versions
|
|
1563
|
+
# Remove it to avoid cursor creation errors when auto-indexing triggers list_indexes()
|
|
1564
|
+
kwargs_for_count = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
|
|
1565
|
+
# Don't enforce timeout for count_documents to avoid issues with cursor operations
|
|
1566
|
+
|
|
1291
1567
|
# Magical auto-indexing: ensure indexes exist before querying
|
|
1292
1568
|
if self.auto_index_manager:
|
|
1293
1569
|
await self.auto_index_manager.ensure_index_for_query(filter=filter)
|
|
1294
1570
|
|
|
1295
1571
|
scoped_filter = self._inject_read_filter(filter)
|
|
1296
|
-
return await self._collection.count_documents(scoped_filter, *args, **
|
|
1572
|
+
return await self._collection.count_documents(scoped_filter, *args, **kwargs_for_count)
|
|
1297
1573
|
|
|
1298
|
-
def aggregate(
|
|
1299
|
-
self, pipeline: List[Dict[str, Any]], *args, **kwargs
|
|
1300
|
-
) -> AsyncIOMotorCursor:
|
|
1574
|
+
def aggregate(self, pipeline: List[Dict[str, Any]], *args, **kwargs) -> AsyncIOMotorCursor:
|
|
1301
1575
|
"""
|
|
1302
1576
|
Injects a scope filter into the pipeline. For normal pipelines, we prepend
|
|
1303
1577
|
a $match stage. However, if the first stage is $vectorSearch, we embed
|
|
1304
1578
|
the read_scope filter into its 'filter' property, because $vectorSearch must
|
|
1305
1579
|
remain the very first stage in Atlas.
|
|
1306
1580
|
"""
|
|
1581
|
+
# Validate aggregation pipeline for security
|
|
1582
|
+
self._query_validator.validate_pipeline(pipeline)
|
|
1583
|
+
|
|
1584
|
+
# Enforce query timeout - Motor's aggregate() accepts maxTimeMS
|
|
1585
|
+
kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
|
|
1586
|
+
|
|
1307
1587
|
if not pipeline:
|
|
1308
1588
|
# No stages given, just prepend our $match
|
|
1309
1589
|
scope_match_stage = {"$match": {"app_id": {"$in": self._read_scopes}}}
|
|
@@ -1363,7 +1643,20 @@ class ScopedMongoWrapper:
|
|
|
1363
1643
|
# Lock to prevent race conditions when multiple requests try to create the same index
|
|
1364
1644
|
_app_id_index_lock: ClassVar[asyncio.Lock] = asyncio.Lock()
|
|
1365
1645
|
|
|
1366
|
-
__slots__ = (
|
|
1646
|
+
__slots__ = (
|
|
1647
|
+
"_db",
|
|
1648
|
+
"_read_scopes",
|
|
1649
|
+
"_write_scope",
|
|
1650
|
+
"_wrapper_cache",
|
|
1651
|
+
"_auto_index",
|
|
1652
|
+
"_query_validator",
|
|
1653
|
+
"_resource_limiter",
|
|
1654
|
+
"_app_slug",
|
|
1655
|
+
"_app_token",
|
|
1656
|
+
"_app_secrets_manager",
|
|
1657
|
+
"_token_verified",
|
|
1658
|
+
"_token_verification_lock",
|
|
1659
|
+
)
|
|
1367
1660
|
|
|
1368
1661
|
def __init__(
|
|
1369
1662
|
self,
|
|
@@ -1371,33 +1664,150 @@ class ScopedMongoWrapper:
|
|
|
1371
1664
|
read_scopes: List[str],
|
|
1372
1665
|
write_scope: str,
|
|
1373
1666
|
auto_index: bool = True,
|
|
1667
|
+
query_validator: Optional[QueryValidator] = None,
|
|
1668
|
+
resource_limiter: Optional[ResourceLimiter] = None,
|
|
1669
|
+
app_slug: Optional[str] = None,
|
|
1670
|
+
app_token: Optional[str] = None,
|
|
1671
|
+
app_secrets_manager: Optional["AppSecretsManager"] = None,
|
|
1374
1672
|
):
|
|
1375
1673
|
self._db = real_db
|
|
1376
1674
|
self._read_scopes = read_scopes
|
|
1377
1675
|
self._write_scope = write_scope
|
|
1378
1676
|
self._auto_index = auto_index
|
|
1379
1677
|
|
|
1678
|
+
# Query security and resource limits (shared across all collections)
|
|
1679
|
+
self._query_validator = query_validator or QueryValidator()
|
|
1680
|
+
self._resource_limiter = resource_limiter or ResourceLimiter()
|
|
1681
|
+
|
|
1682
|
+
# Token verification for app authentication
|
|
1683
|
+
self._app_slug = app_slug
|
|
1684
|
+
self._app_token = app_token
|
|
1685
|
+
self._app_secrets_manager = app_secrets_manager
|
|
1686
|
+
self._token_verified = False
|
|
1687
|
+
self._token_verification_lock = asyncio.Lock()
|
|
1688
|
+
|
|
1380
1689
|
# Cache for created collection wrappers.
|
|
1381
1690
|
self._wrapper_cache: Dict[str, ScopedCollectionWrapper] = {}
|
|
1382
1691
|
|
|
1383
|
-
|
|
1384
|
-
def database(self) -> AsyncIOMotorDatabase:
|
|
1692
|
+
async def _verify_token_if_needed(self) -> None:
|
|
1385
1693
|
"""
|
|
1386
|
-
|
|
1694
|
+
Verify app token lazily on first database operation.
|
|
1387
1695
|
|
|
1388
|
-
This
|
|
1389
|
-
|
|
1696
|
+
This method ensures token verification happens even when get_scoped_db()
|
|
1697
|
+
is called from an async context where sync verification was skipped.
|
|
1390
1698
|
|
|
1391
|
-
|
|
1392
|
-
|
|
1699
|
+
Raises:
|
|
1700
|
+
ValueError: If token verification fails
|
|
1701
|
+
"""
|
|
1702
|
+
# If already verified, skip
|
|
1703
|
+
if self._token_verified:
|
|
1704
|
+
return
|
|
1393
1705
|
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1706
|
+
# If no token or secrets manager, skip verification
|
|
1707
|
+
if not self._app_token or not self._app_secrets_manager or not self._app_slug:
|
|
1708
|
+
self._token_verified = True
|
|
1709
|
+
return
|
|
1710
|
+
|
|
1711
|
+
# Use lock to prevent race conditions
|
|
1712
|
+
async with self._token_verification_lock:
|
|
1713
|
+
# Double-check after acquiring lock
|
|
1714
|
+
if self._token_verified:
|
|
1715
|
+
return
|
|
1716
|
+
|
|
1717
|
+
# Verify token
|
|
1718
|
+
is_valid = await self._app_secrets_manager.verify_app_secret(
|
|
1719
|
+
self._app_slug, self._app_token
|
|
1720
|
+
)
|
|
1721
|
+
|
|
1722
|
+
if not is_valid:
|
|
1723
|
+
logger.warning(f"Security: Invalid app token for '{self._app_slug}'")
|
|
1724
|
+
raise ValueError("Invalid app token")
|
|
1725
|
+
|
|
1726
|
+
# Mark as verified
|
|
1727
|
+
self._token_verified = True
|
|
1728
|
+
logger.debug(f"Token verified for app '{self._app_slug}'")
|
|
1729
|
+
|
|
1730
|
+
def _validate_cross_app_access(self, prefixed_name: str) -> None:
|
|
1731
|
+
"""
|
|
1732
|
+
Validate that cross-app collection access is authorized.
|
|
1733
|
+
|
|
1734
|
+
Args:
|
|
1735
|
+
prefixed_name: Prefixed collection name (e.g., "other_app_collection")
|
|
1736
|
+
|
|
1737
|
+
Raises:
|
|
1738
|
+
ValueError: If cross-app access is not authorized
|
|
1739
|
+
"""
|
|
1740
|
+
# Extract app slug from prefixed name
|
|
1741
|
+
target_app = _extract_app_slug_from_prefixed_name(prefixed_name)
|
|
1742
|
+
if target_app is None:
|
|
1743
|
+
return # Same-app access or not a valid prefixed name
|
|
1744
|
+
|
|
1745
|
+
# Check if target app is in read_scopes
|
|
1746
|
+
if target_app not in self._read_scopes:
|
|
1747
|
+
logger.warning(
|
|
1748
|
+
f"Security: Unauthorized cross-app access attempt. "
|
|
1749
|
+
f"Collection: '{prefixed_name}', Target app: '{target_app}', "
|
|
1750
|
+
f"Read scopes: {self._read_scopes}, Write scope: {self._write_scope}"
|
|
1751
|
+
)
|
|
1752
|
+
raise ValueError(
|
|
1753
|
+
f"Access to collection '{prefixed_name}' not authorized. "
|
|
1754
|
+
f"App '{target_app}' is not in read_scopes {self._read_scopes}. "
|
|
1755
|
+
"Cross-app access must be explicitly granted via read_scopes."
|
|
1756
|
+
)
|
|
1757
|
+
|
|
1758
|
+
# Log authorized cross-app access for audit trail
|
|
1759
|
+
logger.info(
|
|
1760
|
+
f"Cross-app access authorized. "
|
|
1761
|
+
f"Collection: '{prefixed_name}', From app: '{self._write_scope}', "
|
|
1762
|
+
f"To app: '{target_app}'"
|
|
1763
|
+
)
|
|
1764
|
+
|
|
1765
|
+
def __getattribute__(self, name: str) -> Any:
|
|
1766
|
+
"""
|
|
1767
|
+
Override to validate collection names before attribute access.
|
|
1768
|
+
This ensures validation happens even if MagicMock creates attributes dynamically.
|
|
1399
1769
|
"""
|
|
1400
|
-
|
|
1770
|
+
# Handle our own attributes first (use super() to avoid recursion)
|
|
1771
|
+
if name.startswith("_") or name in ("get_collection",):
|
|
1772
|
+
return super().__getattribute__(name)
|
|
1773
|
+
|
|
1774
|
+
# Validate collection name for security BEFORE checking if attribute exists
|
|
1775
|
+
# This ensures ValueError is raised even if MagicMock would create the attribute
|
|
1776
|
+
validation_error = None
|
|
1777
|
+
if not name.startswith("_"):
|
|
1778
|
+
try:
|
|
1779
|
+
_validate_collection_name(name, allow_prefixed=False)
|
|
1780
|
+
except ValueError as e:
|
|
1781
|
+
# Log the warning without accessing object attributes to avoid recursion
|
|
1782
|
+
# The validation error itself is what matters, not the logging details
|
|
1783
|
+
try:
|
|
1784
|
+
logger.warning(
|
|
1785
|
+
f"Security: Invalid collection name attempted. "
|
|
1786
|
+
f"Name: '{name}', Error: {e}"
|
|
1787
|
+
)
|
|
1788
|
+
except (AttributeError, RuntimeError):
|
|
1789
|
+
# If logging fails due to logger issues, continue -
|
|
1790
|
+
# validation error is what matters
|
|
1791
|
+
# Type 2: Recoverable - we can continue without logging
|
|
1792
|
+
pass
|
|
1793
|
+
# Store the error to raise after checking attribute existence
|
|
1794
|
+
# This ensures we raise ValueError even if MagicMock creates the attribute
|
|
1795
|
+
validation_error = ValueError(str(e))
|
|
1796
|
+
|
|
1797
|
+
# Continue with normal attribute access
|
|
1798
|
+
try:
|
|
1799
|
+
attr = super().__getattribute__(name)
|
|
1800
|
+
# If validation failed, raise ValueError now (even if attribute exists)
|
|
1801
|
+
if validation_error is not None:
|
|
1802
|
+
raise validation_error
|
|
1803
|
+
return attr
|
|
1804
|
+
except AttributeError:
|
|
1805
|
+
# Attribute doesn't exist
|
|
1806
|
+
# If validation failed, raise ValueError (from None: unrelated to AttributeError)
|
|
1807
|
+
if validation_error is not None:
|
|
1808
|
+
raise validation_error from None
|
|
1809
|
+
# Delegate to __getattr__ for collection creation
|
|
1810
|
+
return self.__getattr__(name)
|
|
1401
1811
|
|
|
1402
1812
|
def __getattr__(self, name: str) -> ScopedCollectionWrapper:
|
|
1403
1813
|
"""
|
|
@@ -1406,6 +1816,17 @@ class ScopedMongoWrapper:
|
|
|
1406
1816
|
If `name` is a collection, returns a `ScopedCollectionWrapper`.
|
|
1407
1817
|
"""
|
|
1408
1818
|
|
|
1819
|
+
# Explicitly block access to 'database' property (removed for security)
|
|
1820
|
+
if name == "database":
|
|
1821
|
+
logger.warning(
|
|
1822
|
+
f"Security: Attempted access to 'database' property. " f"App: {self._write_scope}"
|
|
1823
|
+
)
|
|
1824
|
+
raise AttributeError(
|
|
1825
|
+
"'database' property has been removed for security. "
|
|
1826
|
+
"Use collection.index_manager for index operations. "
|
|
1827
|
+
"All data access must go through scoped collections."
|
|
1828
|
+
)
|
|
1829
|
+
|
|
1409
1830
|
# Prevent proxying private/special attributes
|
|
1410
1831
|
if name.startswith("_"):
|
|
1411
1832
|
raise AttributeError(
|
|
@@ -1413,11 +1834,33 @@ class ScopedMongoWrapper:
|
|
|
1413
1834
|
"Access to private attributes is blocked."
|
|
1414
1835
|
)
|
|
1415
1836
|
|
|
1837
|
+
# Note: Validation already happened in __getattribute__, but we validate again
|
|
1838
|
+
# for safety in case __getattr__ is called directly
|
|
1839
|
+
try:
|
|
1840
|
+
_validate_collection_name(name, allow_prefixed=False)
|
|
1841
|
+
except ValueError as e:
|
|
1842
|
+
logger.warning(
|
|
1843
|
+
f"Security: Invalid collection name attempted. "
|
|
1844
|
+
f"Name: '{name}', App: {self._write_scope}, Error: {e}"
|
|
1845
|
+
)
|
|
1846
|
+
raise
|
|
1847
|
+
|
|
1416
1848
|
# Construct the prefixed collection name, e.g., "data_imaging_workouts"
|
|
1417
1849
|
# `self._write_scope` holds the slug (e.g., "data_imaging")
|
|
1418
1850
|
# `name` holds the base name (e.g., "workouts")
|
|
1419
1851
|
prefixed_name = f"{self._write_scope}_{name}"
|
|
1420
1852
|
|
|
1853
|
+
# Validate prefixed name as well (for reserved names check)
|
|
1854
|
+
try:
|
|
1855
|
+
_validate_collection_name(prefixed_name, allow_prefixed=True)
|
|
1856
|
+
except ValueError as e:
|
|
1857
|
+
logger.warning(
|
|
1858
|
+
f"Security: Invalid prefixed collection name. "
|
|
1859
|
+
f"Base name: '{name}', Prefixed: '{prefixed_name}', "
|
|
1860
|
+
f"App: {self._write_scope}, Error: {e}"
|
|
1861
|
+
)
|
|
1862
|
+
raise
|
|
1863
|
+
|
|
1421
1864
|
# Check cache first using the *prefixed_name*
|
|
1422
1865
|
if prefixed_name in self._wrapper_cache:
|
|
1423
1866
|
return self._wrapper_cache[prefixed_name]
|
|
@@ -1439,6 +1882,8 @@ class ScopedMongoWrapper:
|
|
|
1439
1882
|
read_scopes=self._read_scopes,
|
|
1440
1883
|
write_scope=self._write_scope,
|
|
1441
1884
|
auto_index=self._auto_index,
|
|
1885
|
+
query_validator=self._query_validator,
|
|
1886
|
+
resource_limiter=self._resource_limiter,
|
|
1442
1887
|
)
|
|
1443
1888
|
|
|
1444
1889
|
# Magically ensure app_id index exists (it's always used in queries)
|
|
@@ -1476,17 +1921,13 @@ class ScopedMongoWrapper:
|
|
|
1476
1921
|
f"connection is closed (likely during shutdown)"
|
|
1477
1922
|
)
|
|
1478
1923
|
async with ScopedMongoWrapper._app_id_index_lock:
|
|
1479
|
-
ScopedMongoWrapper._app_id_index_cache.pop(
|
|
1480
|
-
collection_name, None
|
|
1481
|
-
)
|
|
1924
|
+
ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
|
|
1482
1925
|
return
|
|
1483
1926
|
|
|
1484
1927
|
has_index = await self._ensure_app_id_index(real_collection)
|
|
1485
1928
|
# Update cache with result (inside lock for thread-safety)
|
|
1486
1929
|
async with ScopedMongoWrapper._app_id_index_lock:
|
|
1487
|
-
ScopedMongoWrapper._app_id_index_cache[collection_name] =
|
|
1488
|
-
has_index
|
|
1489
|
-
)
|
|
1930
|
+
ScopedMongoWrapper._app_id_index_cache[collection_name] = has_index
|
|
1490
1931
|
except (
|
|
1491
1932
|
ConnectionFailure,
|
|
1492
1933
|
ServerSelectionTimeoutError,
|
|
@@ -1499,30 +1940,82 @@ class ScopedMongoWrapper:
|
|
|
1499
1940
|
)
|
|
1500
1941
|
# Remove from cache on error so we can retry later
|
|
1501
1942
|
async with ScopedMongoWrapper._app_id_index_lock:
|
|
1502
|
-
ScopedMongoWrapper._app_id_index_cache.pop(
|
|
1503
|
-
collection_name, None
|
|
1504
|
-
)
|
|
1943
|
+
ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
|
|
1505
1944
|
except OperationFailure as e:
|
|
1506
1945
|
# Index creation failed for other reasons (non-critical)
|
|
1507
1946
|
logger.debug(f"App_id index creation failed (non-critical): {e}")
|
|
1508
1947
|
# Remove from cache on error so we can retry later
|
|
1509
1948
|
async with ScopedMongoWrapper._app_id_index_lock:
|
|
1510
|
-
ScopedMongoWrapper._app_id_index_cache.pop(
|
|
1511
|
-
|
|
1512
|
-
)
|
|
1949
|
+
ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
|
|
1950
|
+
# Let other exceptions bubble up - they are non-recoverable (Type 4)
|
|
1513
1951
|
|
|
1514
1952
|
# Check cache first (quick check before lock)
|
|
1515
1953
|
if collection_name not in ScopedMongoWrapper._app_id_index_cache:
|
|
1516
1954
|
# Fire and forget - task will check lock internally
|
|
1517
1955
|
# (managed to prevent accumulation)
|
|
1518
|
-
_create_managed_task(
|
|
1519
|
-
_safe_app_id_index_check(), task_name="app_id_index_check"
|
|
1520
|
-
)
|
|
1956
|
+
_create_managed_task(_safe_app_id_index_check(), task_name="app_id_index_check")
|
|
1521
1957
|
|
|
1522
1958
|
# Store it in the cache for this instance using the *prefixed_name*
|
|
1523
1959
|
self._wrapper_cache[prefixed_name] = wrapper
|
|
1524
1960
|
return wrapper
|
|
1525
1961
|
|
|
1962
|
+
def _find_matched_app_for_collection(self, name: str) -> str | None:
|
|
1963
|
+
"""
|
|
1964
|
+
Check if collection name matches any app slug in read_scopes (cross-app access).
|
|
1965
|
+
|
|
1966
|
+
Args:
|
|
1967
|
+
name: Collection name to check
|
|
1968
|
+
|
|
1969
|
+
Returns:
|
|
1970
|
+
Matched app slug if found, None otherwise
|
|
1971
|
+
"""
|
|
1972
|
+
if "_" not in name:
|
|
1973
|
+
return None
|
|
1974
|
+
|
|
1975
|
+
# Check if any app slug in read_scopes matches the beginning of the name
|
|
1976
|
+
for app_slug in self._read_scopes:
|
|
1977
|
+
if name.startswith(f"{app_slug}_") and app_slug != self._write_scope:
|
|
1978
|
+
return app_slug
|
|
1979
|
+
return None
|
|
1980
|
+
|
|
1981
|
+
def _resolve_prefixed_collection_name(self, name: str, matched_app: str | None) -> str:
|
|
1982
|
+
"""
|
|
1983
|
+
Resolve the prefixed collection name based on matched app or write scope.
|
|
1984
|
+
|
|
1985
|
+
Args:
|
|
1986
|
+
name: Collection name (base or prefixed)
|
|
1987
|
+
matched_app: Matched app slug if cross-app access, None otherwise
|
|
1988
|
+
|
|
1989
|
+
Returns:
|
|
1990
|
+
Prefixed collection name
|
|
1991
|
+
|
|
1992
|
+
Raises:
|
|
1993
|
+
ValueError: If prefixed name is invalid
|
|
1994
|
+
"""
|
|
1995
|
+
if matched_app:
|
|
1996
|
+
# This is authorized cross-app access
|
|
1997
|
+
prefixed_name = name
|
|
1998
|
+
# Log authorized cross-app access for audit trail
|
|
1999
|
+
logger.info(
|
|
2000
|
+
f"Cross-app access authorized. "
|
|
2001
|
+
f"Collection: '{prefixed_name}', From app: '{self._write_scope}', "
|
|
2002
|
+
f"To app: '{matched_app}'"
|
|
2003
|
+
)
|
|
2004
|
+
else:
|
|
2005
|
+
# Regular collection name - prefix with write_scope
|
|
2006
|
+
prefixed_name = f"{self._write_scope}_{name}"
|
|
2007
|
+
# Validate prefixed name
|
|
2008
|
+
try:
|
|
2009
|
+
_validate_collection_name(prefixed_name, allow_prefixed=True)
|
|
2010
|
+
except ValueError as e:
|
|
2011
|
+
logger.warning(
|
|
2012
|
+
f"Security: Invalid prefixed collection name in get_collection(). "
|
|
2013
|
+
f"Base name: '{name}', Prefixed: '{prefixed_name}', "
|
|
2014
|
+
f"App: {self._write_scope}, Error: {e}"
|
|
2015
|
+
)
|
|
2016
|
+
raise
|
|
2017
|
+
return prefixed_name
|
|
2018
|
+
|
|
1526
2019
|
def get_collection(self, name: str) -> ScopedCollectionWrapper:
|
|
1527
2020
|
"""
|
|
1528
2021
|
Get a collection by name (Motor-like API).
|
|
@@ -1539,6 +2032,9 @@ class ScopedMongoWrapper:
|
|
|
1539
2032
|
Returns:
|
|
1540
2033
|
ScopedCollectionWrapper instance
|
|
1541
2034
|
|
|
2035
|
+
Raises:
|
|
2036
|
+
ValueError: If collection name is invalid or cross-app access is not authorized
|
|
2037
|
+
|
|
1542
2038
|
Example:
|
|
1543
2039
|
# Same-app collection (base name)
|
|
1544
2040
|
collection = db.get_collection("my_collection")
|
|
@@ -1546,15 +2042,21 @@ class ScopedMongoWrapper:
|
|
|
1546
2042
|
# Cross-app collection (fully prefixed)
|
|
1547
2043
|
collection = db.get_collection("click_tracker_clicks")
|
|
1548
2044
|
"""
|
|
1549
|
-
#
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
2045
|
+
# Validate collection name for security
|
|
2046
|
+
try:
|
|
2047
|
+
_validate_collection_name(name, allow_prefixed=True)
|
|
2048
|
+
except ValueError as e:
|
|
2049
|
+
logger.warning(
|
|
2050
|
+
f"Security: Invalid collection name in get_collection(). "
|
|
2051
|
+
f"Name: '{name}', App: {self._write_scope}, Error: {e}"
|
|
2052
|
+
)
|
|
2053
|
+
raise
|
|
2054
|
+
|
|
2055
|
+
# Check if name is already fully prefixed (cross-app access)
|
|
2056
|
+
matched_app = self._find_matched_app_for_collection(name)
|
|
2057
|
+
|
|
2058
|
+
# Resolve prefixed name based on matched app or write scope
|
|
2059
|
+
prefixed_name = self._resolve_prefixed_collection_name(name, matched_app)
|
|
1558
2060
|
|
|
1559
2061
|
# Check cache first
|
|
1560
2062
|
if prefixed_name in self._wrapper_cache:
|
|
@@ -1576,6 +2078,9 @@ class ScopedMongoWrapper:
|
|
|
1576
2078
|
read_scopes=self._read_scopes,
|
|
1577
2079
|
write_scope=self._write_scope,
|
|
1578
2080
|
auto_index=self._auto_index,
|
|
2081
|
+
query_validator=self._query_validator,
|
|
2082
|
+
resource_limiter=self._resource_limiter,
|
|
2083
|
+
parent_wrapper=self,
|
|
1579
2084
|
)
|
|
1580
2085
|
|
|
1581
2086
|
# Magically ensure app_id index exists (background task)
|
|
@@ -1607,16 +2112,12 @@ class ScopedMongoWrapper:
|
|
|
1607
2112
|
f"connection is closed (likely during shutdown)"
|
|
1608
2113
|
)
|
|
1609
2114
|
async with ScopedMongoWrapper._app_id_index_lock:
|
|
1610
|
-
ScopedMongoWrapper._app_id_index_cache.pop(
|
|
1611
|
-
collection_name, None
|
|
1612
|
-
)
|
|
2115
|
+
ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
|
|
1613
2116
|
return
|
|
1614
2117
|
|
|
1615
2118
|
has_index = await self._ensure_app_id_index(real_collection)
|
|
1616
2119
|
async with ScopedMongoWrapper._app_id_index_lock:
|
|
1617
|
-
ScopedMongoWrapper._app_id_index_cache[collection_name] =
|
|
1618
|
-
has_index
|
|
1619
|
-
)
|
|
2120
|
+
ScopedMongoWrapper._app_id_index_cache[collection_name] = has_index
|
|
1620
2121
|
except (
|
|
1621
2122
|
ConnectionFailure,
|
|
1622
2123
|
ServerSelectionTimeoutError,
|
|
@@ -1628,27 +2129,53 @@ class ScopedMongoWrapper:
|
|
|
1628
2129
|
f"connection error (likely during shutdown): {e}"
|
|
1629
2130
|
)
|
|
1630
2131
|
async with ScopedMongoWrapper._app_id_index_lock:
|
|
1631
|
-
ScopedMongoWrapper._app_id_index_cache.pop(
|
|
1632
|
-
collection_name, None
|
|
1633
|
-
)
|
|
2132
|
+
ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
|
|
1634
2133
|
except OperationFailure as e:
|
|
1635
2134
|
# Index creation failed for other reasons (non-critical)
|
|
1636
2135
|
logger.debug(f"App_id index creation failed (non-critical): {e}")
|
|
1637
2136
|
async with ScopedMongoWrapper._app_id_index_lock:
|
|
1638
|
-
ScopedMongoWrapper._app_id_index_cache.pop(
|
|
1639
|
-
collection_name, None
|
|
1640
|
-
)
|
|
2137
|
+
ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
|
|
2138
|
+
# Let other exceptions bubble up - they are non-recoverable (Type 4)
|
|
1641
2139
|
|
|
1642
2140
|
if collection_name not in ScopedMongoWrapper._app_id_index_cache:
|
|
1643
2141
|
# Use managed task creation to prevent accumulation
|
|
1644
|
-
_create_managed_task(
|
|
1645
|
-
_safe_app_id_index_check(), task_name="app_id_index_check"
|
|
1646
|
-
)
|
|
2142
|
+
_create_managed_task(_safe_app_id_index_check(), task_name="app_id_index_check")
|
|
1647
2143
|
|
|
1648
2144
|
# Store it in the cache
|
|
1649
2145
|
self._wrapper_cache[prefixed_name] = wrapper
|
|
1650
2146
|
return wrapper
|
|
1651
2147
|
|
|
2148
|
+
def __getitem__(self, name: str) -> ScopedCollectionWrapper:
|
|
2149
|
+
"""
|
|
2150
|
+
Support bracket notation for collection access (e.g., db["collection_name"]).
|
|
2151
|
+
|
|
2152
|
+
This allows compatibility with code that uses bracket notation instead of
|
|
2153
|
+
attribute access (e.g., TokenBlacklist, SessionManager).
|
|
2154
|
+
|
|
2155
|
+
Args:
|
|
2156
|
+
name: Collection name (base name, will be prefixed with write_scope)
|
|
2157
|
+
|
|
2158
|
+
Returns:
|
|
2159
|
+
ScopedCollectionWrapper instance
|
|
2160
|
+
|
|
2161
|
+
Raises:
|
|
2162
|
+
ValueError: If collection name is invalid
|
|
2163
|
+
|
|
2164
|
+
Example:
|
|
2165
|
+
collection = db["my_collection"] # Same as db.my_collection
|
|
2166
|
+
"""
|
|
2167
|
+
# Validate collection name for security (get_collection will do additional validation)
|
|
2168
|
+
try:
|
|
2169
|
+
_validate_collection_name(name, allow_prefixed=False)
|
|
2170
|
+
except ValueError as e:
|
|
2171
|
+
logger.warning(
|
|
2172
|
+
f"Security: Invalid collection name in __getitem__(). "
|
|
2173
|
+
f"Name: '{name}', App: {self._write_scope}, Error: {e}"
|
|
2174
|
+
)
|
|
2175
|
+
raise
|
|
2176
|
+
|
|
2177
|
+
return self.get_collection(name)
|
|
2178
|
+
|
|
1652
2179
|
async def _ensure_app_id_index(self, collection: AsyncIOMotorCollection) -> bool:
|
|
1653
2180
|
"""
|
|
1654
2181
|
Ensures app_id index exists on collection.
|
|
@@ -1680,11 +2207,7 @@ class ScopedMongoWrapper:
|
|
|
1680
2207
|
return True
|
|
1681
2208
|
except OperationFailure as e:
|
|
1682
2209
|
# Handle index build aborted (e.g., database being dropped during teardown)
|
|
1683
|
-
if (
|
|
1684
|
-
e.code == 276
|
|
1685
|
-
or "IndexBuildAborted" in str(e)
|
|
1686
|
-
or "dropDatabase" in str(e)
|
|
1687
|
-
):
|
|
2210
|
+
if e.code == 276 or "IndexBuildAborted" in str(e) or "dropDatabase" in str(e):
|
|
1688
2211
|
logger.debug(
|
|
1689
2212
|
f"Skipping app_id index creation on {collection.name}: "
|
|
1690
2213
|
f"index build aborted (likely during database drop/teardown): {e}"
|
|
@@ -1694,19 +2217,13 @@ class ScopedMongoWrapper:
|
|
|
1694
2217
|
return True
|
|
1695
2218
|
except OperationFailure as e:
|
|
1696
2219
|
# Handle index build aborted (e.g., database being dropped during teardown)
|
|
1697
|
-
if (
|
|
1698
|
-
e.code == 276
|
|
1699
|
-
or "IndexBuildAborted" in str(e)
|
|
1700
|
-
or "dropDatabase" in str(e)
|
|
1701
|
-
):
|
|
2220
|
+
if e.code == 276 or "IndexBuildAborted" in str(e) or "dropDatabase" in str(e):
|
|
1702
2221
|
logger.debug(
|
|
1703
2222
|
f"Skipping app_id index creation on {collection.name}: "
|
|
1704
2223
|
f"index build aborted (likely during database drop/teardown): {e}"
|
|
1705
2224
|
)
|
|
1706
2225
|
return False
|
|
1707
|
-
logger.debug(
|
|
1708
|
-
f"OperationFailure ensuring app_id index on {collection.name}: {e}"
|
|
1709
|
-
)
|
|
2226
|
+
logger.debug(f"OperationFailure ensuring app_id index on {collection.name}: {e}")
|
|
1710
2227
|
return False
|
|
1711
2228
|
except (ConnectionFailure, ServerSelectionTimeoutError, InvalidOperation) as e:
|
|
1712
2229
|
# Handle connection errors gracefully (e.g., during shutdown)
|