mdb-engine 0.1.6__py3-none-any.whl → 0.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdb_engine/__init__.py +116 -11
- mdb_engine/auth/ARCHITECTURE.md +112 -0
- mdb_engine/auth/README.md +654 -11
- mdb_engine/auth/__init__.py +136 -29
- mdb_engine/auth/audit.py +592 -0
- mdb_engine/auth/base.py +252 -0
- mdb_engine/auth/casbin_factory.py +265 -70
- mdb_engine/auth/config_defaults.py +5 -5
- mdb_engine/auth/config_helpers.py +19 -18
- mdb_engine/auth/cookie_utils.py +12 -16
- mdb_engine/auth/csrf.py +483 -0
- mdb_engine/auth/decorators.py +10 -16
- mdb_engine/auth/dependencies.py +69 -71
- mdb_engine/auth/helpers.py +3 -3
- mdb_engine/auth/integration.py +61 -88
- mdb_engine/auth/jwt.py +11 -15
- mdb_engine/auth/middleware.py +79 -35
- mdb_engine/auth/oso_factory.py +21 -41
- mdb_engine/auth/provider.py +270 -171
- mdb_engine/auth/rate_limiter.py +505 -0
- mdb_engine/auth/restrictions.py +21 -36
- mdb_engine/auth/session_manager.py +24 -41
- mdb_engine/auth/shared_middleware.py +977 -0
- mdb_engine/auth/shared_users.py +775 -0
- mdb_engine/auth/token_lifecycle.py +10 -12
- mdb_engine/auth/token_store.py +17 -32
- mdb_engine/auth/users.py +99 -159
- mdb_engine/auth/utils.py +236 -42
- mdb_engine/cli/commands/generate.py +546 -10
- mdb_engine/cli/commands/validate.py +3 -7
- mdb_engine/cli/utils.py +7 -7
- mdb_engine/config.py +13 -28
- mdb_engine/constants.py +65 -0
- mdb_engine/core/README.md +117 -6
- mdb_engine/core/__init__.py +39 -7
- mdb_engine/core/app_registration.py +31 -50
- mdb_engine/core/app_secrets.py +289 -0
- mdb_engine/core/connection.py +20 -12
- mdb_engine/core/encryption.py +222 -0
- mdb_engine/core/engine.py +2862 -115
- mdb_engine/core/index_management.py +12 -16
- mdb_engine/core/manifest.py +628 -204
- mdb_engine/core/ray_integration.py +436 -0
- mdb_engine/core/seeding.py +13 -21
- mdb_engine/core/service_initialization.py +20 -30
- mdb_engine/core/types.py +40 -43
- mdb_engine/database/README.md +140 -17
- mdb_engine/database/__init__.py +17 -6
- mdb_engine/database/abstraction.py +37 -50
- mdb_engine/database/connection.py +51 -30
- mdb_engine/database/query_validator.py +367 -0
- mdb_engine/database/resource_limiter.py +204 -0
- mdb_engine/database/scoped_wrapper.py +747 -237
- mdb_engine/dependencies.py +427 -0
- mdb_engine/di/__init__.py +34 -0
- mdb_engine/di/container.py +247 -0
- mdb_engine/di/providers.py +206 -0
- mdb_engine/di/scopes.py +139 -0
- mdb_engine/embeddings/README.md +54 -24
- mdb_engine/embeddings/__init__.py +31 -24
- mdb_engine/embeddings/dependencies.py +38 -155
- mdb_engine/embeddings/service.py +78 -75
- mdb_engine/exceptions.py +104 -12
- mdb_engine/indexes/README.md +30 -13
- mdb_engine/indexes/__init__.py +1 -0
- mdb_engine/indexes/helpers.py +11 -11
- mdb_engine/indexes/manager.py +59 -123
- mdb_engine/memory/README.md +95 -4
- mdb_engine/memory/__init__.py +1 -2
- mdb_engine/memory/service.py +363 -1168
- mdb_engine/observability/README.md +4 -2
- mdb_engine/observability/__init__.py +26 -9
- mdb_engine/observability/health.py +17 -17
- mdb_engine/observability/logging.py +10 -10
- mdb_engine/observability/metrics.py +40 -19
- mdb_engine/repositories/__init__.py +34 -0
- mdb_engine/repositories/base.py +325 -0
- mdb_engine/repositories/mongo.py +233 -0
- mdb_engine/repositories/unit_of_work.py +166 -0
- mdb_engine/routing/README.md +1 -1
- mdb_engine/routing/__init__.py +1 -3
- mdb_engine/routing/websockets.py +41 -75
- mdb_engine/utils/__init__.py +3 -1
- mdb_engine/utils/mongo.py +117 -0
- mdb_engine-0.4.12.dist-info/METADATA +492 -0
- mdb_engine-0.4.12.dist-info/RECORD +97 -0
- {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/WHEEL +1 -1
- mdb_engine-0.1.6.dist-info/METADATA +0 -213
- mdb_engine-0.1.6.dist-info/RECORD +0 -75
- {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/entry_points.txt +0 -0
- {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/licenses/LICENSE +0 -0
- {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/top_level.txt +0 -0
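Most of the churn in this release sits in mdb_engine/database/scoped_wrapper.py (+747 -237), whose per-line diff is rendered below. As a reading aid, here is a hedged usage sketch of the scoping contract those hunks implement: writes are stamped with the wrapper's write_scope, and reads are confined to its read_scopes. The import path follows the file list above, but the attribute-access style is inferred from the diff, not from documented API.

```python
import asyncio

from motor.motor_asyncio import AsyncIOMotorClient

from mdb_engine.database.scoped_wrapper import ScopedMongoWrapper  # path per the file list above


async def demo() -> None:
    client = AsyncIOMotorClient("mongodb://localhost:27017")  # assumed local server
    scoped = ScopedMongoWrapper(
        real_db=client["mdb"],
        read_scopes=["blog", "shared"],  # apps this wrapper may read from
        write_scope="blog",              # app_id stamped onto every write
    )
    posts = scoped.posts  # attribute access resolves a scoped collection (inferred)
    await posts.insert_one({"title": "hi"})  # stored as {"title": "hi", "app_id": "blog"}
    # reads are rewritten to {"$and": [<user filter>, {"app_id": {"$in": read_scopes}}]}
    print(await posts.find_one({"title": "hi"}))


asyncio.run(demo())
```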
@@ -26,27 +26,60 @@ a familiar (Motor-like) developer experience with automatic index optimization.
 
 import asyncio
 import logging
+import re
 import time
-from
-
-
-
-
+from collections.abc import Coroutine, Mapping
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    ClassVar,
+    Optional,
+)
+
+if TYPE_CHECKING:
+    from ..core.app_secrets import AppSecretsManager
+
+from motor.motor_asyncio import (
+    AsyncIOMotorCollection,
+    AsyncIOMotorCursor,
+    AsyncIOMotorDatabase,
+)
 from pymongo import ASCENDING, DESCENDING, TEXT
-from pymongo.errors import (
-
-
+from pymongo.errors import (
+    AutoReconnect,
+    CollectionInvalid,
+    ConnectionFailure,
+    InvalidOperation,
+    OperationFailure,
+    PyMongoError,
+    ServerSelectionTimeoutError,
+)
 from pymongo.operations import SearchIndexModel
-from pymongo.results import (
-
+from pymongo.results import (
+    DeleteResult,
+    InsertManyResult,
+    InsertOneResult,
+    UpdateResult,
+)
 
 # Import constants
-from ..constants import (
-
-
+from ..constants import (
+    AUTO_INDEX_HINT_THRESHOLD,
+    DEFAULT_DROP_TIMEOUT,
+    DEFAULT_POLL_INTERVAL,
+    DEFAULT_SEARCH_TIMEOUT,
+    MAX_COLLECTION_NAME_LENGTH,
+    MAX_INDEX_FIELDS,
+    MIN_COLLECTION_NAME_LENGTH,
+    RESERVED_COLLECTION_NAMES,
+    RESERVED_COLLECTION_PREFIXES,
+)
 from ..exceptions import MongoDBEngineError
+
 # Import observability
 from ..observability import record_operation
+from .query_validator import QueryValidator
+from .resource_limiter import ResourceLimiter
 
 # --- FIX: Configure logger *before* first use ---
 logger = logging.getLogger(__name__)
@@ -60,9 +93,7 @@ GEO2DSPHERE = "2dsphere"
 
 
 # --- HELPER FUNCTION FOR MANAGED TASK CREATION ---
-def _create_managed_task(
-    coro: Coroutine[Any, Any, Any], task_name: Optional[str] = None
-) -> None:
+def _create_managed_task(coro: Coroutine[Any, Any, Any], task_name: str | None = None) -> None:
    """
    Creates a background task using asyncio.create_task().
 
@@ -86,6 +117,149 @@ def _create_managed_task(
 # --- END HELPER FUNCTION ---
 
 
+# ##########################################################################
+# SECURITY VALIDATION FUNCTIONS
+# ##########################################################################
+
+# Collection name pattern: alphanumeric, underscore, dot, hyphen
+# Must start with alphanumeric or underscore
+# MongoDB allows: [a-zA-Z0-9_.-] but cannot start with number or special char
+COLLECTION_NAME_PATTERN: re.Pattern = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.-]*$")
+"""Regex pattern for valid MongoDB collection names."""
+
+
+def _validate_collection_name(name: str, allow_prefixed: bool = False) -> None:
+    """
+    Validate collection name for security.
+
+    Validates that collection names:
+    - Meet MongoDB naming requirements
+    - Are not reserved system names
+    - Do not use reserved prefixes
+    - Are within length limits
+
+    Args:
+        name: Collection name to validate
+        allow_prefixed: If True, allows prefixed names (e.g., "app_collection")
+            for cross-app access validation
+
+    Raises:
+        ValueError: If collection name is invalid, reserved, or uses reserved prefix
+    """
+    if not name:
+        raise ValueError("Collection name cannot be empty")
+
+    # Check length
+    if len(name) < MIN_COLLECTION_NAME_LENGTH:
+        raise ValueError(
+            f"Collection name too short (minimum {MIN_COLLECTION_NAME_LENGTH} character): {name}"
+        )
+    if len(name) > MAX_COLLECTION_NAME_LENGTH:
+        raise ValueError(
+            f"Collection name too long (maximum {MAX_COLLECTION_NAME_LENGTH} characters): {name}"
+        )
+
+    # Check pattern (MongoDB naming rules)
+    if not COLLECTION_NAME_PATTERN.match(name):
+        raise ValueError(
+            f"Invalid collection name format: '{name}'. "
+            "Collection names must start with a letter or underscore and "
+            "contain only alphanumeric characters, underscores, dots, or hyphens."
+        )
+
+    # MongoDB doesn't allow collection names to end with a dot
+    if name.endswith("."):
+        raise ValueError(
+            f"Invalid collection name format: '{name}'. " "Collection names cannot end with a dot."
+        )
+
+    # Check for path traversal attempts
+    if ".." in name or "/" in name or "\\" in name:
+        raise ValueError(
+            f"Invalid collection name format: '{name}'. "
+            f"Collection names must start with a letter or underscore and contain "
+            f"only alphanumeric characters, underscores, dots, or hyphens."
+        )
+
+    # Check reserved names (exact match)
+    if name in RESERVED_COLLECTION_NAMES:
+        logger.warning(f"Security: Attempted access to reserved collection name: {name}")
+        raise ValueError(
+            f"Collection name '{name}' is reserved and cannot be accessed through scoped database."
+        )
+
+    # Check reserved prefixes
+    name_lower = name.lower()
+    for prefix in RESERVED_COLLECTION_PREFIXES:
+        if name_lower.startswith(prefix):
+            logger.warning(
+                f"Security: Attempted access to collection with reserved prefix '{prefix}': {name}"
+            )
+            raise ValueError(
+                f"Collection name '{name}' uses reserved prefix '{prefix}' and cannot be accessed."
+            )
+
+
+def _extract_app_slug_from_prefixed_name(prefixed_name: str) -> str | None:
+    """
+    Extract app slug from a prefixed collection name.
+
+    Args:
+        prefixed_name: Collection name that may be prefixed (e.g., "app_slug_collection")
+
+    Returns:
+        App slug if name is prefixed, None otherwise
+    """
+    if "_" not in prefixed_name:
+        return None
+
+    # Split on first underscore
+    parts = prefixed_name.split("_", 1)
+    if len(parts) != 2:
+        return None
+
+    app_slug = parts[0]
+    # Basic validation - app slug should be non-empty
+    if app_slug:
+        return app_slug
+    return None
+
+
+class _SecureCollectionProxy:
+    """
+    Proxy wrapper that blocks access to dangerous attributes on collections.
+
+    Prevents access to database/client attributes that could be used to bypass scoping.
+    """
+
+    __slots__ = ("_collection",)
+
+    def __init__(self, collection: AsyncIOMotorCollection):
+        self._collection = collection
+
+    def __getattr__(self, name: str) -> Any:
+        """Block access to database/client attributes."""
+        if name in ("database", "client", "db"):
+            logger.warning(
+                f"Security: Attempted access to '{name}' attribute on collection. "
+                "This is blocked to prevent bypassing scoping."
+            )
+            raise AttributeError(
+                f"Access to '{name}' is blocked for security. "
+                "Use collection.index_manager for index operations. "
+                "All data access must go through scoped collections."
+            )
+        return getattr(self._collection, name)
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        """Allow setting _collection, delegate other attributes to underlying collection."""
+        if name == "_collection":
+            super().__setattr__(name, value)
+        else:
+            # Delegate to underlying collection for other attributes
+            setattr(self._collection, name, value)
+
+
 # ##########################################################################
 # ASYNCHRONOUS ATLAS INDEX MANAGER
 # ##########################################################################
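The hunk above introduces `_validate_collection_name`, which rejects empty, malformed, traversal-style, reserved, and reserved-prefix names before any collection handle is produced. A standalone sketch of the same decision logic follows (the real length limits and reserved sets live in mdb_engine/constants.py; the concrete values below are illustrative assumptions):

```python
import re

COLLECTION_NAME_PATTERN = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.-]*$")


def is_acceptable(name: str) -> bool:
    reserved = {"users", "sessions"}            # assumed examples
    reserved_prefixes = ("system.", "_engine")  # assumed examples
    return (
        bool(name)
        and 1 <= len(name) <= 120  # assumed MIN/MAX_COLLECTION_NAME_LENGTH
        and COLLECTION_NAME_PATTERN.match(name) is not None
        and not name.endswith(".")
        and ".." not in name and "/" not in name and "\\" not in name
        and name not in reserved
        and not any(name.lower().startswith(p) for p in reserved_prefixes)
    )


assert is_acceptable("blog_posts")
assert not is_acceptable("1bad")         # cannot start with a digit
assert not is_acceptable("logs.")        # cannot end with a dot
assert not is_acceptable("system.logs")  # reserved prefix
```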
@@ -115,10 +289,11 @@ class AsyncAtlasIndexManager:
         Initializes the manager with a direct reference to a
         motor.motor_asyncio.AsyncIOMotorCollection.
         """
+        # Unwrap _SecureCollectionProxy if present to get the real collection
+        if isinstance(real_collection, _SecureCollectionProxy):
+            real_collection = real_collection._collection
         if not isinstance(real_collection, AsyncIOMotorCollection):
-            raise TypeError(
-                f"Expected AsyncIOMotorCollection, got {type(real_collection)}"
-            )
+            raise TypeError(f"Expected AsyncIOMotorCollection, got {type(real_collection)}")
         self._collection = real_collection
 
     async def _ensure_collection_exists(self) -> None:
@@ -134,9 +309,7 @@ class AsyncAtlasIndexManager:
                     f"Continuing index creation."
                 )
             else:
-                logger.exception(
-                    "Failed to ensure collection exists - CollectionInvalid error"
-                )
+                logger.exception("Failed to ensure collection exists - CollectionInvalid error")
                 raise MongoDBEngineError(
                     f"Failed to create prerequisite collection '{self._collection.name}'",
                     context={"collection_name": self._collection.name},
@@ -157,11 +330,11 @@ class AsyncAtlasIndexManager:
 
     def _check_definition_changed(
         self,
-        definition:
-        latest_def:
+        definition: dict[str, Any],
+        latest_def: dict[str, Any],
         index_type: str,
         name: str,
-    ) ->
+    ) -> tuple[bool, str]:
         """Check if index definition has changed."""
         definition_changed = False
         change_reason = ""
@@ -184,8 +357,8 @@ class AsyncAtlasIndexManager:
 
     async def _handle_existing_index(
         self,
-        existing_index:
-        definition:
+        existing_index: dict[str, Any],
+        definition: dict[str, Any],
         index_type: str,
         name: str,
     ) -> bool:
@@ -208,9 +381,7 @@ class AsyncAtlasIndexManager:
             )
             return False  # Will wait below
         elif existing_index.get("queryable"):
-            logger.info(
-                f"Search index '{name}' is already queryable and definition is up-to-date."
-            )
+            logger.info(f"Search index '{name}' is already queryable and definition is up-to-date.")
             return True
         elif existing_index.get("status") == "FAILED":
             logger.error(
@@ -226,32 +397,27 @@ class AsyncAtlasIndexManager:
         return False  # Will wait below
 
     async def _create_new_search_index(
-        self, name: str, definition:
+        self, name: str, definition: dict[str, Any], index_type: str
     ) -> None:
         """Create a new search index."""
         try:
             logger.info(f"Creating new search index '{name}' of type '{index_type}'...")
-            search_index_model = SearchIndexModel(
-                definition=definition, name=name, type=index_type
-            )
+            search_index_model = SearchIndexModel(definition=definition, name=name, type=index_type)
             await self._collection.create_search_index(model=search_index_model)
             logger.info(f"Search index '{name}' build has been submitted.")
         except OperationFailure as e:
             if "IndexAlreadyExists" in str(e) or "DuplicateIndexName" in str(e):
-                logger.warning(
-                    f"Race condition: Index '{name}' was created by another process."
-                )
+                logger.warning(f"Race condition: Index '{name}' was created by another process.")
             else:
-                logger.
-                    f"OperationFailure during search index creation "
-                    f"for '{name}': {e.details}"
+                logger.exception(
+                    f"OperationFailure during search index creation " f"for '{name}': {e.details}"
                 )
-            raise
+                raise
 
     async def create_search_index(
         self,
         name: str,
-        definition:
+        definition: dict[str, Any],
         index_type: str = "search",
         wait_for_ready: bool = True,
         timeout: int = DEFAULT_SEARCH_TIMEOUT,
@@ -283,17 +449,13 @@ class AsyncAtlasIndexManager:
             return True
 
         except OperationFailure as e:
-            logger.exception(
-                f"OperationFailure during search index creation/check for '{name}'"
-            )
+            logger.exception(f"OperationFailure during search index creation/check for '{name}'")
             raise MongoDBEngineError(
                 f"Failed to create/check search index '{name}'",
                 context={"index_name": name, "operation": "create_search_index"},
             ) from e
         except (ConnectionFailure, ServerSelectionTimeoutError) as e:
-            logger.exception(
-                f"Connection error during search index creation/check for '{name}'"
-            )
+            logger.exception(f"Connection error during search index creation/check for '{name}'")
             raise MongoDBEngineError(
                 f"Connection failed while creating/checking search index '{name}'",
                 context={"index_name": name, "operation": "create_search_index"},
@@ -305,7 +467,7 @@ class AsyncAtlasIndexManager:
                 context={"index_name": name, "operation": "create_search_index"},
             ) from e
 
-    async def get_search_index(self, name: str) ->
+    async def get_search_index(self, name: str) -> dict[str, Any] | None:
         """
         Retrieves the definition and status of a single search index by name
         using the $listSearchIndexes aggregation stage.
@@ -329,7 +491,7 @@ class AsyncAtlasIndexManager:
                 context={"index_name": name, "operation": "get_search_index"},
             ) from e
 
-    async def list_search_indexes(self) ->
+    async def list_search_indexes(self) -> list[dict[str, Any]]:
         """Lists all Atlas Search indexes for the collection."""
         try:
             return await self._collection.list_search_indexes().to_list(None)
@@ -362,9 +524,7 @@ class AsyncAtlasIndexManager:
         except OperationFailure as e:
             # Handle race condition where index was already dropped
             if "IndexNotFound" in str(e):
-                logger.info(
-                    f"Search index '{name}' was already deleted (race condition)."
-                )
+                logger.info(f"Search index '{name}' was already deleted (race condition).")
                 return True
             logger.exception(f"OperationFailure dropping search index '{name}'")
             raise MongoDBEngineError(
@@ -387,7 +547,7 @@ class AsyncAtlasIndexManager:
     async def update_search_index(
         self,
         name: str,
-        definition:
+        definition: dict[str, Any],
         wait_for_ready: bool = True,
         timeout: int = DEFAULT_SEARCH_TIMEOUT,
     ) -> bool:
@@ -427,19 +587,13 @@ class AsyncAtlasIndexManager:
         queryable or fails.
         """
         start_time = time.time()
-        logger.info(
-            f"Waiting up to {timeout}s for search index '{name}' to become queryable..."
-        )
+        logger.info(f"Waiting up to {timeout}s for search index '{name}' to become queryable...")
 
         while True:
             elapsed = time.time() - start_time
             if elapsed > timeout:
-                logger.error(
-
-                )
-                raise TimeoutError(
-                    f"Index '{name}' did not become queryable within {timeout}s."
-                )
+                logger.error(f"Timeout: Index '{name}' did not become queryable within {timeout}s.")
+                raise TimeoutError(f"Index '{name}' did not become queryable within {timeout}s.")
 
             index_info = None
             try:
@@ -471,9 +625,7 @@ class AsyncAtlasIndexManager:
             queryable = index_info.get("queryable")
             if queryable:
                 # Success!
-                logger.info(
-                    f"Search index '{name}' is queryable (Status: {status})."
-                )
+                logger.info(f"Search index '{name}' is queryable (Status: {status}).")
                 return True
 
             # Not ready yet, log and wait
@@ -495,14 +647,10 @@ class AsyncAtlasIndexManager:
         Private helper to poll until an index is successfully dropped.
         """
         start_time = time.time()
-        logger.info(
-            f"Waiting up to {timeout}s for search index '{name}' to be dropped..."
-        )
+        logger.info(f"Waiting up to {timeout}s for search index '{name}' to be dropped...")
         while True:
             if time.time() - start_time > timeout:
-                logger.error(
-                    f"Timeout: Index '{name}' was not dropped within {timeout}s."
-                )
+                logger.error(f"Timeout: Index '{name}' was not dropped within {timeout}s.")
                 raise TimeoutError(f"Index '{name}' was not dropped within {timeout}s.")
 
             index_info = await self.get_search_index(name)
@@ -522,7 +670,7 @@ class AsyncAtlasIndexManager:
     # consistent async API with the search index methods.
 
     async def create_index(  # noqa: C901
-        self, keys:
+        self, keys: str | list[tuple[str, int | str]], **kwargs: Any
     ) -> str:
         """
         Creates a standard (non-search) database index.
@@ -588,9 +736,7 @@ class AsyncAtlasIndexManager:
         # Wait for index to be ready (MongoDB indexes are usually immediate, but we verify)
         if wait_for_ready:
             try:
-                is_ready = await self._wait_for_regular_index_ready(
-                    name, timeout=30
-                )
+                is_ready = await self._wait_for_regular_index_ready(name, timeout=30)
                 if not is_ready:
                     logger.warning(
                         f"Regular index '{name}' may not be fully ready yet, "
@@ -606,11 +752,7 @@ class AsyncAtlasIndexManager:
             return name
         except OperationFailure as e:
             # Handle index build aborted (e.g., database being dropped during teardown)
-            if (
-                e.code == 276
-                or "IndexBuildAborted" in str(e)
-                or "dropDatabase" in str(e)
-            ):
+            if e.code == 276 or "IndexBuildAborted" in str(e) or "dropDatabase" in str(e):
                 logger.debug(
                     f"Skipping regular index creation '{index_name}': "
                     f"index build aborted (likely during database drop/teardown): {e}"
@@ -637,8 +779,8 @@ class AsyncAtlasIndexManager:
 
     async def create_text_index(
         self,
-        fields:
-        weights:
+        fields: list[str],
+        weights: dict[str, int] | None = None,
         name: str = "text_index",
         **kwargs: Any,
     ) -> str:
@@ -650,9 +792,7 @@ class AsyncAtlasIndexManager:
         kwargs["name"] = name
         return await self.create_index(keys, **kwargs)
 
-    async def create_geo_index(
-        self, field: str, name: Optional[str] = None, **kwargs: Any
-    ) -> str:
+    async def create_geo_index(self, field: str, name: str | None = None, **kwargs: Any) -> str:
         """Helper to create a standard 2dsphere index."""
         keys = [(field, GEO2DSPHERE)]
         if name:
@@ -681,15 +821,13 @@ class AsyncAtlasIndexManager:
                 context={"index_name": name, "operation": "drop_index"},
             ) from e
         except InvalidOperation as e:
-            logger.debug(
-                f"Cannot drop regular index '{name}': MongoDB client is closed"
-            )
+            logger.debug(f"Cannot drop regular index '{name}': MongoDB client is closed")
             raise MongoDBEngineError(
                 f"Cannot drop regular index '{name}': MongoDB client is closed",
                 context={"index_name": name, "operation": "drop_index"},
             ) from e
 
-    async def list_indexes(self) ->
+    async def list_indexes(self) -> list[dict[str, Any]]:
         """Lists all standard (non-search) indexes on the collection."""
         try:
             return await self._collection.list_indexes().to_list(None)
@@ -698,12 +836,10 @@ class AsyncAtlasIndexManager:
             return []
         except InvalidOperation:
             # Client is closed (e.g., during shutdown/teardown)
-            logger.debug(
-                "Skipping list_indexes: MongoDB client is closed (likely during shutdown)"
-            )
+            logger.debug("Skipping list_indexes: MongoDB client is closed (likely during shutdown)")
            return []
 
-    async def get_index(self, name: str) ->
+    async def get_index(self, name: str) -> dict[str, Any] | None:
         """Gets a single standard index by name."""
         indexes = await self.list_indexes()
         return next((index for index in indexes if index.get("name") == name), None)
@@ -774,23 +910,21 @@ class AutoIndexManager:
         "_pending_tasks",
     )
 
-    def __init__(
-        self, collection: AsyncIOMotorCollection, index_manager: AsyncAtlasIndexManager
-    ):
+    def __init__(self, collection: AsyncIOMotorCollection, index_manager: AsyncAtlasIndexManager):
         self._collection = collection
         self._index_manager = index_manager
         # Cache of index creation decisions (index_name -> bool)
-        self._creation_cache:
+        self._creation_cache: dict[str, bool] = {}
         # Async lock to prevent race conditions during index creation
         self._lock = asyncio.Lock()
         # Track query patterns to determine which indexes to create
-        self._query_counts:
+        self._query_counts: dict[str, int] = {}
         # Track in-flight index creation tasks to prevent duplicates
-        self._pending_tasks:
+        self._pending_tasks: dict[str, asyncio.Task] = {}
 
     def _extract_index_fields_from_filter(
-        self, filter:
-    ) ->
+        self, filter: Mapping[str, Any] | None
+    ) -> list[tuple[str, int]]:
         """
         Extracts potential index fields from a MongoDB query filter.
 
@@ -805,15 +939,14 @@ class AutoIndexManager:
         if not filter:
             return []
 
-        index_fields:
+        index_fields: list[tuple[str, int]] = []
 
         def analyze_value(value: Any, field_name: str) -> None:
             """Recursively analyze filter values to extract index candidates."""
             if isinstance(value, dict):
                 # Handle operators like $gt, $gte, $lt, $lte, $ne, $in, $exists
                 if any(
-                    op in value
-                    for op in ["$gt", "$gte", "$lt", "$lte", "$ne", "$in", "$exists"]
+                    op in value for op in ["$gt", "$gte", "$lt", "$lte", "$ne", "$in", "$exists"]
                 ):
                     # These operators benefit from indexes
                     index_fields.append((field_name, ASCENDING))
@@ -838,8 +971,8 @@ class AutoIndexManager:
         return list(set(index_fields))  # Remove duplicates
 
     def _extract_sort_fields(
-        self, sort:
-    ) ->
+        self, sort: list[tuple[str, int]] | dict[str, int] | None
+    ) -> list[tuple[str, int]]:
         """
         Extracts index fields from sort specification.
 
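The retyped `_extract_index_fields_from_filter` above drives the auto-indexing feature: it walks a query filter and nominates fields worth indexing. A simplified sketch of that analysis follows (the diffed method recurses through nested values via `analyze_value` and deduplicates with `set()`; this flat version is an approximation):

```python
from pymongo import ASCENDING

RANGE_OPS = ("$gt", "$gte", "$lt", "$lte", "$ne", "$in", "$exists")


def extract_index_fields(filter_doc: dict | None) -> list[tuple[str, int]]:
    fields: list[tuple[str, int]] = []
    for key, value in (filter_doc or {}).items():
        if key.startswith("$"):
            continue  # this sketch skips top-level logical operators
        if isinstance(value, dict):
            if any(op in value for op in RANGE_OPS):
                fields.append((key, ASCENDING))  # range/inequality ops benefit from an index
        else:
            fields.append((key, ASCENDING))      # simple equality match
    return list(dict.fromkeys(fields))           # dedupe while keeping order


print(extract_index_fields({"age": {"$gte": 21}, "status": "active"}))
# [('age', 1), ('status', 1)]
```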
@@ -855,7 +988,7 @@ class AutoIndexManager:
         else:
             return []
 
-    def _generate_index_name(self, fields:
+    def _generate_index_name(self, fields: list[tuple[str, int]]) -> str:
         """Generate a human-readable index name from field list."""
         if not fields:
             return "auto_idx_empty"
@@ -868,7 +1001,7 @@ class AutoIndexManager:
         return f"auto_{'_'.join(parts)}"
 
     async def _create_index_safely(
-        self, index_name: str, all_fields:
+        self, index_name: str, all_fields: list[tuple[str, int]]
     ) -> None:
         """
         Safely create an index, handling errors gracefully.
@@ -888,9 +1021,7 @@ class AutoIndexManager:
 
             # Create the index
             keys = all_fields
-            await self._index_manager.create_index(
-                keys, name=index_name, background=True
-            )
+            await self._index_manager.create_index(keys, name=index_name, background=True)
             async with self._lock:
                 self._creation_cache[index_name] = True
             logger.info(
@@ -916,8 +1047,8 @@ class AutoIndexManager:
 
     async def ensure_index_for_query(
         self,
-        filter:
-        sort:
+        filter: Mapping[str, Any] | None = None,
+        sort: list[tuple[str, int]] | dict[str, int] | None = None,
         hint_threshold: int = AUTO_INDEX_HINT_THRESHOLD,
     ) -> None:
         """
@@ -986,9 +1117,7 @@ class AutoIndexManager:
 
             # Create task and track it
             # Cleanup happens in _create_index_safely's finally block
-            task = asyncio.create_task(
-                self._create_index_safely(index_name, all_fields)
-            )
+            task = asyncio.create_task(self._create_index_safely(index_name, all_fields))
             self._pending_tasks[index_name] = task
 
 
@@ -1028,22 +1157,33 @@ class ScopedCollectionWrapper:
         "_index_manager",
         "_auto_index_manager",
         "_auto_index_enabled",
+        "_query_validator",
+        "_resource_limiter",
+        "_parent_wrapper",
     )
 
     def __init__(
         self,
         real_collection: AsyncIOMotorCollection,
-        read_scopes:
+        read_scopes: list[str],
         write_scope: str,
         auto_index: bool = True,
+        query_validator: QueryValidator | None = None,
+        resource_limiter: ResourceLimiter | None = None,
+        parent_wrapper: Optional["ScopedMongoWrapper"] = None,
    ):
         self._collection = real_collection
         self._read_scopes = read_scopes
         self._write_scope = write_scope
         self._auto_index_enabled = auto_index
         # Lazily instantiated and cached
-        self._index_manager:
-        self._auto_index_manager:
+        self._index_manager: AsyncAtlasIndexManager | None = None
+        self._auto_index_manager: AutoIndexManager | None = None
+        # Query security and resource limits
+        self._query_validator = query_validator or QueryValidator()
+        self._resource_limiter = resource_limiter or ResourceLimiter()
+        # Reference to parent wrapper for token verification
+        self._parent_wrapper = parent_wrapper
 
     @property
     def index_manager(self) -> AsyncAtlasIndexManager:
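The `__slots__`/`__init__` hunk above threads two new collaborators through every collection wrapper: a QueryValidator and a ResourceLimiter, both falling back to fresh instances when omitted. A construction sketch (direct instantiation is hypothetical; the diff suggests wrappers are normally built and cached by ScopedMongoWrapper):

```python
from mdb_engine.database.query_validator import QueryValidator    # module paths per the file list
from mdb_engine.database.resource_limiter import ResourceLimiter
from mdb_engine.database.scoped_wrapper import ScopedCollectionWrapper

# Sharing one validator/limiter pair across wrappers avoids per-collection
# allocation; passing None triggers the `x or Default()` fallback shown above.
validator = QueryValidator()
limiter = ResourceLimiter()

wrapper = ScopedCollectionWrapper(
    real_collection=motor_collection,  # an AsyncIOMotorCollection handle (assumed available)
    read_scopes=["blog"],
    write_scope="blog",
    query_validator=validator,
    resource_limiter=limiter,
)
```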
@@ -1060,11 +1200,13 @@ class ScopedCollectionWrapper:
             # Create and cache it.
             # Pass the *real* collection, not 'self', as indexes
             # are not scoped by app_id.
-
+            # Access the real collection directly, bypassing the proxy
+            real_collection = super().__getattribute__("_collection")
+            self._index_manager = AsyncAtlasIndexManager(real_collection)
         return self._index_manager
 
     @property
-    def auto_index_manager(self) ->
+    def auto_index_manager(self) -> AutoIndexManager | None:
         """
         Gets the AutoIndexManager for magical automatic index creation.
 
@@ -1075,15 +1217,52 @@ class ScopedCollectionWrapper:
 
         if self._auto_index_manager is None:
             # Lazily instantiate auto-index manager
+            # Access the real collection directly, bypassing the proxy
+            real_collection = super().__getattribute__("_collection")
             self._auto_index_manager = AutoIndexManager(
-
+                real_collection,
                 self.index_manager,  # This will create index_manager if needed
             )
         return self._auto_index_manager
 
-    def
-
-
+    def __getattribute__(self, name: str) -> Any:
+        """
+        Override to prevent access to dangerous attributes on _collection.
+
+        Blocks access to _collection.database and _collection.client to prevent
+        bypassing scoping.
+        """
+        # Allow access to our own attributes
+        if name.startswith("_") and name not in (
+            "_collection",
+            "_read_scopes",
+            "_write_scope",
+            "_index_manager",
+            "_auto_index_manager",
+            "_auto_index_enabled",
+            "_query_validator",
+            "_resource_limiter",
+        ):
+            return super().__getattribute__(name)
+
+        # If accessing _collection, wrap it to block database/client access
+        if name == "_collection":
+            collection = super().__getattribute__(name)
+            # Return a proxy that blocks dangerous attributes
+            return _SecureCollectionProxy(collection)
+
+        return super().__getattribute__(name)
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        """Override to prevent modification of _collection."""
+        if name == "_collection" and hasattr(self, "_collection"):
+            raise AttributeError(
+                "Cannot modify '_collection' attribute. "
+                "Collection wrappers are immutable for security."
+            )
+        super().__setattr__(name, value)
+
+    def _inject_read_filter(self, filter: Mapping[str, Any] | None = None) -> dict[str, Any]:
         """
         Combines the user's filter with our mandatory scope filter.
 
@@ -1099,9 +1278,7 @@ class ScopedCollectionWrapper:
         # If filter exists, combine them robustly with $and
         return {"$and": [filter, scope_filter]}
 
-    async def insert_one(
-        self, document: Mapping[str, Any], *args, **kwargs
-    ) -> InsertOneResult:
+    async def insert_one(self, document: Mapping[str, Any], *args, **kwargs) -> InsertOneResult:
         """
         Injects the app_id before writing.
 
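`_inject_read_filter` above is the single choke point for read scoping: no user filter yields a bare app_id constraint, and any user filter is AND-ed with it, so user-supplied operators can narrow but never widen the visible scope. The same rule as a standalone function:

```python
from collections.abc import Mapping
from typing import Any


def inject_read_filter(filter: Mapping[str, Any] | None, read_scopes: list[str]) -> dict[str, Any]:
    scope_filter = {"app_id": {"$in": read_scopes}}
    if not filter:
        return scope_filter
    # Combine robustly with $and, exactly as the method above does
    return {"$and": [filter, scope_filter]}


print(inject_read_filter(None, ["blog"]))
# {'app_id': {'$in': ['blog']}}
print(inject_read_filter({"app_id": "other"}, ["blog"]))
# {'$and': [{'app_id': 'other'}, {'app_id': {'$in': ['blog']}}]} -> cannot escape the scope
```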
@@ -1110,12 +1287,31 @@ class ScopedCollectionWrapper:
         import time
 
         start_time = time.time()
-
+        # Get collection name safely (may not exist for new collections)
+        try:
+            collection_name = self._collection.name
+        except (AttributeError, TypeError):
+            # Fallback if name is not accessible
+            collection_name = "unknown"
 
         try:
+            # Verify token if needed (lazy verification for async contexts)
+            if self._parent_wrapper:
+                await self._parent_wrapper._verify_token_if_needed()
+
+            # Validate document size before insert
+            self._resource_limiter.validate_document_size(document)
+
             # Use dictionary spread to create a non-mutating copy
             doc_to_insert = {**document, "app_id": self._write_scope}
-
+
+            # Enforce query timeout
+            kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
+            # Remove maxTimeMS - insert_one doesn't accept it
+            kwargs_for_insert = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
+
+            # Use self._collection.insert_one() - proxy delegates correctly
+            result = await self._collection.insert_one(doc_to_insert, *args, **kwargs_for_insert)
             duration_ms = (time.time() - start_time) * 1000
             record_operation(
                 "database.insert_one",
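The insert_one hunk above establishes a kwargs pattern repeated in the read and write paths below: let the ResourceLimiter stamp a maxTimeMS budget into kwargs, then strip it back out for Motor calls whose signatures do not accept it. A minimal stand-in for enforce_query_timeout is assumed here; its real behavior lives in mdb_engine/database/resource_limiter.py.

```python
from typing import Any

DEFAULT_MAX_TIME_MS = 30_000  # assumed default budget


def enforce_query_timeout(kwargs: dict[str, Any]) -> dict[str, Any]:
    # Hypothetical stand-in: add a cap only if the caller did not set one
    kwargs.setdefault("maxTimeMS", DEFAULT_MAX_TIME_MS)
    return kwargs


kwargs = enforce_query_timeout({"bypass_document_validation": False})
kwargs_for_insert = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
assert "maxTimeMS" not in kwargs_for_insert  # insert_one never sees the cap
```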
@@ -1156,7 +1352,7 @@ class ScopedCollectionWrapper:
             ) from e
 
     async def insert_many(
-        self, documents:
+        self, documents: list[Mapping[str, Any]], *args, **kwargs
     ) -> InsertManyResult:
         """
         Injects the app_id into all documents before writing.
@@ -1164,12 +1360,21 @@ class ScopedCollectionWrapper:
         Safety: Uses a list comprehension to create copies of all documents,
         avoiding in-place mutation of the original list.
         """
+        # Validate all document sizes before insert
+        self._resource_limiter.validate_documents_size(documents)
+
+        # Enforce query timeout
+        kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
+        # Remove maxTimeMS - insert_many doesn't accept it
+        kwargs_for_insert = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
+
         docs_to_insert = [{**doc, "app_id": self._write_scope} for doc in documents]
-
+        # Use self._collection.insert_many() - proxy delegates correctly
+        return await self._collection.insert_many(docs_to_insert, *args, **kwargs_for_insert)
 
     async def find_one(
-        self, filter:
-    ) ->
+        self, filter: Mapping[str, Any] | None = None, *args, **kwargs
+    ) -> dict[str, Any] | None:
         """
         Applies the read scope to the filter.
         Automatically ensures appropriate indexes exist for the query.
@@ -1177,20 +1382,36 @@ class ScopedCollectionWrapper:
         import time
 
         start_time = time.time()
-
+        # Access real collection directly (bypass proxy) for name attribute
+        # Use object.__getattribute__ to bypass our custom __getattribute__ that wraps in proxy
+        real_collection = object.__getattribute__(self, "_collection")
+        collection_name = real_collection.name
 
         try:
+            # Verify token if needed (lazy verification for async contexts)
+            if self._parent_wrapper:
+                await self._parent_wrapper._verify_token_if_needed()
+
+            # Validate query filter for security
+            self._query_validator.validate_filter(filter)
+            self._query_validator.validate_sort(kwargs.get("sort"))
+
+            # Enforce query timeout - but remove maxTimeMS for find_one
+            # because Motor's find_one internally creates a cursor and some versions
+            # don't handle maxTimeMS correctly when passed to find_one
+            kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
+            # Remove maxTimeMS to avoid cursor creation errors in find_one
+            kwargs_for_find_one = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
+
             # Magical auto-indexing: ensure indexes exist before querying
             # Note: We analyze the user's filter, not the scoped filter, since
             # app_id index is always ensured separately
             if self.auto_index_manager:
                 sort = kwargs.get("sort")
-                await self.auto_index_manager.ensure_index_for_query(
-                    filter=filter, sort=sort
-                )
+                await self.auto_index_manager.ensure_index_for_query(filter=filter, sort=sort)
 
             scoped_filter = self._inject_read_filter(filter)
-            result = await self._collection.find_one(scoped_filter, *args, **
+            result = await self._collection.find_one(scoped_filter, *args, **kwargs_for_find_one)
             duration_ms = (time.time() - start_time) * 1000
             record_operation(
                 "database.find_one",
@@ -1200,7 +1421,7 @@ class ScopedCollectionWrapper:
                 app_slug=self._write_scope,
             )
             return result
-        except
+        except (PyMongoError, ValueError, TypeError, KeyError, AttributeError):
             duration_ms = (time.time() - start_time) * 1000
             record_operation(
                 "database.find_one",
@@ -1211,14 +1432,31 @@ class ScopedCollectionWrapper:
             )
             raise
 
-    def find(
-        self, filter: Optional[Mapping[str, Any]] = None, *args, **kwargs
-    ) -> AsyncIOMotorCursor:
+    def find(self, filter: Mapping[str, Any] | None = None, *args, **kwargs) -> AsyncIOMotorCursor:
         """
         Applies the read scope to the filter.
         Returns an async cursor, just like motor.
         Automatically ensures appropriate indexes exist for the query.
         """
+        # Validate query filter for security
+        self._query_validator.validate_filter(filter)
+        self._query_validator.validate_sort(kwargs.get("sort"))
+
+        # Enforce result limit
+        limit = kwargs.get("limit")
+        if limit is not None:
+            kwargs["limit"] = self._resource_limiter.enforce_result_limit(limit)
+
+        # Enforce batch size
+        batch_size = kwargs.get("batch_size")
+        if batch_size is not None:
+            kwargs["batch_size"] = self._resource_limiter.enforce_batch_size(batch_size)
+
+        # Enforce query timeout - but remove maxTimeMS before passing to find()
+        # because Cursor constructor doesn't accept maxTimeMS
+        kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
+        kwargs_for_find = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
+
         # Magical auto-indexing: ensure indexes exist before querying
         # Note: This is fire-and-forget, doesn't block cursor creation
         if self.auto_index_manager:
@@ -1227,23 +1465,20 @@ class ScopedCollectionWrapper:
             # Create a task to ensure index (fire and forget, managed to prevent accumulation)
             async def _safe_index_task():
                 try:
-                    await self.auto_index_manager.ensure_index_for_query(
-                        filter=filter, sort=sort
-                    )
+                    await self.auto_index_manager.ensure_index_for_query(filter=filter, sort=sort)
                 except (
                     OperationFailure,
                     ConnectionFailure,
                     ServerSelectionTimeoutError,
                     InvalidOperation,
                 ) as e:
-                    logger.debug(
-
-                    )
+                    logger.debug(f"Auto-index creation failed for query (non-critical): {e}")
+                # Let other exceptions bubble up - they are non-recoverable (Type 4)
 
             _create_managed_task(_safe_index_task(), task_name="auto_index_check")
 
         scoped_filter = self._inject_read_filter(filter)
-        return self._collection.find(scoped_filter, *args, **
+        return self._collection.find(scoped_filter, *args, **kwargs_for_find)
 
     async def update_one(
         self, filter: Mapping[str, Any], update: Mapping[str, Any], *args, **kwargs
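The find() hunk above schedules its index check through `_create_managed_task` rather than a bare `asyncio.create_task`, so the cursor is returned immediately while the check runs in the background. One plausible shape for that helper, given its docstring ("Creates a background task using asyncio.create_task()") and the "managed to prevent accumulation" comment; the registry-and-callback detail is an assumption:

```python
import asyncio
from collections.abc import Coroutine
from typing import Any

_BACKGROUND_TASKS: set[asyncio.Task] = set()


def create_managed_task(coro: Coroutine[Any, Any, Any], task_name: str | None = None) -> None:
    task = asyncio.create_task(coro, name=task_name)
    _BACKGROUND_TASKS.add(task)  # strong reference: the task cannot be GC'd mid-flight
    task.add_done_callback(_BACKGROUND_TASKS.discard)  # drop it once finished
```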
@@ -1252,8 +1487,16 @@ class ScopedCollectionWrapper:
         Applies the read scope to the filter.
         Note: This only scopes the *filter*, not the update operation.
         """
+        # Validate query filter for security
+        self._query_validator.validate_filter(filter)
+
+        # Enforce query timeout
+        kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
+        # Remove maxTimeMS - update_one doesn't accept it
+        kwargs_for_update = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
+
         scoped_filter = self._inject_read_filter(filter)
-        return await self._collection.update_one(scoped_filter, update, *args, **
+        return await self._collection.update_one(scoped_filter, update, *args, **kwargs_for_update)
 
     async def update_many(
         self, filter: Mapping[str, Any], update: Mapping[str, Any], *args, **kwargs
@@ -1262,48 +1505,78 @@ class ScopedCollectionWrapper:
         Applies the read scope to the filter.
         Note: This only scopes the *filter*, not the update operation.
         """
+        # Validate query filter for security
+        self._query_validator.validate_filter(filter)
+
+        # Enforce query timeout
+        kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
+        # Remove maxTimeMS - update_many doesn't accept it
+        kwargs_for_update = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
+
         scoped_filter = self._inject_read_filter(filter)
-        return await self._collection.update_many(
-            scoped_filter, update, *args, **kwargs
-        )
+        return await self._collection.update_many(scoped_filter, update, *args, **kwargs_for_update)
 
-    async def delete_one(
-        self, filter: Mapping[str, Any], *args, **kwargs
-    ) -> DeleteResult:
+    async def delete_one(self, filter: Mapping[str, Any], *args, **kwargs) -> DeleteResult:
         """Applies the read scope to the filter."""
+        # Validate query filter for security
+        self._query_validator.validate_filter(filter)
+
+        # Enforce query timeout
+        kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
+        # Remove maxTimeMS - delete_one doesn't accept it
+        kwargs_for_delete = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
+
         scoped_filter = self._inject_read_filter(filter)
-        return await self._collection.delete_one(scoped_filter, *args, **
+        return await self._collection.delete_one(scoped_filter, *args, **kwargs_for_delete)
 
-    async def delete_many(
-        self, filter: Mapping[str, Any], *args, **kwargs
-    ) -> DeleteResult:
+    async def delete_many(self, filter: Mapping[str, Any], *args, **kwargs) -> DeleteResult:
         """Applies the read scope to the filter."""
+        # Validate query filter for security
+        self._query_validator.validate_filter(filter)
+
+        # Enforce query timeout
+        kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
+        # Remove maxTimeMS - delete_many doesn't accept it
+        kwargs_for_delete = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
+
         scoped_filter = self._inject_read_filter(filter)
-        return await self._collection.delete_many(scoped_filter, *args, **
+        return await self._collection.delete_many(scoped_filter, *args, **kwargs_for_delete)
 
     async def count_documents(
-        self, filter:
+        self, filter: Mapping[str, Any] | None = None, *args, **kwargs
     ) -> int:
         """
         Applies the read scope to the filter for counting.
         Automatically ensures appropriate indexes exist for the query.
         """
+        # Validate query filter for security
+        self._query_validator.validate_filter(filter)
+
+        # Note: count_documents doesn't reliably support maxTimeMS in all Motor versions
+        # Remove it to avoid cursor creation errors when auto-indexing triggers list_indexes()
+        kwargs_for_count = {k: v for k, v in kwargs.items() if k != "maxTimeMS"}
+        # Don't enforce timeout for count_documents to avoid issues with cursor operations
+
         # Magical auto-indexing: ensure indexes exist before querying
         if self.auto_index_manager:
             await self.auto_index_manager.ensure_index_for_query(filter=filter)
 
         scoped_filter = self._inject_read_filter(filter)
-        return await self._collection.count_documents(scoped_filter, *args, **
+        return await self._collection.count_documents(scoped_filter, *args, **kwargs_for_count)
 
-    def aggregate(
-        self, pipeline: List[Dict[str, Any]], *args, **kwargs
-    ) -> AsyncIOMotorCursor:
+    def aggregate(self, pipeline: list[dict[str, Any]], *args, **kwargs) -> AsyncIOMotorCursor:
         """
         Injects a scope filter into the pipeline. For normal pipelines, we prepend
         a $match stage. However, if the first stage is $vectorSearch, we embed
         the read_scope filter into its 'filter' property, because $vectorSearch must
         remain the very first stage in Atlas.
         """
+        # Validate aggregation pipeline for security
+        self._query_validator.validate_pipeline(pipeline)
+
+        # Enforce query timeout - Motor's aggregate() accepts maxTimeMS
+        kwargs = self._resource_limiter.enforce_query_timeout(kwargs)
+
         if not pipeline:
             # No stages given, just prepend our $match
             scope_match_stage = {"$match": {"app_id": {"$in": self._read_scopes}}}
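The aggregate() docstring above pins down a subtle Atlas rule: $vectorSearch must remain the first pipeline stage, so the scope constraint cannot simply be prepended as $match. A sketch of that branching (merging with an existing $vectorSearch filter via $and is an assumption; the no-pipeline and plain-pipeline branches mirror the hunk):

```python
from typing import Any


def scope_pipeline(pipeline: list[dict[str, Any]], read_scopes: list[str]) -> list[dict[str, Any]]:
    scope = {"app_id": {"$in": read_scopes}}
    if pipeline and "$vectorSearch" in pipeline[0]:
        # Embed the scope into the $vectorSearch 'filter' property
        first = {**pipeline[0]}
        vs = {**first["$vectorSearch"]}
        existing = vs.get("filter")
        vs["filter"] = {"$and": [existing, scope]} if existing else scope
        first["$vectorSearch"] = vs
        return [first, *pipeline[1:]]
    # Normal pipelines: prepend a $match stage
    return [{"$match": scope}, *pipeline]
```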
@@ -1359,45 +1632,175 @@ class ScopedMongoWrapper:
 
     # Class-level cache for collections that have app_id index checked
     # Key: collection name, Value: boolean (True if index exists, False if check is pending)
-    _app_id_index_cache: ClassVar[
+    _app_id_index_cache: ClassVar[dict[str, bool]] = {}
     # Lock to prevent race conditions when multiple requests try to create the same index
     _app_id_index_lock: ClassVar[asyncio.Lock] = asyncio.Lock()
 
-    __slots__ = (
+    __slots__ = (
+        "_db",
+        "_read_scopes",
+        "_write_scope",
+        "_wrapper_cache",
+        "_auto_index",
+        "_query_validator",
+        "_resource_limiter",
+        "_app_slug",
+        "_app_token",
+        "_app_secrets_manager",
+        "_token_verified",
+        "_token_verification_lock",
+    )
 
     def __init__(
         self,
         real_db: AsyncIOMotorDatabase,
-        read_scopes:
+        read_scopes: list[str],
         write_scope: str,
         auto_index: bool = True,
+        query_validator: QueryValidator | None = None,
+        resource_limiter: ResourceLimiter | None = None,
+        app_slug: str | None = None,
+        app_token: str | None = None,
+        app_secrets_manager: Optional["AppSecretsManager"] = None,
     ):
         self._db = real_db
         self._read_scopes = read_scopes
         self._write_scope = write_scope
         self._auto_index = auto_index
 
+        # Query security and resource limits (shared across all collections)
+        self._query_validator = query_validator or QueryValidator()
+        self._resource_limiter = resource_limiter or ResourceLimiter()
+
+        # Token verification for app authentication
+        self._app_slug = app_slug
+        self._app_token = app_token
+        self._app_secrets_manager = app_secrets_manager
+        self._token_verified = False
+        self._token_verification_lock = asyncio.Lock()
+
         # Cache for created collection wrappers.
-        self._wrapper_cache:
+        self._wrapper_cache: dict[str, ScopedCollectionWrapper] = {}
 
-
-    def database(self) -> AsyncIOMotorDatabase:
+    async def _verify_token_if_needed(self) -> None:
         """
-
+        Verify app token lazily on first database operation.
 
-        This
-
+        This method ensures token verification happens even when get_scoped_db()
+        is called from an async context where sync verification was skipped.
 
-
-
+        Raises:
+            ValueError: If token verification fails
+        """
+        # If already verified, skip
+        if self._token_verified:
+            return
 
-
-
-
-
-
+        # If no token or secrets manager, skip verification
+        if not self._app_token or not self._app_secrets_manager or not self._app_slug:
+            self._token_verified = True
+            return
+
+        # Use lock to prevent race conditions
+        async with self._token_verification_lock:
+            # Double-check after acquiring lock
+            if self._token_verified:
+                return
+
+            # Verify token
+            is_valid = await self._app_secrets_manager.verify_app_secret(
+                self._app_slug, self._app_token
+            )
+
+            if not is_valid:
+                logger.warning(f"Security: Invalid app token for '{self._app_slug}'")
+                raise ValueError("Invalid app token")
+
+            # Mark as verified
+            self._token_verified = True
+            logger.debug(f"Token verified for app '{self._app_slug}'")
+
+    def _validate_cross_app_access(self, prefixed_name: str) -> None:
+        """
+        Validate that cross-app collection access is authorized.
+
+        Args:
+            prefixed_name: Prefixed collection name (e.g., "other_app_collection")
+
+        Raises:
+            ValueError: If cross-app access is not authorized
+        """
+        # Extract app slug from prefixed name
+        target_app = _extract_app_slug_from_prefixed_name(prefixed_name)
+        if target_app is None:
+            return  # Same-app access or not a valid prefixed name
+
+        # Check if target app is in read_scopes
+        if target_app not in self._read_scopes:
+            logger.warning(
+                f"Security: Unauthorized cross-app access attempt. "
+                f"Collection: '{prefixed_name}', Target app: '{target_app}', "
+                f"Read scopes: {self._read_scopes}, Write scope: {self._write_scope}"
+            )
+            raise ValueError(
+                f"Access to collection '{prefixed_name}' not authorized. "
+                f"App '{target_app}' is not in read_scopes {self._read_scopes}. "
+                "Cross-app access must be explicitly granted via read_scopes."
+            )
+
+        # Log authorized cross-app access for audit trail
+        logger.info(
+            f"Cross-app access authorized. "
+            f"Collection: '{prefixed_name}', From app: '{self._write_scope}', "
+            f"To app: '{target_app}'"
+        )
+
+    def __getattribute__(self, name: str) -> Any:
+        """
+        Override to validate collection names before attribute access.
+        This ensures validation happens even if MagicMock creates attributes dynamically.
         """
-
+        # Handle our own attributes first (use super() to avoid recursion)
+        if name.startswith("_") or name in ("get_collection",):
+            return super().__getattribute__(name)
+
+        # Validate collection name for security BEFORE checking if attribute exists
+        # This ensures ValueError is raised even if MagicMock would create the attribute
+        validation_error = None
+        if not name.startswith("_"):
+            try:
+                _validate_collection_name(name, allow_prefixed=False)
+            except ValueError as e:
+                # Log the warning without accessing object attributes to avoid recursion
+                # The validation error itself is what matters, not the logging details
+                try:
+                    logger.warning(
+                        f"Security: Invalid collection name attempted. "
+                        f"Name: '{name}', Error: {e}"
+                    )
+                except (AttributeError, RuntimeError):
+                    # If logging fails due to logger issues, continue -
+                    # validation error is what matters
+                    # Type 2: Recoverable - we can continue without logging
+                    pass
+                # Store the error to raise after checking attribute existence
+                # This ensures we raise ValueError even if MagicMock creates the attribute
+                validation_error = ValueError(str(e))
+
+        # Continue with normal attribute access
+        try:
+            attr = super().__getattribute__(name)
+            # If validation failed, raise ValueError now (even if attribute exists)
+            if validation_error is not None:
+                raise validation_error
+            return attr
+        except AttributeError:
|
+
# Attribute doesn't exist
|
|
1799
|
+
# If validation failed, raise ValueError (from None: unrelated to AttributeError)
|
|
1800
|
+
if validation_error is not None:
|
|
1801
|
+
raise validation_error from None
|
|
1802
|
+
# Delegate to __getattr__ for collection creation
|
|
1803
|
+
return self.__getattr__(name)
|
|
1401
1804
|
|
|
1402
1805
|
def __getattr__(self, name: str) -> ScopedCollectionWrapper:
|
|
1403
1806
|
"""
|
|
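The `_verify_token_if_needed` hunk above is an async double-checked locking pattern: a lock-free fast path on `_token_verified`, a lock to serialize concurrent first calls, and a re-check inside the lock so the expensive verification runs at most once. A minimal runnable sketch of the same pattern; the `_verify_app_secret` stub here is a placeholder, not the real `AppSecretsManager.verify_app_secret`:

```python
import asyncio


class LazyTokenVerifier:
    """Sketch of verify-once token checking via async double-checked locking."""

    def __init__(self, app_slug: str, app_token: str) -> None:
        self._app_slug = app_slug
        self._app_token = app_token
        self._verified = False
        self._lock = asyncio.Lock()

    async def _verify_app_secret(self, slug: str, token: str) -> bool:
        # Placeholder; the real AppSecretsManager presumably checks the token
        # against a stored secret or hash.
        await asyncio.sleep(0)  # simulate I/O
        return token == "secret-token"

    async def ensure_verified(self) -> None:
        if self._verified:  # fast path: no lock after the first success
            return
        async with self._lock:  # serialize concurrent first calls
            if self._verified:  # double-check after acquiring the lock
                return
            if not await self._verify_app_secret(self._app_slug, self._app_token):
                raise ValueError("Invalid app token")
            self._verified = True  # subsequent calls take the fast path


async def main() -> None:
    verifier = LazyTokenVerifier("demo_app", "secret-token")
    # Five concurrent first operations; verification itself runs only once.
    await asyncio.gather(*(verifier.ensure_verified() for _ in range(5)))


asyncio.run(main())
```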
@@ -1406,6 +1809,17 @@ class ScopedMongoWrapper:
         If `name` is a collection, returns a `ScopedCollectionWrapper`.
         """
 
+        # Explicitly block access to 'database' property (removed for security)
+        if name == "database":
+            logger.warning(
+                f"Security: Attempted access to 'database' property. " f"App: {self._write_scope}"
+            )
+            raise AttributeError(
+                "'database' property has been removed for security. "
+                "Use collection.index_manager for index operations. "
+                "All data access must go through scoped collections."
+            )
+
         # Prevent proxying private/special attributes
         if name.startswith("_"):
             raise AttributeError(
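With this hunk, the old `db.database` escape hatch fails loudly instead of handing back the raw Motor database. A toy stand-in showing the caller-visible behavior (not the real wrapper class):

```python
class _BlocksDatabaseAttr:
    """Toy model of the wrapper's rejection of the removed 'database' property."""

    def __getattr__(self, name: str):
        if name == "database":
            raise AttributeError(
                "'database' property has been removed for security. "
                "All data access must go through scoped collections."
            )
        raise AttributeError(name)


db = _BlocksDatabaseAttr()
try:
    db.database
except AttributeError as exc:
    print(f"blocked: {exc}")
```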
@@ -1413,11 +1827,33 @@ class ScopedMongoWrapper:
                 "Access to private attributes is blocked."
             )
 
+        # Note: Validation already happened in __getattribute__, but we validate again
+        # for safety in case __getattr__ is called directly
+        try:
+            _validate_collection_name(name, allow_prefixed=False)
+        except ValueError as e:
+            logger.warning(
+                f"Security: Invalid collection name attempted. "
+                f"Name: '{name}', App: {self._write_scope}, Error: {e}"
+            )
+            raise
+
         # Construct the prefixed collection name, e.g., "data_imaging_workouts"
         # `self._write_scope` holds the slug (e.g., "data_imaging")
         # `name` holds the base name (e.g., "workouts")
         prefixed_name = f"{self._write_scope}_{name}"
 
+        # Validate prefixed name as well (for reserved names check)
+        try:
+            _validate_collection_name(prefixed_name, allow_prefixed=True)
+        except ValueError as e:
+            logger.warning(
+                f"Security: Invalid prefixed collection name. "
+                f"Base name: '{name}', Prefixed: '{prefixed_name}', "
+                f"App: {self._write_scope}, Error: {e}"
+            )
+            raise
+
         # Check cache first using the *prefixed_name*
         if prefixed_name in self._wrapper_cache:
             return self._wrapper_cache[prefixed_name]
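The double validation above runs once on the base name and once on the `write_scope`-prefixed result. A self-contained sketch of that resolution; the regex is an illustrative assumption, not the actual rule set of `_validate_collection_name`:

```python
import re

# Assumed rule for illustration only; mdb_engine's _validate_collection_name
# enforces its own checks (reserved names, prefix rules, etc.).
_NAME_RE = re.compile(r"^[a-z][a-z0-9_]*$")


def resolve_collection_name(write_scope: str, base_name: str) -> str:
    """Validate the base name, prefix it with the write scope, re-validate."""
    if not _NAME_RE.match(base_name):
        raise ValueError(f"Invalid collection name: {base_name!r}")
    prefixed = f"{write_scope}_{base_name}"
    if not _NAME_RE.match(prefixed):
        raise ValueError(f"Invalid prefixed collection name: {prefixed!r}")
    return prefixed


print(resolve_collection_name("data_imaging", "workouts"))  # data_imaging_workouts
```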
@@ -1439,6 +1875,8 @@ class ScopedMongoWrapper:
             read_scopes=self._read_scopes,
             write_scope=self._write_scope,
             auto_index=self._auto_index,
+            query_validator=self._query_validator,
+            resource_limiter=self._resource_limiter,
         )
 
         # Magically ensure app_id index exists (it's always used in queries)
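This hunk threads the wrapper's single `query_validator` and `resource_limiter` into each per-collection wrapper instead of constructing new ones, so every collection enforces one shared policy. A small sketch of that injection pattern with stand-in types:

```python
from dataclasses import dataclass


@dataclass
class SharedLimits:
    """Stand-in for a shared QueryValidator/ResourceLimiter pair."""
    max_result_docs: int = 10_000


@dataclass
class CollectionHandle:
    name: str
    limits: SharedLimits  # injected, never built per collection


class DbHandle:
    def __init__(self) -> None:
        self._limits = SharedLimits()
        self._cache: dict[str, CollectionHandle] = {}

    def collection(self, name: str) -> CollectionHandle:
        # Hand the *same* limits object to every collection handle, mirroring
        # how query_validator/resource_limiter are forwarded in the diff.
        if name not in self._cache:
            self._cache[name] = CollectionHandle(name, self._limits)
        return self._cache[name]


db = DbHandle()
assert db.collection("a").limits is db.collection("b").limits
```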
@@ -1476,17 +1914,13 @@ class ScopedMongoWrapper:
                         f"connection is closed (likely during shutdown)"
                     )
                     async with ScopedMongoWrapper._app_id_index_lock:
-                        ScopedMongoWrapper._app_id_index_cache.pop(
-                            collection_name, None
-                        )
+                        ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
                     return
 
                 has_index = await self._ensure_app_id_index(real_collection)
                 # Update cache with result (inside lock for thread-safety)
                 async with ScopedMongoWrapper._app_id_index_lock:
-                    ScopedMongoWrapper._app_id_index_cache[collection_name] = (
-                        has_index
-                    )
+                    ScopedMongoWrapper._app_id_index_cache[collection_name] = has_index
             except (
                 ConnectionFailure,
                 ServerSelectionTimeoutError,
@@ -1499,30 +1933,82 @@ class ScopedMongoWrapper:
                 )
                 # Remove from cache on error so we can retry later
                 async with ScopedMongoWrapper._app_id_index_lock:
-                    ScopedMongoWrapper._app_id_index_cache.pop(
-                        collection_name, None
-                    )
+                    ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
             except OperationFailure as e:
                 # Index creation failed for other reasons (non-critical)
                 logger.debug(f"App_id index creation failed (non-critical): {e}")
                 # Remove from cache on error so we can retry later
                 async with ScopedMongoWrapper._app_id_index_lock:
-                    ScopedMongoWrapper._app_id_index_cache.pop(
-                        collection_name, None
-                    )
+                    ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
+            # Let other exceptions bubble up - they are non-recoverable (Type 4)
 
         # Check cache first (quick check before lock)
         if collection_name not in ScopedMongoWrapper._app_id_index_cache:
             # Fire and forget - task will check lock internally
             # (managed to prevent accumulation)
-            _create_managed_task(
-                _safe_app_id_index_check(), task_name="app_id_index_check"
-            )
+            _create_managed_task(_safe_app_id_index_check(), task_name="app_id_index_check")
 
         # Store it in the cache for this instance using the *prefixed_name*
         self._wrapper_cache[prefixed_name] = wrapper
         return wrapper
 
+    def _find_matched_app_for_collection(self, name: str) -> str | None:
+        """
+        Check if collection name matches any app slug in read_scopes (cross-app access).
+
+        Args:
+            name: Collection name to check
+
+        Returns:
+            Matched app slug if found, None otherwise
+        """
+        if "_" not in name:
+            return None
+
+        # Check if any app slug in read_scopes matches the beginning of the name
+        for app_slug in self._read_scopes:
+            if name.startswith(f"{app_slug}_") and app_slug != self._write_scope:
+                return app_slug
+        return None
+
+    def _resolve_prefixed_collection_name(self, name: str, matched_app: str | None) -> str:
+        """
+        Resolve the prefixed collection name based on matched app or write scope.
+
+        Args:
+            name: Collection name (base or prefixed)
+            matched_app: Matched app slug if cross-app access, None otherwise
+
+        Returns:
+            Prefixed collection name
+
+        Raises:
+            ValueError: If prefixed name is invalid
+        """
+        if matched_app:
+            # This is authorized cross-app access
+            prefixed_name = name
+            # Log authorized cross-app access for audit trail
+            logger.info(
+                f"Cross-app access authorized. "
+                f"Collection: '{prefixed_name}', From app: '{self._write_scope}', "
+                f"To app: '{matched_app}'"
+            )
+        else:
+            # Regular collection name - prefix with write_scope
+            prefixed_name = f"{self._write_scope}_{name}"
+            # Validate prefixed name
+            try:
+                _validate_collection_name(prefixed_name, allow_prefixed=True)
+            except ValueError as e:
+                logger.warning(
+                    f"Security: Invalid prefixed collection name in get_collection(). "
+                    f"Base name: '{name}', Prefixed: '{prefixed_name}', "
+                    f"App: {self._write_scope}, Error: {e}"
+                )
+                raise
+        return prefixed_name
+
     def get_collection(self, name: str) -> ScopedCollectionWrapper:
         """
         Get a collection by name (Motor-like API).
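`_find_matched_app_for_collection` treats a name as cross-app only when it starts with another granted app's slug followed by `_`. Extracted as a standalone function, the logic behaves like this:

```python
def find_matched_app(name: str, read_scopes: list[str], write_scope: str) -> str | None:
    """Return the owning app's slug when `name` is a prefixed cross-app collection."""
    if "_" not in name:
        return None  # bare names can never be cross-app
    for app_slug in read_scopes:
        # A match on another app's "<slug>_" prefix marks cross-app access.
        if name.startswith(f"{app_slug}_") and app_slug != write_scope:
            return app_slug
    return None


scopes = ["click_tracker", "data_imaging"]
print(find_matched_app("click_tracker_clicks", scopes, "data_imaging"))  # click_tracker
print(find_matched_app("workouts", scopes, "data_imaging"))              # None
```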
@@ -1539,6 +2025,9 @@ class ScopedMongoWrapper:
         Returns:
             ScopedCollectionWrapper instance
 
+        Raises:
+            ValueError: If collection name is invalid or cross-app access is not authorized
+
         Example:
             # Same-app collection (base name)
             collection = db.get_collection("my_collection")
@@ -1546,15 +2035,21 @@ class ScopedMongoWrapper:
             # Cross-app collection (fully prefixed)
             collection = db.get_collection("click_tracker_clicks")
         """
-        #
-
-
-
-
-
-
-
-
+        # Validate collection name for security
+        try:
+            _validate_collection_name(name, allow_prefixed=True)
+        except ValueError as e:
+            logger.warning(
+                f"Security: Invalid collection name in get_collection(). "
+                f"Name: '{name}', App: {self._write_scope}, Error: {e}"
+            )
+            raise
+
+        # Check if name is already fully prefixed (cross-app access)
+        matched_app = self._find_matched_app_for_collection(name)
+
+        # Resolve prefixed name based on matched app or write scope
+        prefixed_name = self._resolve_prefixed_collection_name(name, matched_app)
 
         # Check cache first
         if prefixed_name in self._wrapper_cache:
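Taken together with the docstring above, the call patterns look like the following sketch (not runnable as-is: it assumes a configured `ScopedMongoWrapper` named `db` with write scope `data_imaging` and `click_tracker` among its read scopes):

```python
# Same-app access: the base name is expanded to "data_imaging_my_collection".
mine = db.get_collection("my_collection")

# Cross-app access: honored only because "click_tracker" is in read_scopes;
# an unlisted prefix raises ValueError via _resolve_prefixed_collection_name().
theirs = db.get_collection("click_tracker_clicks")

# Repeated lookups return the cached ScopedCollectionWrapper instance.
assert db.get_collection("my_collection") is mine
```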
@@ -1576,6 +2071,9 @@ class ScopedMongoWrapper:
             read_scopes=self._read_scopes,
             write_scope=self._write_scope,
             auto_index=self._auto_index,
+            query_validator=self._query_validator,
+            resource_limiter=self._resource_limiter,
+            parent_wrapper=self,
         )
 
         # Magically ensure app_id index exists (background task)
@@ -1607,16 +2105,12 @@ class ScopedMongoWrapper:
                         f"connection is closed (likely during shutdown)"
                     )
                     async with ScopedMongoWrapper._app_id_index_lock:
-                        ScopedMongoWrapper._app_id_index_cache.pop(
-                            collection_name, None
-                        )
+                        ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
                     return
 
                 has_index = await self._ensure_app_id_index(real_collection)
                 async with ScopedMongoWrapper._app_id_index_lock:
-                    ScopedMongoWrapper._app_id_index_cache[collection_name] = (
-                        has_index
-                    )
+                    ScopedMongoWrapper._app_id_index_cache[collection_name] = has_index
             except (
                 ConnectionFailure,
                 ServerSelectionTimeoutError,
@@ -1628,27 +2122,53 @@ class ScopedMongoWrapper:
                     f"connection error (likely during shutdown): {e}"
                 )
                 async with ScopedMongoWrapper._app_id_index_lock:
-                    ScopedMongoWrapper._app_id_index_cache.pop(
-                        collection_name, None
-                    )
+                    ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
             except OperationFailure as e:
                 # Index creation failed for other reasons (non-critical)
                 logger.debug(f"App_id index creation failed (non-critical): {e}")
                 async with ScopedMongoWrapper._app_id_index_lock:
-                    ScopedMongoWrapper._app_id_index_cache.pop(
-                        collection_name, None
-                    )
+                    ScopedMongoWrapper._app_id_index_cache.pop(collection_name, None)
+            # Let other exceptions bubble up - they are non-recoverable (Type 4)
 
         if collection_name not in ScopedMongoWrapper._app_id_index_cache:
             # Use managed task creation to prevent accumulation
-            _create_managed_task(
-                _safe_app_id_index_check(), task_name="app_id_index_check"
-            )
+            _create_managed_task(_safe_app_id_index_check(), task_name="app_id_index_check")
 
         # Store it in the cache
         self._wrapper_cache[prefixed_name] = wrapper
         return wrapper
 
+    def __getitem__(self, name: str) -> ScopedCollectionWrapper:
+        """
+        Support bracket notation for collection access (e.g., db["collection_name"]).
+
+        This allows compatibility with code that uses bracket notation instead of
+        attribute access (e.g., TokenBlacklist, SessionManager).
+
+        Args:
+            name: Collection name (base name, will be prefixed with write_scope)
+
+        Returns:
+            ScopedCollectionWrapper instance
+
+        Raises:
+            ValueError: If collection name is invalid
+
+        Example:
+            collection = db["my_collection"]  # Same as db.my_collection
+        """
+        # Validate collection name for security (get_collection will do additional validation)
+        try:
+            _validate_collection_name(name, allow_prefixed=False)
+        except ValueError as e:
+            logger.warning(
+                f"Security: Invalid collection name in __getitem__(). "
+                f"Name: '{name}', App: {self._write_scope}, Error: {e}"
+            )
+            raise
+
+        return self.get_collection(name)
+
     async def _ensure_app_id_index(self, collection: AsyncIOMotorCollection) -> bool:
         """
         Ensures app_id index exists on collection.
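`__getitem__` makes bracket access a thin alias for `get_collection()`. A toy model of the delegation:

```python
class BracketAlias:
    """Toy model: __getitem__ delegates to get_collection()."""

    def __init__(self, write_scope: str) -> None:
        self._write_scope = write_scope

    def get_collection(self, name: str) -> str:
        # Returns a prefixed name here; the real method returns a wrapper.
        return f"{self._write_scope}_{name}"

    def __getitem__(self, name: str) -> str:
        # db["sessions"] behaves like db.get_collection("sessions")
        return self.get_collection(name)


db = BracketAlias("auth_app")
assert db["sessions"] == db.get_collection("sessions")
```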
@@ -1680,11 +2200,7 @@ class ScopedMongoWrapper:
             return True
         except OperationFailure as e:
             # Handle index build aborted (e.g., database being dropped during teardown)
-            if (
-                e.code == 276
-                or "IndexBuildAborted" in str(e)
-                or "dropDatabase" in str(e)
-            ):
+            if e.code == 276 or "IndexBuildAborted" in str(e) or "dropDatabase" in str(e):
                 logger.debug(
                     f"Skipping app_id index creation on {collection.name}: "
                     f"index build aborted (likely during database drop/teardown): {e}"
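The collapsed condition treats MongoDB error code 276 (`IndexBuildAborted`) and drop-database messages as benign teardown noise. The same check, isolated and runnable against pymongo's exception type:

```python
from pymongo.errors import OperationFailure


def is_benign_index_abort(exc: OperationFailure) -> bool:
    """True when an index build failed only because the database was going away."""
    # 276 is MongoDB's IndexBuildAborted code; the substring checks catch servers
    # that surface the condition in the message rather than the code.
    return exc.code == 276 or "IndexBuildAborted" in str(exc) or "dropDatabase" in str(exc)


try:
    raise OperationFailure("IndexBuildAborted: dropDatabase in progress", code=276)
except OperationFailure as exc:
    print(is_benign_index_abort(exc))  # True
```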
@@ -1694,19 +2210,13 @@ class ScopedMongoWrapper:
             return True
         except OperationFailure as e:
             # Handle index build aborted (e.g., database being dropped during teardown)
-            if (
-                e.code == 276
-                or "IndexBuildAborted" in str(e)
-                or "dropDatabase" in str(e)
-            ):
+            if e.code == 276 or "IndexBuildAborted" in str(e) or "dropDatabase" in str(e):
                 logger.debug(
                     f"Skipping app_id index creation on {collection.name}: "
                     f"index build aborted (likely during database drop/teardown): {e}"
                 )
                 return False
-            logger.debug(
-                f"OperationFailure ensuring app_id index on {collection.name}: {e}"
-            )
+            logger.debug(f"OperationFailure ensuring app_id index on {collection.name}: {e}")
             return False
         except (ConnectionFailure, ServerSelectionTimeoutError, InvalidOperation) as e:
             # Handle connection errors gracefully (e.g., during shutdown)