mdb-engine 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. mdb_engine/__init__.py +104 -11
  2. mdb_engine/auth/ARCHITECTURE.md +112 -0
  3. mdb_engine/auth/README.md +648 -11
  4. mdb_engine/auth/__init__.py +136 -29
  5. mdb_engine/auth/audit.py +592 -0
  6. mdb_engine/auth/base.py +252 -0
  7. mdb_engine/auth/casbin_factory.py +264 -69
  8. mdb_engine/auth/config_helpers.py +7 -6
  9. mdb_engine/auth/cookie_utils.py +3 -7
  10. mdb_engine/auth/csrf.py +373 -0
  11. mdb_engine/auth/decorators.py +3 -10
  12. mdb_engine/auth/dependencies.py +47 -50
  13. mdb_engine/auth/helpers.py +3 -3
  14. mdb_engine/auth/integration.py +53 -80
  15. mdb_engine/auth/jwt.py +2 -6
  16. mdb_engine/auth/middleware.py +77 -34
  17. mdb_engine/auth/oso_factory.py +18 -38
  18. mdb_engine/auth/provider.py +270 -171
  19. mdb_engine/auth/rate_limiter.py +504 -0
  20. mdb_engine/auth/restrictions.py +8 -24
  21. mdb_engine/auth/session_manager.py +14 -29
  22. mdb_engine/auth/shared_middleware.py +600 -0
  23. mdb_engine/auth/shared_users.py +759 -0
  24. mdb_engine/auth/token_store.py +14 -28
  25. mdb_engine/auth/users.py +54 -113
  26. mdb_engine/auth/utils.py +213 -15
  27. mdb_engine/cli/commands/generate.py +545 -9
  28. mdb_engine/cli/commands/validate.py +3 -7
  29. mdb_engine/cli/utils.py +3 -3
  30. mdb_engine/config.py +7 -21
  31. mdb_engine/constants.py +65 -0
  32. mdb_engine/core/README.md +117 -6
  33. mdb_engine/core/__init__.py +39 -7
  34. mdb_engine/core/app_registration.py +22 -41
  35. mdb_engine/core/app_secrets.py +290 -0
  36. mdb_engine/core/connection.py +18 -9
  37. mdb_engine/core/encryption.py +223 -0
  38. mdb_engine/core/engine.py +1057 -93
  39. mdb_engine/core/index_management.py +12 -16
  40. mdb_engine/core/manifest.py +459 -150
  41. mdb_engine/core/ray_integration.py +435 -0
  42. mdb_engine/core/seeding.py +10 -18
  43. mdb_engine/core/service_initialization.py +12 -23
  44. mdb_engine/core/types.py +2 -5
  45. mdb_engine/database/README.md +140 -17
  46. mdb_engine/database/__init__.py +17 -6
  47. mdb_engine/database/abstraction.py +25 -37
  48. mdb_engine/database/connection.py +11 -18
  49. mdb_engine/database/query_validator.py +367 -0
  50. mdb_engine/database/resource_limiter.py +204 -0
  51. mdb_engine/database/scoped_wrapper.py +713 -196
  52. mdb_engine/dependencies.py +426 -0
  53. mdb_engine/di/__init__.py +34 -0
  54. mdb_engine/di/container.py +248 -0
  55. mdb_engine/di/providers.py +205 -0
  56. mdb_engine/di/scopes.py +139 -0
  57. mdb_engine/embeddings/README.md +54 -24
  58. mdb_engine/embeddings/__init__.py +31 -24
  59. mdb_engine/embeddings/dependencies.py +37 -154
  60. mdb_engine/embeddings/service.py +11 -25
  61. mdb_engine/exceptions.py +92 -0
  62. mdb_engine/indexes/README.md +30 -13
  63. mdb_engine/indexes/__init__.py +1 -0
  64. mdb_engine/indexes/helpers.py +1 -1
  65. mdb_engine/indexes/manager.py +50 -114
  66. mdb_engine/memory/README.md +2 -2
  67. mdb_engine/memory/__init__.py +1 -2
  68. mdb_engine/memory/service.py +30 -87
  69. mdb_engine/observability/README.md +4 -2
  70. mdb_engine/observability/__init__.py +26 -9
  71. mdb_engine/observability/health.py +8 -9
  72. mdb_engine/observability/metrics.py +32 -12
  73. mdb_engine/repositories/__init__.py +34 -0
  74. mdb_engine/repositories/base.py +325 -0
  75. mdb_engine/repositories/mongo.py +233 -0
  76. mdb_engine/repositories/unit_of_work.py +166 -0
  77. mdb_engine/routing/README.md +1 -1
  78. mdb_engine/routing/__init__.py +1 -3
  79. mdb_engine/routing/websockets.py +25 -60
  80. mdb_engine-0.2.0.dist-info/METADATA +313 -0
  81. mdb_engine-0.2.0.dist-info/RECORD +96 -0
  82. mdb_engine-0.1.6.dist-info/METADATA +0 -213
  83. mdb_engine-0.1.6.dist-info/RECORD +0 -75
  84. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/WHEEL +0 -0
  85. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/entry_points.txt +0 -0
  86. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/licenses/LICENSE +0 -0
  87. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/top_level.txt +0 -0
@@ -180,9 +180,9 @@ Async-native interface for managing Atlas Search and Vector indexes.
180
180
  ```python
181
181
  from mdb_engine.database import AsyncAtlasIndexManager
182
182
 
183
- # Get index manager from collection
183
+ # Get index manager from collection (automatically uses unscoped collection)
184
184
  collection = db.my_collection
185
- index_manager = AsyncAtlasIndexManager(collection._collection) # Use unscoped collection
185
+ index_manager = collection.index_manager # Secure way to access index manager
186
186
 
187
187
  # Create vector search index
188
188
  await index_manager.create_vector_search_index(
@@ -277,25 +277,121 @@ await index_manager.create_search_index(
277
277
  )
278
278
  ```
279
279
 
280
- ## Connection Pooling
280
+ ## Security Features
281
+
282
+ The database module includes comprehensive security controls that prevent unauthorized access, NoSQL injection, and resource exhaustion, and that ensure data isolation:
283
+
284
+ ### Query Security
285
+
286
+ All queries are automatically validated for security:
287
+ - **Dangerous operator blocking**: Blocks `$where`, `$eval`, `$function`, and `$accumulator` operators that allow JavaScript execution
288
+ - **Query depth limits**: Prevents deeply nested queries that could cause performance issues
289
+ - **Regex complexity limits**: Prevents ReDoS (Regular Expression Denial of Service) attacks
290
+ - **Pipeline validation**: Validates aggregation pipelines for safety and complexity
291
+
292
+ ```python
293
+ # These queries will be blocked:
294
+ db.collection.find({"$where": "this.status === 'active'"}) # ❌ Dangerous operator
295
+ db.collection.aggregate([{"$match": {}}] * 100) # ❌ Too many pipeline stages
281
296
 
282
- The database module provides shared MongoDB connection pooling for efficient resource usage.
297
+ # These queries are safe:
298
+ db.collection.find({"status": "active"}) # ✅ Safe
299
+ db.collection.find({"age": {"$gt": 18}}) # ✅ Safe
300
+ ```
301
+
302
+ ### Resource Limits
283
303
 
284
- ### Get Shared Client
304
+ All operations have automatic resource limits to prevent resource exhaustion:
305
+ - **Query timeouts**: All queries automatically have `maxTimeMS` set (default: 30 seconds, max: 5 minutes)
306
+ - **Result size limits**: Maximum 10,000 documents per query (configurable)
307
+ - **Batch size limits**: Maximum 1,000 documents per cursor batch
308
+ - **Document size validation**: Documents are validated before insert (16MB MongoDB limit)
285
309
 
286
310
  ```python
287
- from mdb_engine.database import get_shared_mongo_client
311
+ # Timeouts are automatically enforced:
312
+ db.collection.find({"status": "active"}) # Automatically has maxTimeMS=30000
313
+
314
+ # Result limits are enforced:
315
+ db.collection.find({}, limit=20000) # Automatically capped to 10,000
316
+
317
+ # Document sizes are validated:
318
+ large_doc = {"data": "x" * (20 * 1024 * 1024)} # ❌ Exceeds 16MB limit
319
+ await db.collection.insert_one(large_doc) # Raises ResourceLimitExceeded
320
+ ```
321
+
322
+ ### Collection Name Validation
288
323
 
289
- # Get or create shared MongoDB client
290
- client = get_shared_mongo_client(
291
- mongo_uri="mongodb://localhost:27017",
292
- max_pool_size=10,
293
- min_pool_size=1
324
+ All collection names are validated for security:
325
+
326
+ ### Collection Name Validation
327
+
328
+ All collection names are validated for security:
329
+ - **Format validation**: Must match MongoDB naming rules (alphanumeric, underscore, dot, hyphen)
330
+ - **Length limits**: 1-255 characters
331
+ - **Reserved names**: System collections (`apps_config`) are blocked
332
+ - **Reserved prefixes**: Collections starting with `system`, `admin`, `config`, or `local` are blocked
333
+ - **Path traversal protection**: Blocks attempts to use `..`, `/`, or `\` in collection names
334
+
335
+ ```python
336
+ # These will raise ValueError:
337
+ db.system_users # Reserved prefix
338
+ db.apps_config # Reserved name
339
+ db["../other"] # Path traversal attempt
340
+ db["123invalid"] # Invalid format (starts with number)
341
+ ```
342
+
343
+ ### Cross-App Access Control
344
+
345
+ Cross-app collection access is strictly controlled:
346
+ - Apps can only read from collections of apps listed in their `read_scopes`
347
+ - Unauthorized cross-app access attempts are logged and blocked
348
+ - All cross-app access is logged for audit purposes
349
+
350
+ ```python
351
+ # App can only read from authorized apps
352
+ db = engine.get_scoped_db(
353
+ "my_app",
354
+ read_scopes=["my_app", "shared_app"] # Can read from these apps
294
355
  )
295
356
 
296
- db = client["my_database"]
357
+ # This works (authorized):
358
+ collection = db.get_collection("shared_app_data")
359
+
360
+ # This fails (unauthorized):
361
+ collection = db.get_collection("other_app_data") # Raises ValueError
297
362
  ```
298
363
 
364
+ ### Scope Validation
365
+
366
+ The `get_scoped_db()` method validates all scopes:
367
+ - `read_scopes` must be a non-empty list of valid app slugs
368
+ - `write_scope` must be a non-empty string
369
+ - Invalid scopes raise `ValueError` with clear error messages
370
+
371
+ ### Audit Logging
372
+
373
+ All security-relevant events are logged:
374
+ - Invalid collection name attempts
375
+ - Unauthorized cross-app access attempts
376
+ - Reserved name/prefix access attempts
377
+ - Collection name validation failures
378
+
379
+ Logs include app context (app_slug, collection_name, action) for security monitoring.
380
+
381
+ ### Best Practices
382
+
383
+ 1. **Always use scoped databases**: Never access raw MongoDB clients or databases
384
+ 2. **Validate collection names**: Use descriptive, valid collection names
385
+ 3. **Limit cross-app access**: Only grant `read_scopes` to apps that need cross-app data
386
+ 4. **Monitor audit logs**: Review security logs regularly for suspicious patterns
387
+ 5. **Follow naming conventions**: Use lowercase, underscore-separated names (e.g., `user_profiles`)
388
+
389
+ ## Connection Pooling
390
+
391
+ The database module provides shared MongoDB connection pooling for efficient resource usage. Connection pooling is handled automatically by the engine; always use `engine.get_scoped_db()` for database access.
392
+
393
+ **Security Note:** Direct MongoDB client creation functions are internal and not part of the public API. Always use scoped databases to ensure proper app isolation.
394
+
299
395
  ### Pool Metrics
300
396
 
301
397
  Monitor connection pool usage:
@@ -474,13 +570,40 @@ except OperationFailure as e:
474
570
  print(f"MongoDB operation failed: {e.details}")
475
571
  except AutoReconnect as e:
476
572
  print(f"MongoDB reconnection: {e}")
477
- except Exception as e:
478
- print(f"Unexpected error: {e}")
573
+ except (ConnectionFailure, ServerSelectionTimeoutError) as e:
574
+ print(f"Connection error: {e}")
479
575
  ```
480
576
 
481
577
  ## Integration Examples
482
578
 
483
- ### FastAPI Integration
579
+ ### FastAPI Integration (Recommended)
580
+
581
+ Use the request-scoped `get_scoped_db` dependency from `mdb_engine.dependencies`:
582
+
583
+ ```python
584
+ from fastapi import Depends
585
+ from mdb_engine import MongoDBEngine
586
+ from mdb_engine.dependencies import get_scoped_db
587
+
588
+ engine = MongoDBEngine(mongo_uri="...", db_name="...")
589
+ app = engine.create_app(slug="my_app", manifest=Path("manifest.json"))
590
+
591
+ @app.get("/data")
592
+ async def get_data(db=Depends(get_scoped_db)):
593
+ # db is automatically scoped to "my_app"
594
+ docs = await db.my_collection.find({}).to_list(length=10)
595
+ return {"data": docs}
596
+
597
+ @app.post("/data")
598
+ async def create_data(db=Depends(get_scoped_db)):
599
+ # Writes are automatically scoped to "my_app"
600
+ result = await db.my_collection.insert_one({"name": "New Document"})
601
+ return {"inserted_id": str(result.inserted_id)}
602
+ ```
603
+
604
+ ### Legacy FastAPI Integration
605
+
606
+ For apps not using `engine.create_app()`:
484
607
 
485
608
  ```python
486
609
  from fastapi import FastAPI, Depends
@@ -508,8 +631,8 @@ db1 = engine.get_scoped_db("app1")
508
631
  db2 = engine.get_scoped_db("app2")
509
632
 
510
633
  # Cross-app access (read from app1 and app2, write to shared)
511
- shared_db = ScopedMongoWrapper(
512
- real_db=engine.mongo_db,
634
+ shared_db = engine.get_scoped_db(
635
+ app_slug="shared",
513
636
  read_scopes=["app1", "app2"],
514
637
  write_scope="shared"
515
638
  )
@@ -6,11 +6,20 @@ and MongoDB-style API for familiarity.
6
6
  """
7
7
 
8
8
  from .abstraction import AppDB, Collection, get_app_db
9
- from .connection import (close_shared_client, get_pool_metrics,
10
- get_shared_mongo_client, register_client_for_metrics,
11
- verify_shared_client)
12
- from .scoped_wrapper import (AsyncAtlasIndexManager, AutoIndexManager,
13
- ScopedCollectionWrapper, ScopedMongoWrapper)
9
+ from .connection import (
10
+ close_shared_client,
11
+ get_pool_metrics,
12
+ register_client_for_metrics,
13
+ verify_shared_client,
14
+ )
15
+ from .query_validator import QueryValidator
16
+ from .resource_limiter import ResourceLimiter
17
+ from .scoped_wrapper import (
18
+ AsyncAtlasIndexManager,
19
+ AutoIndexManager,
20
+ ScopedCollectionWrapper,
21
+ ScopedMongoWrapper,
22
+ )
14
23
 
15
24
  __all__ = [
16
25
  # Scoped wrappers
@@ -18,12 +27,14 @@ __all__ = [
18
27
  "ScopedCollectionWrapper",
19
28
  "AsyncAtlasIndexManager",
20
29
  "AutoIndexManager",
30
+ # Query security
31
+ "QueryValidator",
32
+ "ResourceLimiter",
21
33
  # Database abstraction
22
34
  "AppDB",
23
35
  "Collection",
24
36
  "get_app_db",
25
37
  # Connection pooling
26
- "get_shared_mongo_client",
27
38
  "verify_shared_client",
28
39
  "get_pool_metrics",
29
40
  "register_client_for_metrics",
@@ -30,9 +30,13 @@ from ..exceptions import MongoDBEngineError
30
30
  from .scoped_wrapper import ScopedMongoWrapper
31
31
 
32
32
  try:
33
- from pymongo.errors import (AutoReconnect, ConnectionFailure,
34
- InvalidOperation, OperationFailure,
35
- ServerSelectionTimeoutError)
33
+ from pymongo.errors import (
34
+ AutoReconnect,
35
+ ConnectionFailure,
36
+ InvalidOperation,
37
+ OperationFailure,
38
+ ServerSelectionTimeoutError,
39
+ )
36
40
  except ImportError:
37
41
  OperationFailure = Exception
38
42
  AutoReconnect = Exception
@@ -41,8 +45,12 @@ except ImportError:
41
45
 
42
46
  try:
43
47
  from motor.motor_asyncio import AsyncIOMotorCursor
44
- from pymongo.results import (DeleteResult, InsertManyResult,
45
- InsertOneResult, UpdateResult)
48
+ from pymongo.results import (
49
+ DeleteResult,
50
+ InsertManyResult,
51
+ InsertOneResult,
52
+ UpdateResult,
53
+ )
46
54
  except ImportError:
47
55
  AsyncIOMotorCursor = None
48
56
  InsertOneResult = None
@@ -112,9 +120,7 @@ class Collection:
112
120
  context={"operation": "find_one"},
113
121
  ) from e
114
122
 
115
- def find(
116
- self, filter: Optional[Dict[str, Any]] = None, *args, **kwargs
117
- ) -> AsyncIOMotorCursor:
123
+ def find(self, filter: Optional[Dict[str, Any]] = None, *args, **kwargs) -> AsyncIOMotorCursor:
118
124
  """
119
125
  Find documents matching the filter.
120
126
 
@@ -140,9 +146,7 @@ class Collection:
140
146
  """
141
147
  return self._collection.find(filter or {}, *args, **kwargs)
142
148
 
143
- async def insert_one(
144
- self, document: Dict[str, Any], *args, **kwargs
145
- ) -> InsertOneResult:
149
+ async def insert_one(self, document: Dict[str, Any], *args, **kwargs) -> InsertOneResult:
146
150
  """
147
151
  Insert a single document.
148
152
 
@@ -460,9 +464,7 @@ class Collection:
460
464
  context={"operation": "count_documents"},
461
465
  ) from e
462
466
 
463
- def aggregate(
464
- self, pipeline: List[Dict[str, Any]], *args, **kwargs
465
- ) -> AsyncIOMotorCursor:
467
+ def aggregate(self, pipeline: List[Dict[str, Any]], *args, **kwargs) -> AsyncIOMotorCursor:
466
468
  """
467
469
  Perform aggregation pipeline.
468
470
 
@@ -554,11 +556,17 @@ class AppDB:
554
556
  Example:
555
557
  db.users.get("user_123") # Instead of db.collection("users").get("user_123")
556
558
  """
557
- # Only proxy collection names, not internal attributes
558
- if name.startswith("_"):
559
+ # Explicitly block access to 'database' property (removed for security)
560
+ if name == "database":
559
561
  raise AttributeError(
560
- f"'{type(self).__name__}' object has no attribute '{name}'"
562
+ "'database' property has been removed for security. "
563
+ "Use collection.index_manager for index operations. "
564
+ "All data access must go through scoped collections."
561
565
  )
566
+
567
+ # Only proxy collection names, not internal attributes
568
+ if name.startswith("_"):
569
+ raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
562
570
  return self.collection(name)
563
571
 
564
572
  @property
@@ -576,26 +584,6 @@ class AppDB:
576
584
  """
577
585
  return self._wrapper
578
586
 
579
- @property
580
- def database(self):
581
- """
582
- Access the underlying AsyncIOMotorDatabase (unscoped).
583
-
584
- This is useful for advanced operations that need direct access to the
585
- real database without scoping, such as index management or administrative
586
- operations.
587
-
588
- Returns:
589
- The underlying AsyncIOMotorDatabase instance
590
-
591
- Example:
592
- # Access underlying database for index management
593
- real_db = db.database
594
- collection = real_db["my_collection"]
595
- index_manager = AsyncAtlasIndexManager(collection)
596
- """
597
- return self._wrapper.database
598
-
599
587
 
600
588
  # FastAPI dependency helper
601
589
  async def get_app_db(request, get_scoped_db_func: Callable) -> AppDB:
@@ -26,8 +26,12 @@ import threading
26
26
  from typing import Any, Dict, Optional
27
27
 
28
28
  from motor.motor_asyncio import AsyncIOMotorClient
29
- from pymongo.errors import (ConnectionFailure, InvalidOperation,
30
- OperationFailure, ServerSelectionTimeoutError)
29
+ from pymongo.errors import (
30
+ ConnectionFailure,
31
+ InvalidOperation,
32
+ OperationFailure,
33
+ ServerSelectionTimeoutError,
34
+ )
31
35
 
32
36
  logger = logging.getLogger(__name__)
33
37
 
@@ -87,10 +91,7 @@ def get_shared_mongo_client(
87
91
  # Verify client is still connected
88
92
  try:
89
93
  # Non-blocking check - if client was closed, it will be None or invalid
90
- if (
91
- hasattr(_shared_client, "_topology")
92
- and _shared_client._topology is not None
93
- ):
94
+ if hasattr(_shared_client, "_topology") and _shared_client._topology is not None:
94
95
  return _shared_client
95
96
  except (AttributeError, RuntimeError):
96
97
  # Client was closed or invalid, reset and recreate
@@ -103,10 +104,7 @@ def get_shared_mongo_client(
103
104
  # Double-check pattern: another thread may have initialized while we waited
104
105
  if _shared_client is not None:
105
106
  try:
106
- if (
107
- hasattr(_shared_client, "_topology")
108
- and _shared_client._topology is not None
109
- ):
107
+ if hasattr(_shared_client, "_topology") and _shared_client._topology is not None:
110
108
  return _shared_client
111
109
  except (AttributeError, RuntimeError):
112
110
  # Client was closed or invalid, reset and recreate
@@ -180,7 +178,7 @@ async def verify_shared_client() -> bool:
180
178
  OperationFailure,
181
179
  InvalidOperation,
182
180
  ) as e:
183
- logger.error(f"Shared MongoDB client verification failed: {e}")
181
+ logger.exception(f"Shared MongoDB client verification failed: {e}")
184
182
  return False
185
183
 
186
184
 
@@ -236,10 +234,7 @@ async def get_pool_metrics(
236
234
  for registered_client in _registered_clients:
237
235
  try:
238
236
  # Verify client is still valid
239
- if (
240
- hasattr(registered_client, "_topology")
241
- and registered_client._topology is not None
242
- ):
237
+ if hasattr(registered_client, "_topology") and registered_client._topology is not None:
243
238
  return await _get_client_pool_metrics(registered_client)
244
239
  except (AttributeError, RuntimeError):
245
240
  # Type 2: Recoverable - if this client is invalid, try next one
@@ -338,9 +333,7 @@ async def _get_client_pool_metrics(client: AsyncIOMotorClient) -> Dict[str, Any]
338
333
  if max_pool_size and current_connections is not None:
339
334
  usage_percent = (current_connections / max_pool_size) * 100
340
335
  metrics["pool_usage_percent"] = round(usage_percent, 2)
341
- metrics["active_connections"] = (
342
- current_connections # Alias for compatibility
343
- )
336
+ metrics["active_connections"] = current_connections # Alias for compatibility
344
337
 
345
338
  # Warn if pool usage is high
346
339
  if usage_percent > 80: