mdb-engine 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. mdb_engine/README.md +144 -0
  2. mdb_engine/__init__.py +37 -0
  3. mdb_engine/auth/README.md +631 -0
  4. mdb_engine/auth/__init__.py +128 -0
  5. mdb_engine/auth/casbin_factory.py +199 -0
  6. mdb_engine/auth/casbin_models.py +46 -0
  7. mdb_engine/auth/config_defaults.py +71 -0
  8. mdb_engine/auth/config_helpers.py +213 -0
  9. mdb_engine/auth/cookie_utils.py +158 -0
  10. mdb_engine/auth/decorators.py +350 -0
  11. mdb_engine/auth/dependencies.py +747 -0
  12. mdb_engine/auth/helpers.py +64 -0
  13. mdb_engine/auth/integration.py +578 -0
  14. mdb_engine/auth/jwt.py +225 -0
  15. mdb_engine/auth/middleware.py +241 -0
  16. mdb_engine/auth/oso_factory.py +323 -0
  17. mdb_engine/auth/provider.py +570 -0
  18. mdb_engine/auth/restrictions.py +271 -0
  19. mdb_engine/auth/session_manager.py +477 -0
  20. mdb_engine/auth/token_lifecycle.py +213 -0
  21. mdb_engine/auth/token_store.py +289 -0
  22. mdb_engine/auth/users.py +1516 -0
  23. mdb_engine/auth/utils.py +614 -0
  24. mdb_engine/cli/__init__.py +13 -0
  25. mdb_engine/cli/commands/__init__.py +7 -0
  26. mdb_engine/cli/commands/generate.py +105 -0
  27. mdb_engine/cli/commands/migrate.py +83 -0
  28. mdb_engine/cli/commands/show.py +70 -0
  29. mdb_engine/cli/commands/validate.py +63 -0
  30. mdb_engine/cli/main.py +41 -0
  31. mdb_engine/cli/utils.py +92 -0
  32. mdb_engine/config.py +217 -0
  33. mdb_engine/constants.py +160 -0
  34. mdb_engine/core/README.md +542 -0
  35. mdb_engine/core/__init__.py +42 -0
  36. mdb_engine/core/app_registration.py +392 -0
  37. mdb_engine/core/connection.py +243 -0
  38. mdb_engine/core/engine.py +749 -0
  39. mdb_engine/core/index_management.py +162 -0
  40. mdb_engine/core/manifest.py +2793 -0
  41. mdb_engine/core/seeding.py +179 -0
  42. mdb_engine/core/service_initialization.py +355 -0
  43. mdb_engine/core/types.py +413 -0
  44. mdb_engine/database/README.md +522 -0
  45. mdb_engine/database/__init__.py +31 -0
  46. mdb_engine/database/abstraction.py +635 -0
  47. mdb_engine/database/connection.py +387 -0
  48. mdb_engine/database/scoped_wrapper.py +1721 -0
  49. mdb_engine/embeddings/README.md +184 -0
  50. mdb_engine/embeddings/__init__.py +62 -0
  51. mdb_engine/embeddings/dependencies.py +193 -0
  52. mdb_engine/embeddings/service.py +759 -0
  53. mdb_engine/exceptions.py +167 -0
  54. mdb_engine/indexes/README.md +651 -0
  55. mdb_engine/indexes/__init__.py +21 -0
  56. mdb_engine/indexes/helpers.py +145 -0
  57. mdb_engine/indexes/manager.py +895 -0
  58. mdb_engine/memory/README.md +451 -0
  59. mdb_engine/memory/__init__.py +30 -0
  60. mdb_engine/memory/service.py +1285 -0
  61. mdb_engine/observability/README.md +515 -0
  62. mdb_engine/observability/__init__.py +42 -0
  63. mdb_engine/observability/health.py +296 -0
  64. mdb_engine/observability/logging.py +161 -0
  65. mdb_engine/observability/metrics.py +297 -0
  66. mdb_engine/routing/README.md +462 -0
  67. mdb_engine/routing/__init__.py +73 -0
  68. mdb_engine/routing/websockets.py +813 -0
  69. mdb_engine/utils/__init__.py +7 -0
  70. mdb_engine-0.1.6.dist-info/METADATA +213 -0
  71. mdb_engine-0.1.6.dist-info/RECORD +75 -0
  72. mdb_engine-0.1.6.dist-info/WHEEL +5 -0
  73. mdb_engine-0.1.6.dist-info/entry_points.txt +2 -0
  74. mdb_engine-0.1.6.dist-info/licenses/LICENSE +661 -0
  75. mdb_engine-0.1.6.dist-info/top_level.txt +1 -0
@@ -0,0 +1,895 @@
1
+ """
2
+ Index Management Orchestration
3
+
4
+ High-level functions for creating and managing indexes based on manifest definitions.
5
+
6
+ This module is part of MDB_ENGINE - MongoDB Engine.
7
+ """
8
+
9
+ import json
10
+ import logging
11
+ from typing import Any, Dict, List
12
+
13
+ from motor.motor_asyncio import AsyncIOMotorDatabase
14
+ from pymongo.errors import (CollectionInvalid, ConnectionFailure,
15
+ OperationFailure, ServerSelectionTimeoutError)
16
+
17
+ # Import constants
18
+ from ..constants import INDEX_TYPE_REGULAR, INDEX_TYPE_TTL, MIN_TTL_SECONDS
19
+
20
+ # Import index manager from database module
21
+ try:
22
+ from ..database.scoped_wrapper import AsyncAtlasIndexManager
23
+ except ImportError:
24
+ AsyncAtlasIndexManager = None
25
+ logging.warning("AsyncAtlasIndexManager not available")
26
+
27
+ # Import helper functions
28
+ from .helpers import (check_and_update_index, is_id_index, normalize_keys,
29
+ validate_index_definition_basic)
30
+
31
+ # Check if index manager is available
32
+ INDEX_MANAGER_AVAILABLE = AsyncAtlasIndexManager is not None
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
async def _handle_regular_index(
    index_manager: AsyncAtlasIndexManager,
    index_def: Dict[str, Any],
    index_name: str,
    log_prefix: str,
) -> None:
    """Create (or verify) a regular index described by a manifest entry.

    Args:
        index_manager: Index manager bound to the target collection.
        index_def: Manifest index definition; this block reads its 'keys',
            'options' and 'wait_for_ready' entries.
        index_name: Name the index should be created under.
        log_prefix: "[slug -> collection]" prefix attached to every log line.

    Raises:
        RuntimeError: If the created index is not visible via ``get_index``
            within the 10-second polling window below.
        OperationFailure, ConnectionFailure, ServerSelectionTimeoutError,
        ValueError, TypeError: re-raised after being logged.
    """
    logger.info(f"{log_prefix} _handle_regular_index called for '{index_name}'")
    keys = index_def.get("keys")
    logger.info(f"{log_prefix} Index keys: {keys}, index_def: {index_def}")

    # Definition must at minimum carry a 'keys' field; on failure we log and
    # bail out without touching the collection.
    is_valid, error_msg = validate_index_definition_basic(
        index_def, index_name, ["keys"], log_prefix
    )
    logger.info(
        f"{log_prefix} Validation result: is_valid={is_valid}, "
        f"error_msg={error_msg}"
    )
    if not is_valid:
        logger.error(f"{log_prefix} ❌ Validation failed: {error_msg}")
        return

    # '_id' indexes are created implicitly by MongoDB; nothing to do here.
    if is_id_index(keys):
        logger.info(
            f"{log_prefix} Skipping '_id' index '{index_name}'. "
            f"MongoDB automatically creates '_id' indexes on all "
            f"collections and they cannot be customized. "
            f"This is expected behavior - no action needed."
        )
        return

    # Get wait_for_ready from index definition (default: True for managed indexes)
    wait_for_ready = index_def.get("wait_for_ready", True)

    # Manifest-supplied options, with the name and readiness flag forced in.
    options = {
        **index_def.get("options", {}),
        "name": index_name,
        "wait_for_ready": wait_for_ready,
    }
    logger.debug(f"{log_prefix} Checking if index '{index_name}' exists...")
    # check_and_update_index may drop-and-recreate a mismatched index; when it
    # reports an existing match there is nothing left to create.
    exists, existing = await check_and_update_index(
        index_manager, index_name, keys, options, log_prefix
    )
    logger.debug(
        f"{log_prefix} Index exists check result: exists={exists}, "
        f"existing={existing}"
    )

    if exists and existing:
        logger.info(f"{log_prefix} Regular index '{index_name}' matches; skipping.")
        return

    logger.info(
        f"{log_prefix} Creating regular index '{index_name}' with keys {keys} "
        f"and options {options}..."
    )
    try:
        created_name = await index_manager.create_index(keys, **options)
        logger.info(
            f"{log_prefix} ✔️ Created regular index '{created_name}' "
            f"(requested: '{index_name}')."
        )

        # Wait for index to be ready and verify it was actually created.
        # NOTE(review): this polls even though wait_for_ready was already passed
        # to create_index — presumably belt-and-braces; confirm it is needed.
        import asyncio

        max_wait = 10  # Wait up to 10 seconds for index to be ready
        poll_interval = 0.5
        waited = 0

        # Poll until get_index() sees the new index; the while/else 'else' arm
        # runs only when the loop exhausts max_wait without breaking.
        while waited < max_wait:
            await asyncio.sleep(poll_interval)
            waited += poll_interval

            all_indexes = await index_manager.list_indexes()
            verify_index = await index_manager.get_index(index_name)

            if verify_index:
                logger.info(
                    f"{log_prefix} ✅ Verified index '{index_name}' exists "
                    f"after {waited:.1f}s."
                )
                break

            logger.debug(
                f"{log_prefix} Waiting for index '{index_name}' to be ready... "
                f"({waited:.1f}s/{max_wait}s). "
                f"Available indexes: {[idx.get('name') for idx in all_indexes]}"
            )
        else:
            # Timeout - index still not found
            all_indexes = await index_manager.list_indexes()
            logger.error(
                f"{log_prefix} ❌ Index '{index_name}' was NOT found after "
                f"{max_wait}s! create_index returned '{created_name}' but index "
                f"is not visible. Available indexes: "
                f"{[idx.get('name') for idx in all_indexes]}"
            )
            raise RuntimeError(
                f"Index '{index_name}' was not found after {max_wait}s despite "
                f"create_index returning '{created_name}'"
            )
    except (
        OperationFailure,
        ConnectionFailure,
        ServerSelectionTimeoutError,
        RuntimeError,
        ValueError,
        TypeError,
    ) as e:
        # Log with traceback, then let the caller decide how to handle it.
        logger.error(
            f"{log_prefix} ❌ Failed to create regular index '{index_name}': {e}",
            exc_info=True,
        )
        raise
152
+
153
+
154
async def _handle_ttl_index(
    index_manager: AsyncAtlasIndexManager,
    index_def: Dict[str, Any],
    index_name: str,
    log_prefix: str,
) -> None:
    """Create a TTL (time-to-live) index from a manifest definition.

    Requires a 'keys' field plus a positive integer
    'options.expireAfterSeconds'; invalid definitions are logged and
    skipped. If a matching index already exists nothing is created.
    """
    keys = index_def.get("keys")
    valid, problem = validate_index_definition_basic(
        index_def, index_name, ["keys"], log_prefix
    )
    if not valid:
        logger.warning(problem)
        return

    opts = index_def.get("options", {})
    ttl_seconds = opts.get("expireAfterSeconds")
    # The TTL value must be a truthy int no smaller than MIN_TTL_SECONDS.
    ttl_is_usable = (
        bool(ttl_seconds)
        and isinstance(ttl_seconds, int)
        and ttl_seconds >= MIN_TTL_SECONDS
    )
    if not ttl_is_usable:
        logger.warning(
            f"{log_prefix} TTL index '{index_name}' missing or "
            f"invalid 'expireAfterSeconds' in options. "
            f"TTL indexes require 'options.expireAfterSeconds' to be "
            f"a positive integer. "
            f"Skipping this index definition."
        )
        return

    normalized = normalize_keys(keys)
    create_opts = {**opts, "name": index_name}
    # Reuse a matching existing index; mismatches are handled (dropped /
    # updated) inside check_and_update_index.
    already_there, matching = await check_and_update_index(
        index_manager, index_name, normalized, create_opts, log_prefix
    )
    if already_there and matching:
        logger.info(f"{log_prefix} TTL index '{index_name}' matches; skipping.")
        return

    logger.info(
        f"{log_prefix} Creating TTL index '{index_name}' on field(s) {normalized} "
        f"with expireAfterSeconds={ttl_seconds}..."
    )
    await index_manager.create_index(normalized, **create_opts)
    logger.info(
        f"{log_prefix} ✔️ Created TTL index '{index_name}' "
        f"(expires after {ttl_seconds} seconds)."
    )
203
+
204
+
205
async def _handle_partial_index(
    index_manager: AsyncAtlasIndexManager,
    index_def: Dict[str, Any],
    index_name: str,
    log_prefix: str,
) -> None:
    """Create a partial index from a manifest definition.

    Needs both 'keys' and 'options.partialFilterExpression'; otherwise the
    definition is logged and skipped. An existing index with the same name
    is kept when keys and filter match, or dropped and recreated when not.
    """
    keys = index_def.get("keys")
    if not keys:
        logger.warning(
            f"{log_prefix} Missing 'keys' field on partial index '{index_name}'. "
            f"Partial indexes require a 'keys' field. Skipping this index definition."
        )
        return

    opts = index_def.get("options", {})
    filter_expr = opts.get("partialFilterExpression")
    if not filter_expr:
        logger.warning(
            f"{log_prefix} Partial index '{index_name}' missing "
            f"'partialFilterExpression' in options. "
            f"Partial indexes require "
            f"'options.partialFilterExpression' to specify which "
            f"documents to index. "
            f"Skipping this index definition."
        )
        return

    # Normalize a dict key-spec into the (field, direction) pair list that
    # create_index expects; other shapes are passed through untouched.
    spec_pairs = list(keys.items()) if isinstance(keys, dict) else keys

    create_opts = {**opts, "name": index_name}
    current = await index_manager.get_index(index_name)
    if current:
        current_key = current.get("key", {})
        wanted_key = (
            dict(spec_pairs) if isinstance(spec_pairs, list) else dict(keys.items())
        )
        current_filter = current.get("partialFilterExpression")

        if current_key == wanted_key and current_filter == filter_expr:
            logger.info(f"{log_prefix} Partial index '{index_name}' matches; skipping.")
            return

        logger.warning(
            f"{log_prefix} Partial index '{index_name}' definition mismatch. "
            f"Existing: keys={current_key}, filter={current_filter}. "
            f"Expected: keys={wanted_key}, filter={filter_expr}. "
            f"Dropping existing index and recreating."
        )
        await index_manager.drop_index(index_name)

    logger.info(
        f"{log_prefix} Creating partial index '{index_name}' "
        f"on field(s) {spec_pairs} "
        f"with filter: {filter_expr}..."
    )
    await index_manager.create_index(spec_pairs, **create_opts)
    logger.info(f"{log_prefix} ✔️ Created partial index '{index_name}'.")
269
+
270
+
271
async def _handle_text_index(
    index_manager: AsyncAtlasIndexManager,
    index_def: Dict[str, Any],
    index_name: str,
    log_prefix: str,
) -> None:
    """Create (or recreate) a MongoDB text index from a manifest definition.

    Args:
        index_manager: Index manager bound to the target collection.
        index_def: Manifest index definition; reads 'keys' and 'options'.
        index_name: Name the index should be created under.
        log_prefix: "[slug -> collection]" prefix for every log line.

    A valid definition needs a 'keys' field with at least one field whose
    type is 'text'. An existing index with the same name is kept if its key
    spec matches, otherwise dropped and recreated.
    """
    keys = index_def.get("keys")
    if not keys:
        logger.warning(
            f"{log_prefix} Missing 'keys' field on text index '{index_name}'. "
            f"Text indexes require a 'keys' field with at least one 'text' "
            f"type field. Skipping this index definition."
        )
        return

    # Normalize a dict spec into the (field, type) pair list create_index expects.
    if isinstance(keys, dict):
        text_keys = [(k, v) for k, v in keys.items()]
    else:
        text_keys = keys

    # Bug fix: previously a value of 1 (a plain ascending key) was also
    # accepted as a text field, so a definition with no actual 'text' field
    # slipped through and produced a regular index. MongoDB text indexes
    # require the literal value "text" for at least one field, which is also
    # what the warning below states.
    has_text = any(
        isinstance(entry, (list, tuple))
        and len(entry) >= 2
        and entry[1] in ("text", "TEXT")
        for entry in text_keys
    )

    if not has_text:
        logger.warning(
            f"{log_prefix} Text index '{index_name}' has no fields with "
            f"'text' type. At least one field must have type 'text'. "
            f"Skipping this index definition."
        )
        return

    options = {**index_def.get("options", {}), "name": index_name}
    existing_index = await index_manager.get_index(index_name)
    if existing_index:
        existing_key = existing_index.get("key", {})
        expected_key = (
            dict(text_keys) if isinstance(text_keys, list) else dict(keys.items())
        )

        if existing_key != expected_key:
            logger.warning(
                f"{log_prefix} Text index '{index_name}' definition mismatch. "
                f"Existing keys: {existing_key}, Expected keys: {expected_key}. "
                f"Dropping existing index and recreating."
            )
            await index_manager.drop_index(index_name)
        else:
            logger.info(f"{log_prefix} Text index '{index_name}' matches; skipping.")
            return

    logger.info(
        f"{log_prefix} Creating text index '{index_name}' on field(s) {text_keys}..."
    )
    await index_manager.create_index(text_keys, **options)
    logger.info(f"{log_prefix} ✔️ Created text index '{index_name}'.")
343
+
344
+
345
async def _handle_geospatial_index(
    index_manager: AsyncAtlasIndexManager,
    index_def: Dict[str, Any],
    index_name: str,
    log_prefix: str,
) -> None:
    """Create a geospatial index from a manifest definition.

    The 'keys' field must contain at least one entry typed '2dsphere',
    '2d', or 'geoHaystack'; otherwise the definition is logged and skipped.
    An existing same-named index is kept on a key match, or dropped and
    recreated on a mismatch.
    """
    keys = index_def.get("keys")
    if not keys:
        logger.warning(
            f"{log_prefix} Missing 'keys' field on geospatial index "
            f"'{index_name}'. Geospatial indexes require a 'keys' field with "
            f"at least one geospatial type ('2dsphere', '2d', 'geoHaystack'). "
            f"Skipping this index definition."
        )
        return

    # Normalize a dict spec into (field, type) pairs for create_index.
    geo_pairs = list(keys.items()) if isinstance(keys, dict) else keys

    # At least one entry must carry a recognized geospatial type.
    found_geo = any(
        isinstance(entry, (list, tuple))
        and len(entry) >= 2
        and entry[1] in ("2dsphere", "2d", "geoHaystack")
        for entry in geo_pairs
    )

    if not found_geo:
        logger.warning(
            f"{log_prefix} Geospatial index '{index_name}' has no fields with "
            f"geospatial type. At least one field must have type '2dsphere', "
            f"'2d', or 'geoHaystack'. Skipping this index definition."
        )
        return

    create_opts = {**index_def.get("options", {}), "name": index_name}
    current = await index_manager.get_index(index_name)
    if current:
        current_key = current.get("key", {})
        wanted_key = (
            dict(geo_pairs) if isinstance(geo_pairs, list) else dict(keys.items())
        )

        if current_key == wanted_key:
            logger.info(
                f"{log_prefix} Geospatial index '{index_name}' matches; skipping."
            )
            return

        logger.warning(
            f"{log_prefix} Geospatial index '{index_name}' definition "
            f"mismatch. Existing keys: {current_key}, Expected keys: "
            f"{wanted_key}. Dropping existing index and recreating."
        )
        await index_manager.drop_index(index_name)

    logger.info(
        f"{log_prefix} Creating geospatial index '{index_name}' "
        f"on field(s) {geo_pairs}..."
    )
    await index_manager.create_index(geo_pairs, **create_opts)
    logger.info(f"{log_prefix} ✔️ Created geospatial index '{index_name}'.")
413
+
414
+
415
async def _handle_search_index(
    index_manager: AsyncAtlasIndexManager,
    index_def: Dict[str, Any],
    index_name: str,
    index_type: str,
    slug: str,
    log_prefix: str,
) -> None:
    """Create or reconcile an Atlas 'search' or 'vectorSearch' index.

    Args:
        index_manager: Index manager bound to the target collection.
        index_def: Manifest index definition; reads its 'definition' object.
        index_name: Name the index should exist under.
        index_type: Either 'search' or 'vectorSearch' (forwarded to Atlas).
        slug: App slug. NOTE(review): accepted for signature parity with the
            other handlers but unused in this body — confirm intentional.
        log_prefix: "[slug -> collection]" prefix for every log line.

    An 'app_id' filter field is injected into the definition when absent
    (the scoped wrapper relies on it). Existing indexes are compared via
    normalize_json_def; matching ones are awaited until queryable, changed
    ones are updated in place, and missing ones are created.
    """
    definition = index_def.get("definition")
    if not definition:
        logger.warning(
            f"{log_prefix} Missing 'definition' field for {index_type} "
            f"index '{index_name}'. Atlas Search and Vector Search indexes "
            f"require a 'definition' object specifying fields and configuration. "
            f"Skipping this index definition."
        )
        return

    # Ensure the definition carries an 'app_id' filter field, prepending one
    # if the manifest did not include it.
    fields = definition.get("fields", [])
    has_app_id_filter = any(
        isinstance(f, dict) and f.get("type") == "filter" and f.get("path") == "app_id"
        for f in fields
    )
    if not has_app_id_filter:
        app_id_filter = {"type": "filter", "path": "app_id"}
        fields = [app_id_filter] + fields
        definition = {**definition, "fields": fields}
        logger.info(
            f"{log_prefix} Automatically added 'app_id' filter to "
            f"{index_type} index '{index_name}' "
            f"(required by scoped wrapper)."
        )
    existing_index = await index_manager.get_search_index(index_name)
    if existing_index:
        # Atlas reports the in-flight definition under 'latestDefinition';
        # fall back to 'definition' when that key is absent.
        current_def = existing_index.get(
            "latestDefinition", existing_index.get("definition")
        )
        # Normalize both sides so key ordering does not cause false mismatches.
        normalized_current = normalize_json_def(current_def)
        normalized_expected = normalize_json_def(definition)

        if normalized_current == normalized_expected:
            logger.info(f"{log_prefix} Search index '{index_name}' definition matches.")
            # Matching but not yet queryable: wait (unless the build FAILED).
            if (
                not existing_index.get("queryable")
                and existing_index.get("status") != "FAILED"
            ):
                logger.info(
                    f"{log_prefix} Index '{index_name}' not queryable yet; waiting."
                )
                # NOTE(review): reaches into a private helper of the manager.
                await index_manager._wait_for_search_index_ready(
                    index_name, index_manager.DEFAULT_SEARCH_TIMEOUT
                )
                logger.info(f"{log_prefix} Index '{index_name}' now ready.")
            elif existing_index.get("status") == "FAILED":
                # A FAILED build is only reported; no automatic remediation.
                logger.error(
                    f"{log_prefix} Index '{index_name}' is in "
                    f"FAILED state. "
                    f"This indicates the index build failed - check "
                    f"Atlas UI for detailed error messages. "
                    f"Manual intervention required to resolve the "
                    f"issue before the index can be used."
                )
            else:
                logger.info(f"{log_prefix} Index '{index_name}' is ready.")
        else:
            # Definitions differ: log the field paths on both sides, then
            # update the index in place.
            logger.warning(
                f"{log_prefix} Search index '{index_name}' "
                f"definition changed; updating."
            )
            current_fields = (
                normalized_current.get("fields", [])
                if isinstance(normalized_current, dict)
                else []
            )
            expected_fields = (
                normalized_expected.get("fields", [])
                if isinstance(normalized_expected, dict)
                else []
            )

            current_paths = [
                f.get("path", "?") for f in current_fields if isinstance(f, dict)
            ]
            expected_paths = [
                f.get("path", "?") for f in expected_fields if isinstance(f, dict)
            ]

            logger.info(f"{log_prefix} Current index filter fields: {current_paths}")
            logger.info(f"{log_prefix} Expected index filter fields: {expected_paths}")
            logger.info(
                f"{log_prefix} Updating index '{index_name}' "
                f"with new definition (this may take a few moments)..."
            )

            # Type 4: Let index update errors bubble up to framework handler
            await index_manager.update_search_index(
                name=index_name,
                definition=definition,
                wait_for_ready=True,
            )
            logger.info(
                f"{log_prefix} ✔️ Successfully updated search index "
                f"'{index_name}'. Index is now ready."
            )
    else:
        # No existing index: create one and block until it is ready.
        logger.info(f"{log_prefix} Creating new search index '{index_name}'...")
        await index_manager.create_search_index(
            name=index_name,
            definition=definition,
            index_type=index_type,
            wait_for_ready=True,
        )
        logger.info(f"{log_prefix} ✔️ Created new '{index_type}' index '{index_name}'.")
529
+
530
+
531
async def _handle_hybrid_index(
    index_manager: AsyncAtlasIndexManager,
    index_def: Dict[str, Any],
    index_name: str,
    slug: str,
    log_prefix: str,
) -> None:
    """Create or reconcile the pair of Atlas indexes behind a hybrid search.

    A 'hybrid' manifest entry bundles a 'vector_index' (type vectorSearch)
    and a 'text_index' (type search); both must provide a 'definition'.
    Each half's name is prefixed with ``{slug}_`` unless it already carries
    that prefix; when no name is given, ``{index_name}_vector`` /
    ``{index_name}_text`` are used.

    Args:
        index_manager: Index manager bound to the target collection.
        index_def: Manifest index definition; reads its 'hybrid' object.
        index_name: Base name of the hybrid index entry.
        slug: App slug used to namespace the component index names.
        log_prefix: "[slug -> collection]" prefix for every log line.

    NOTE(review): unlike _handle_search_index, this handler does not inject
    an 'app_id' filter into either definition — confirm hybrid definitions
    are expected to include it themselves.
    """
    hybrid_config = index_def.get("hybrid")
    if not hybrid_config:
        logger.warning(
            f"{log_prefix} Missing 'hybrid' field for hybrid index '{index_name}'. "
            f"Hybrid indexes require a 'hybrid' object with 'vector_index' and "
            f"'text_index' definitions. Skipping this index definition."
        )
        return

    vector_index_config = hybrid_config.get("vector_index")
    text_index_config = hybrid_config.get("text_index")

    if not vector_index_config or not text_index_config:
        logger.warning(
            f"{log_prefix} Hybrid index '{index_name}' requires both "
            f"'vector_index' and 'text_index' in 'hybrid' field. "
            f"Skipping this index definition."
        )
        return

    vector_base_name = vector_index_config.get("name")
    text_base_name = text_index_config.get("name")

    # Resolve the vector index name: slug-prefix an explicit name (if not
    # already prefixed) or derive a default from the hybrid entry's name.
    if vector_base_name:
        if not vector_base_name.startswith(f"{slug}_"):
            vector_index_name = f"{slug}_{vector_base_name}"
        else:
            vector_index_name = vector_base_name
    else:
        vector_index_name = f"{index_name}_vector"

    # Same resolution rule for the text index name.
    if text_base_name:
        if not text_base_name.startswith(f"{slug}_"):
            text_index_name = f"{slug}_{text_base_name}"
        else:
            text_index_name = text_base_name
    else:
        text_index_name = f"{index_name}_text"

    vector_definition = vector_index_config.get("definition")
    text_definition = text_index_config.get("definition")

    if not vector_definition or not text_definition:
        logger.warning(
            f"{log_prefix} Hybrid index '{index_name}' requires 'definition' in "
            f"both 'vector_index' and 'text_index'. "
            f"Skipping this index definition."
        )
        return

    # Process vector index: reuse a matching one (waiting for readiness if
    # needed), update a changed one, or create it fresh.
    logger.info(
        f"{log_prefix} Processing vector index '{vector_index_name}' "
        f"for hybrid search..."
    )
    existing_vector_index = await index_manager.get_search_index(vector_index_name)
    if existing_vector_index:
        current_vector_def = existing_vector_index.get(
            "latestDefinition", existing_vector_index.get("definition")
        )
        # Normalize both definitions so key order does not cause mismatches.
        normalized_current_vector = normalize_json_def(current_vector_def)
        normalized_expected_vector = normalize_json_def(vector_definition)

        if normalized_current_vector == normalized_expected_vector:
            logger.info(
                f"{log_prefix} Vector index '{vector_index_name}' definition matches."
            )
            if (
                not existing_vector_index.get("queryable")
                and existing_vector_index.get("status") != "FAILED"
            ):
                logger.info(
                    f"{log_prefix} Vector index '{vector_index_name}' "
                    f"not queryable yet; waiting."
                )
                # NOTE(review): reaches into a private helper of the manager.
                await index_manager._wait_for_search_index_ready(
                    vector_index_name, index_manager.DEFAULT_SEARCH_TIMEOUT
                )
                logger.info(
                    f"{log_prefix} Vector index '{vector_index_name}' now ready."
                )
            elif existing_vector_index.get("status") == "FAILED":
                # FAILED builds are only reported; no automatic remediation.
                logger.error(
                    f"{log_prefix} Vector index '{vector_index_name}' "
                    f"is in FAILED state. "
                    f"Check Atlas UI for detailed error messages."
                )
            else:
                logger.info(
                    f"{log_prefix} Vector index '{vector_index_name}' is ready."
                )
        else:
            logger.warning(
                f"{log_prefix} Vector index '{vector_index_name}' "
                f"definition changed; updating."
            )
            # Type 4: Let index update errors bubble up to framework handler
            await index_manager.update_search_index(
                name=vector_index_name,
                definition=vector_definition,
                wait_for_ready=True,
            )
            logger.info(
                f"{log_prefix} ✔️ Successfully updated vector index "
                f"'{vector_index_name}'."
            )
    else:
        logger.info(f"{log_prefix} Creating new vector index '{vector_index_name}'...")
        await index_manager.create_search_index(
            name=vector_index_name,
            definition=vector_definition,
            index_type="vectorSearch",
            wait_for_ready=True,
        )
        logger.info(f"{log_prefix} ✔️ Created vector index '{vector_index_name}'.")

    # Process text index: same reconcile-or-create flow as the vector half.
    logger.info(
        f"{log_prefix} Processing text index '{text_index_name}' "
        f"for hybrid search..."
    )
    existing_text_index = await index_manager.get_search_index(text_index_name)
    if existing_text_index:
        current_text_def = existing_text_index.get(
            "latestDefinition", existing_text_index.get("definition")
        )
        normalized_current_text = normalize_json_def(current_text_def)
        normalized_expected_text = normalize_json_def(text_definition)

        if normalized_current_text == normalized_expected_text:
            logger.info(
                f"{log_prefix} Text index '{text_index_name}' definition matches."
            )
            if (
                not existing_text_index.get("queryable")
                and existing_text_index.get("status") != "FAILED"
            ):
                logger.info(
                    f"{log_prefix} Text index '{text_index_name}' "
                    f"not queryable yet; waiting."
                )
                await index_manager._wait_for_search_index_ready(
                    text_index_name, index_manager.DEFAULT_SEARCH_TIMEOUT
                )
                logger.info(f"{log_prefix} Text index '{text_index_name}' now ready.")
            elif existing_text_index.get("status") == "FAILED":
                logger.error(
                    f"{log_prefix} Text index '{text_index_name}' is in FAILED "
                    f"state. Check Atlas UI for detailed error messages."
                )
            else:
                logger.info(f"{log_prefix} Text index '{text_index_name}' is ready.")
        else:
            logger.warning(
                f"{log_prefix} Text index '{text_index_name}' "
                f"definition changed; updating."
            )
            # Type 4: Let index update errors bubble up to framework handler
            await index_manager.update_search_index(
                name=text_index_name,
                definition=text_definition,
                wait_for_ready=True,
            )
            logger.info(
                f"{log_prefix} ✔️ Successfully updated text index "
                f"'{text_index_name}'."
            )
    else:
        logger.info(f"{log_prefix} Creating new text index '{text_index_name}'...")
        await index_manager.create_search_index(
            name=text_index_name,
            definition=text_definition,
            index_type="search",
            wait_for_ready=True,
        )
        logger.info(f"{log_prefix} ✔️ Created text index '{text_index_name}'.")

    logger.info(
        f"{log_prefix} ✔️ Hybrid search indexes ready: "
        f"'{vector_index_name}' (vector) and "
        f"'{text_index_name}' (text)."
    )
721
+
722
+
723
def normalize_json_def(obj: Any) -> Any:
    """Return a canonical copy of *obj* for order-insensitive comparison.

    Round-trips the object through ``json.dumps(..., sort_keys=True)`` and
    ``json.loads`` so that dict key ordering and formatting differences do
    not affect equality checks. Objects that cannot be JSON-serialized are
    logged and returned unchanged as a fallback.
    """
    try:
        canonical = json.dumps(obj, sort_keys=True)
    except (TypeError, ValueError) as e:
        # Not JSON-serializable: hand back the original for direct comparison.
        logger.warning(f"Could not normalize JSON def: {e}")
        return obj
    return json.loads(canonical)
736
+
737
+
738
async def run_index_creation_for_collection(
    db: AsyncIOMotorDatabase,
    slug: str,
    collection_name: str,
    index_definitions: List[Dict[str, Any]],
):
    """Create or update indexes for a collection based on index definitions.

    Args:
        db: Motor database handle.
        slug: App slug; used for the log prefix and forwarded to the search
            and hybrid handlers for index-name prefixing.
        collection_name: Name of the collection whose indexes are managed.
        index_definitions: Manifest index definitions; each entry needs a
            'name' and a 'type' plus the fields its type-specific handler reads.

    Raises:
        Re-raises the first index-management error from a per-index handler
        (OperationFailure, ConnectionFailure, etc.) after logging it, which
        aborts processing of the remaining definitions.
    """
    # Hoisted: previously `import asyncio` re-executed on every regular-index
    # loop iteration.
    import asyncio

    log_prefix = f"[{slug} -> {collection_name}]"

    if not INDEX_MANAGER_AVAILABLE:
        logger.warning(f"{log_prefix} Index Manager not available.")
        return

    try:
        real_collection = db[collection_name]
        # Ensure collection exists before creating indexes.
        # MongoDB will create the collection if it doesn't exist, but we need to
        # ensure it exists for index operations. We can do this by inserting and
        # deleting a dummy document, or by creating the collection explicitly.
        try:
            # Try to create the collection explicitly
            await db.create_collection(collection_name)
            logger.debug(
                f"{log_prefix} Created collection '{collection_name}' "
                f"for index operations."
            )
        except CollectionInvalid as e:
            if "already exists" in str(e).lower():
                # Collection already exists, which is fine
                logger.debug(
                    f"{log_prefix} Collection '{collection_name}' already exists."
                )
            else:
                # Some other CollectionInvalid error - log but continue
                logger.warning(
                    f"{log_prefix} CollectionInvalid when ensuring collection "
                    f"exists: {e}"
                )
        except (
            OperationFailure,
            ConnectionFailure,
            ServerSelectionTimeoutError,
        ) as e:
            # If collection creation fails for other reasons, try to ensure it
            # exists by doing a no-op operation that will create it.
            logger.debug(
                f"{log_prefix} Could not create collection explicitly: {e}. "
                f"Ensuring it exists via insert/delete."
            )
            try:
                # Insert and immediately delete a dummy doc to ensure collection
                # exists (insert_one returns an InsertOneResult, not an id).
                insert_result = await real_collection.insert_one({"_temp": True})
                await real_collection.delete_one({"_id": insert_result.inserted_id})
            except (
                OperationFailure,
                ConnectionFailure,
                ServerSelectionTimeoutError,
            ) as ensure_error:
                logger.warning(
                    f"{log_prefix} Could not ensure collection exists: "
                    f"{ensure_error}"
                )

        index_manager = AsyncAtlasIndexManager(real_collection)
        logger.info(
            f"{log_prefix} Initialized IndexManager for collection "
            f"'{collection_name}'. Checking {len(index_definitions)} index defs."
        )

        # Log current indexes for debugging
        current_indexes = await index_manager.list_indexes()
        logger.debug(
            f"{log_prefix} Current indexes in collection: "
            f"{[idx.get('name') for idx in current_indexes]}"
        )
    except (
        OperationFailure,
        ConnectionFailure,
        ServerSelectionTimeoutError,
        AttributeError,
        TypeError,
        ValueError,
    ) as e:
        logger.error(
            f"{log_prefix} Failed to initialize IndexManager for collection "
            f"'{collection_name}': {e}. "
            f"This prevents all index operations for this collection. "
            f"Check MongoDB connection and collection permissions.",
            exc_info=True,
        )
        return

    # Dispatch each definition to its type-specific handler.
    for index_def in index_definitions:
        index_name = index_def.get("name")
        index_type = index_def.get("type")
        try:
            if index_type == INDEX_TYPE_REGULAR:
                await _handle_regular_index(
                    index_manager, index_def, index_name, log_prefix
                )
                # Wait for index to be ready after creation
                await asyncio.sleep(0.5)  # Give MongoDB time to make index visible
            elif index_type == INDEX_TYPE_TTL:
                await _handle_ttl_index(
                    index_manager, index_def, index_name, log_prefix
                )
            elif index_type == "partial":
                await _handle_partial_index(
                    index_manager, index_def, index_name, log_prefix
                )
            elif index_type == "text":
                await _handle_text_index(
                    index_manager, index_def, index_name, log_prefix
                )
            elif index_type == "geospatial":
                await _handle_geospatial_index(
                    index_manager, index_def, index_name, log_prefix
                )
            elif index_type in ("vectorSearch", "search"):
                await _handle_search_index(
                    index_manager, index_def, index_name, index_type, slug, log_prefix
                )
            elif index_type == "hybrid":
                await _handle_hybrid_index(
                    index_manager, index_def, index_name, slug, log_prefix
                )
            else:
                from ..constants import SUPPORTED_INDEX_TYPES

                supported_types_str = ", ".join(f"'{t}'" for t in SUPPORTED_INDEX_TYPES)
                logger.warning(
                    f"{log_prefix} Unknown index type '{index_type}' "
                    f"for index '{index_name}'. "
                    f"Supported types: {supported_types_str}. "
                    f"Skipping this index definition. "
                    f"Update manifest.json with a supported index type."
                )
        except (
            OperationFailure,
            ConnectionFailure,
            ServerSelectionTimeoutError,
            ValueError,
            TypeError,
            KeyError,
            RuntimeError,
        ) as e:
            # Bug fix: the previous message claimed the failed index would be
            # skipped and others might still be created, but the code re-raises
            # and aborts the loop. The message now matches the behavior.
            logger.error(
                f"{log_prefix} Error managing index '{index_name}' "
                f"(type: {index_type}): {e}. "
                f"Collection: {collection_name}. "
                f"Aborting index creation for this collection; remaining "
                f"index definitions will not be processed.",
                exc_info=True,
            )
            # Re-raise to surface the error in tests
            raise