mdb-engine 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. mdb_engine/README.md +144 -0
  2. mdb_engine/__init__.py +37 -0
  3. mdb_engine/auth/README.md +631 -0
  4. mdb_engine/auth/__init__.py +128 -0
  5. mdb_engine/auth/casbin_factory.py +199 -0
  6. mdb_engine/auth/casbin_models.py +46 -0
  7. mdb_engine/auth/config_defaults.py +71 -0
  8. mdb_engine/auth/config_helpers.py +213 -0
  9. mdb_engine/auth/cookie_utils.py +158 -0
  10. mdb_engine/auth/decorators.py +350 -0
  11. mdb_engine/auth/dependencies.py +747 -0
  12. mdb_engine/auth/helpers.py +64 -0
  13. mdb_engine/auth/integration.py +578 -0
  14. mdb_engine/auth/jwt.py +225 -0
  15. mdb_engine/auth/middleware.py +241 -0
  16. mdb_engine/auth/oso_factory.py +323 -0
  17. mdb_engine/auth/provider.py +570 -0
  18. mdb_engine/auth/restrictions.py +271 -0
  19. mdb_engine/auth/session_manager.py +477 -0
  20. mdb_engine/auth/token_lifecycle.py +213 -0
  21. mdb_engine/auth/token_store.py +289 -0
  22. mdb_engine/auth/users.py +1516 -0
  23. mdb_engine/auth/utils.py +614 -0
  24. mdb_engine/cli/__init__.py +13 -0
  25. mdb_engine/cli/commands/__init__.py +7 -0
  26. mdb_engine/cli/commands/generate.py +105 -0
  27. mdb_engine/cli/commands/migrate.py +83 -0
  28. mdb_engine/cli/commands/show.py +70 -0
  29. mdb_engine/cli/commands/validate.py +63 -0
  30. mdb_engine/cli/main.py +41 -0
  31. mdb_engine/cli/utils.py +92 -0
  32. mdb_engine/config.py +217 -0
  33. mdb_engine/constants.py +160 -0
  34. mdb_engine/core/README.md +542 -0
  35. mdb_engine/core/__init__.py +42 -0
  36. mdb_engine/core/app_registration.py +392 -0
  37. mdb_engine/core/connection.py +243 -0
  38. mdb_engine/core/engine.py +749 -0
  39. mdb_engine/core/index_management.py +162 -0
  40. mdb_engine/core/manifest.py +2793 -0
  41. mdb_engine/core/seeding.py +179 -0
  42. mdb_engine/core/service_initialization.py +355 -0
  43. mdb_engine/core/types.py +413 -0
  44. mdb_engine/database/README.md +522 -0
  45. mdb_engine/database/__init__.py +31 -0
  46. mdb_engine/database/abstraction.py +635 -0
  47. mdb_engine/database/connection.py +387 -0
  48. mdb_engine/database/scoped_wrapper.py +1721 -0
  49. mdb_engine/embeddings/README.md +184 -0
  50. mdb_engine/embeddings/__init__.py +62 -0
  51. mdb_engine/embeddings/dependencies.py +193 -0
  52. mdb_engine/embeddings/service.py +759 -0
  53. mdb_engine/exceptions.py +167 -0
  54. mdb_engine/indexes/README.md +651 -0
  55. mdb_engine/indexes/__init__.py +21 -0
  56. mdb_engine/indexes/helpers.py +145 -0
  57. mdb_engine/indexes/manager.py +895 -0
  58. mdb_engine/memory/README.md +451 -0
  59. mdb_engine/memory/__init__.py +30 -0
  60. mdb_engine/memory/service.py +1285 -0
  61. mdb_engine/observability/README.md +515 -0
  62. mdb_engine/observability/__init__.py +42 -0
  63. mdb_engine/observability/health.py +296 -0
  64. mdb_engine/observability/logging.py +161 -0
  65. mdb_engine/observability/metrics.py +297 -0
  66. mdb_engine/routing/README.md +462 -0
  67. mdb_engine/routing/__init__.py +73 -0
  68. mdb_engine/routing/websockets.py +813 -0
  69. mdb_engine/utils/__init__.py +7 -0
  70. mdb_engine-0.1.6.dist-info/METADATA +213 -0
  71. mdb_engine-0.1.6.dist-info/RECORD +75 -0
  72. mdb_engine-0.1.6.dist-info/WHEEL +5 -0
  73. mdb_engine-0.1.6.dist-info/entry_points.txt +2 -0
  74. mdb_engine-0.1.6.dist-info/licenses/LICENSE +661 -0
  75. mdb_engine-0.1.6.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2793 @@
1
+ """
2
+ Manifest validation and parsing system.
3
+
4
+ This module provides:
5
+ - Multi-version schema support for backward compatibility
6
+ - Schema migration functions for upgrading manifests
7
+ - Optimized validation with caching for scale
8
+ - Parallel manifest processing capabilities
9
+
10
+ This module is part of MDB_ENGINE - MongoDB Engine.
11
+
12
+ SCHEMA VERSIONING STRATEGY
13
+ ==========================
14
+
15
+ Versions:
16
+ - 1.0: Initial schema (default for manifests without version field)
17
+ - 2.0: Current schema with all features (auth.policy, auth.users, managed_indexes, etc.)
18
+
19
+ Migration Strategy:
20
+ - Automatically detects schema version from manifest
21
+ - Migrates older versions to current schema if needed
22
+ - Maintains backward compatibility
23
+ - Allows apps to specify target schema version
24
+
25
+ For Scale:
26
+ - Schema validation results are cached
27
+ - Supports parallel manifest processing
28
+ - Lazy schema loading for multiple apps
29
+ - Optimized validation paths for common cases
30
+ """
31
+
32
+ import asyncio
33
+ import hashlib
34
+ import logging
35
+ from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
36
+
37
+ from jsonschema import SchemaError, ValidationError, validate
38
+
39
+ from ..constants import (CURRENT_SCHEMA_VERSION, DEFAULT_SCHEMA_VERSION,
40
+ MAX_TTL_SECONDS, MAX_VECTOR_DIMENSIONS,
41
+ MIN_TTL_SECONDS, MIN_VECTOR_DIMENSIONS)
42
+
43
+ logger = logging.getLogger(__name__)
44
+
45
+ # Schema registry: maps a schema version string -> JSON schema definition for that version (starts empty; populated elsewhere)
46
+ SCHEMA_REGISTRY: Dict[str, Dict[str, Any]] = {}
47
+
48
+ # Validation cache: maps a string key (described as manifest_hash + version; key construction is outside this view -- confirm) -> validation result tuple typed (bool, Optional[str], Optional[List[str]])
49
+ _validation_cache: Dict[str, Tuple[bool, Optional[str], Optional[List[str]]]] = {}
50
+ _cache_lock = asyncio.Lock()  # NOTE(review): presumably serializes access to _validation_cache -- confirm at call sites
51
+
52
+
53
+ def _convert_tuples_to_lists(obj: Any) -> Any:
54
+ """
55
+ Recursively convert tuples to lists for JSON schema compatibility.
56
+
57
+ This function normalizes Python data structures to be JSON schema compliant.
58
+ JSON schema expects lists (arrays), but Python code often uses tuples for
59
+ immutable sequences (e.g., index keys: [("field1", 1), ("field2", -1)]).
60
+
61
+ This normalization should happen at the API boundary (e.g., in register_app)
62
+ before validation, keeping the validation layer schema-agnostic.
63
+
64
+ Args:
65
+ obj: Object to convert (dict, list, tuple, or primitive)
66
+
67
+ Returns:
68
+ Object with all tuples converted to lists (preserves structure)
69
+
70
+ Example:
71
+ >>> _convert_tuples_to_lists({"keys": [("field1", 1)]})
72
+ {"keys": [["field1", 1]]}
73
+ """
74
+ if isinstance(obj, tuple):
75
+ return list(obj)
76
+ elif isinstance(obj, dict):
77
+ return {key: _convert_tuples_to_lists(value) for key, value in obj.items()}
78
+ elif isinstance(obj, list):
79
+ return [_convert_tuples_to_lists(item) for item in obj]
80
+ else:
81
+ return obj
82
+
83
+
84
+ def _get_manifest_hash(manifest_data: Dict[str, Any]) -> str:
85
+ """Generate a hash for manifest caching."""
86
+ import json
87
+
88
+ # Normalize manifest by removing metadata fields that don't affect validation
89
+ normalized = {
90
+ k: v
91
+ for k, v in manifest_data.items()
92
+ if k not in ["_id", "_updated", "_created", "url"]
93
+ }
94
+ normalized_str = json.dumps(normalized, sort_keys=True)
95
+ return hashlib.sha256(normalized_str.encode()).hexdigest()[:16]
96
+
97
+
98
+ # JSON Schema definition for manifest.json (Version 2.0 - Current)
99
+ MANIFEST_SCHEMA_V2 = {
100
+ "type": "object",
101
+ "properties": {
102
+ "schema_version": {
103
+ "type": "string",
104
+ "pattern": "^\\d+\\.\\d+$",
105
+ "default": "2.0",
106
+ "description": (
107
+ "Schema version for this manifest (format: 'major.minor'). "
108
+ "Defaults to 2.0 if not specified."
109
+ ),
110
+ },
111
+ "slug": {
112
+ "type": "string",
113
+ "pattern": "^[a-z0-9_-]+$",
114
+ "description": "App slug (lowercase alphanumeric, underscores, hyphens)",
115
+ },
116
+ "name": {
117
+ "type": "string",
118
+ "minLength": 1,
119
+ "description": "Human-readable app name",
120
+ },
121
+ "description": {"type": "string", "description": "App description"},
122
+ "status": {
123
+ "type": "string",
124
+ "enum": ["active", "draft", "archived", "inactive"],
125
+ "default": "draft",
126
+ "description": "App status",
127
+ },
128
+ "auth_required": {
129
+ "type": "boolean",
130
+ "default": False,
131
+ "description": (
132
+ "Whether authentication is required for this app "
133
+ "(backward compatibility). If auth.policy is provided, "
134
+ "this is ignored."
135
+ ),
136
+ },
137
+ "auth": {
138
+ "type": "object",
139
+ "properties": {
140
+ "policy": {
141
+ "type": "object",
142
+ "properties": {
143
+ "required": {
144
+ "type": "boolean",
145
+ "default": True,
146
+ "description": (
147
+ "Whether authentication is required "
148
+ "(default: true). If false, allows anonymous "
149
+ "access but still checks other policies."
150
+ ),
151
+ },
152
+ "provider": {
153
+ "type": "string",
154
+ "enum": ["casbin", "oso", "custom"],
155
+ "default": "casbin",
156
+ "description": (
157
+ "Authorization provider to use. 'casbin' "
158
+ "(default) auto-creates Casbin with MongoDB "
159
+ "adapter, 'oso' uses OSO/Polar, 'custom' "
160
+ "expects manual provider setup."
161
+ ),
162
+ },
163
+ "authorization": {
164
+ "type": "object",
165
+ "properties": {
166
+ # Casbin-specific properties
167
+ "model": {
168
+ "type": "string",
169
+ "default": "rbac",
170
+ "description": (
171
+ "Casbin model type or path. Use 'rbac' "
172
+ "for default RBAC model, or provide "
173
+ "path to custom model file. Only used "
174
+ "when provider is 'casbin'."
175
+ ),
176
+ },
177
+ "policies_collection": {
178
+ "type": "string",
179
+ "pattern": "^[a-zA-Z0-9_]+$",
180
+ "default": "casbin_policies",
181
+ "description": (
182
+ "MongoDB collection name for storing "
183
+ "Casbin policies (default: "
184
+ "'casbin_policies'). Only used when "
185
+ "provider is 'casbin'."
186
+ ),
187
+ },
188
+ "link_users_roles": {
189
+ "type": "boolean",
190
+ "default": True,
191
+ "description": (
192
+ "If true, automatically assign Casbin "
193
+ "roles to app-level users when they are "
194
+ "created or updated. Only used when "
195
+ "provider is 'casbin'."
196
+ ),
197
+ },
198
+ "default_roles": {
199
+ "type": "array",
200
+ "items": {"type": "string"},
201
+ "description": (
202
+ "List of default roles to create in "
203
+ "Casbin (e.g., ['user', 'admin']). "
204
+ "These roles are created automatically "
205
+ "when the provider is initialized. "
206
+ "Only used when provider is 'casbin'."
207
+ ),
208
+ },
209
+ # OSO Cloud-specific properties
210
+ "api_key": {
211
+ "type": ["string", "null"],
212
+ "description": (
213
+ "OSO Cloud API key. If not provided, "
214
+ "reads from OSO_AUTH environment "
215
+ "variable. Only used when provider is "
216
+ "'oso'."
217
+ ),
218
+ },
219
+ "url": {
220
+ "type": ["string", "null"],
221
+ "description": (
222
+ "OSO Cloud URL. If not provided, "
223
+ "reads from OSO_URL environment "
224
+ "variable. Only used when provider is "
225
+ "'oso'."
226
+ ),
227
+ },
228
+ "initial_roles": {
229
+ "type": "array",
230
+ "items": {
231
+ "type": "object",
232
+ "properties": {
233
+ "user": {
234
+ "type": "string",
235
+ "format": "email",
236
+ },
237
+ "role": {"type": "string"},
238
+ "resource": {
239
+ "type": "string",
240
+ "default": "app",
241
+ },
242
+ },
243
+ "required": ["user", "role"],
244
+ "additionalProperties": False,
245
+ },
246
+ "description": (
247
+ "Initial role assignments to set up in "
248
+ "OSO Cloud on startup. Only used when "
249
+ "provider is 'oso'. Example: "
250
+ '[{"user": "admin@example.com", '
251
+ '"role": "admin"}]'
252
+ ),
253
+ },
254
+ "initial_policies": {
255
+ "type": "array",
256
+ "items": {
257
+ "type": "object",
258
+ "properties": {
259
+ "role": {"type": "string"},
260
+ "resource": {
261
+ "type": "string",
262
+ "default": "documents",
263
+ },
264
+ "action": {"type": "string"},
265
+ },
266
+ "required": ["role", "action"],
267
+ "additionalProperties": False,
268
+ },
269
+ "description": (
270
+ "Initial permission policies to set up "
271
+ "in OSO Cloud on startup. Only used "
272
+ "when provider is 'oso'. Example: "
273
+ '[{"role": "admin", "resource": '
274
+ '"documents", "action": "read"}]'
275
+ ),
276
+ },
277
+ },
278
+ "additionalProperties": False,
279
+ "description": (
280
+ "Authorization configuration. For Casbin "
281
+ "provider: use 'model', 'policies_collection', "
282
+ "'default_roles'. For OSO Cloud provider: use "
283
+ "'api_key' (or env var), 'url' (or env var), "
284
+ "'initial_roles', 'initial_policies'."
285
+ ),
286
+ },
287
+ "allowed_roles": {
288
+ "type": "array",
289
+ "items": {"type": "string"},
290
+ "description": (
291
+ "List of roles that can access this app "
292
+ "(e.g., ['admin', 'developer']). Users must "
293
+ "have at least one of these roles."
294
+ ),
295
+ },
296
+ "allowed_users": {
297
+ "type": "array",
298
+ "items": {"type": "string", "format": "email"},
299
+ "description": (
300
+ "List of specific user emails that can access "
301
+ "this app (whitelist). If provided, only "
302
+ "these users can access regardless of roles."
303
+ ),
304
+ },
305
+ "denied_users": {
306
+ "type": "array",
307
+ "items": {"type": "string", "format": "email"},
308
+ "description": (
309
+ "List of user emails that are explicitly "
310
+ "denied access (blacklist). Takes precedence "
311
+ "over allowed_users and allowed_roles."
312
+ ),
313
+ },
314
+ "required_permissions": {
315
+ "type": "array",
316
+ "items": {"type": "string"},
317
+ "description": (
318
+ "List of required permissions (format: "
319
+ "'resource:action', e.g., ['apps:view', "
320
+ "'apps:manage_own']). User must have all "
321
+ "listed permissions."
322
+ ),
323
+ },
324
+ "custom_resource": {
325
+ "type": "string",
326
+ "pattern": "^[a-z0-9_:]+$",
327
+ "description": (
328
+ "Custom Casbin resource name (e.g., "
329
+ "'app:storyweaver'). If not provided, "
330
+ "defaults to 'app:{slug}'."
331
+ ),
332
+ },
333
+ "custom_actions": {
334
+ "type": "array",
335
+ "items": {
336
+ "type": "string",
337
+ "enum": ["access", "read", "write", "admin"],
338
+ },
339
+ "description": (
340
+ "Custom actions to check (defaults to "
341
+ "['access']). Used with custom_resource for "
342
+ "fine-grained permission checks."
343
+ ),
344
+ },
345
+ "allow_anonymous": {
346
+ "type": "boolean",
347
+ "default": False,
348
+ "description": (
349
+ "If true, allows anonymous (unauthenticated) "
350
+ "access. Only applies if required is false or "
351
+ "not specified."
352
+ ),
353
+ },
354
+ "owner_can_access": {
355
+ "type": "boolean",
356
+ "default": True,
357
+ "description": (
358
+ "If true (default), the app owner "
359
+ "(developer_id) can always access the app."
360
+ ),
361
+ },
362
+ },
363
+ "additionalProperties": False,
364
+ "description": (
365
+ "Intelligent authorization policy for app-level access "
366
+ "control. Supports role-based, user-based, and "
367
+ "permission-based access. Takes precedence over "
368
+ "auth_required."
369
+ ),
370
+ },
371
+ "users": {
372
+ "type": "object",
373
+ "properties": {
374
+ "enabled": {
375
+ "type": "boolean",
376
+ "default": False,
377
+ "description": (
378
+ "Enable app-level user management. When "
379
+ "enabled, app manages its own users separate "
380
+ "from platform users."
381
+ ),
382
+ },
383
+ "strategy": {
384
+ "type": "string",
385
+ "enum": [
386
+ "app_users",
387
+ "anonymous_session",
388
+ ],
389
+ "default": "app_users",
390
+ "description": (
391
+ "User management strategy. 'app_users' = "
392
+ "app-specific user accounts, "
393
+ "'anonymous_session' = session-based anonymous "
394
+ "users."
395
+ ),
396
+ },
397
+ "collection_name": {
398
+ "type": "string",
399
+ "pattern": "^[a-zA-Z0-9_]+$",
400
+ "default": "users",
401
+ "description": (
402
+ "Collection name for app-specific users "
403
+ "(default: 'users'). Will be prefixed with "
404
+ "app slug."
405
+ ),
406
+ },
407
+ "session_cookie_name": {
408
+ "type": "string",
409
+ "pattern": "^[a-z0-9_-]+$",
410
+ "default": "app_session",
411
+ "description": (
412
+ "Cookie name for app-specific session "
413
+ "(default: 'app_session'). Will be suffixed "
414
+ "with app slug."
415
+ ),
416
+ },
417
+ "session_ttl_seconds": {
418
+ "type": "integer",
419
+ "minimum": 60,
420
+ "default": 86400,
421
+ "description": (
422
+ "Session TTL in seconds (default: 86400 = "
423
+ "24 hours). Used for app-specific sessions."
424
+ ),
425
+ },
426
+ "allow_registration": {
427
+ "type": "boolean",
428
+ "default": False,
429
+ "description": (
430
+ "Allow users to self-register in the app "
431
+ "(when strategy is 'app_users')."
432
+ ),
433
+ },
434
+ "link_platform_users": {
435
+ "type": "boolean",
436
+ "default": True,
437
+ "description": (
438
+ "Link app users to platform users. Allows platform users "
439
+ "to have app-specific profiles."
440
+ ),
441
+ },
442
+ "anonymous_user_prefix": {
443
+ "type": "string",
444
+ "default": "guest",
445
+ "description": (
446
+ "Prefix for anonymous user IDs (default: "
447
+ "'guest'). Used for anonymous_session strategy."
448
+ ),
449
+ },
450
+ "user_id_field": {
451
+ "type": "string",
452
+ "default": "app_user_id",
453
+ "description": (
454
+ "Field name in platform user JWT "
455
+ "for storing app user ID "
456
+ "(default: 'app_user_id'). "
457
+ "Used for linking."
458
+ ),
459
+ },
460
+ "demo_users": {
461
+ "type": "array",
462
+ "items": {
463
+ "type": "object",
464
+ "properties": {
465
+ "email": {
466
+ "type": "string",
467
+ "format": "email",
468
+ "description": (
469
+ "Email address for demo user "
470
+ "(defaults to platform demo email "
471
+ "if not specified)"
472
+ ),
473
+ },
474
+ "password": {
475
+ "type": "string",
476
+ "description": (
477
+ "Password for demo user (defaults "
478
+ "to platform demo password if not "
479
+ "specified, or plain text for demo "
480
+ "purposes)"
481
+ ),
482
+ },
483
+ "role": {
484
+ "type": "string",
485
+ "default": "user",
486
+ "description": (
487
+ "Role for demo user in app "
488
+ "(default: 'user')"
489
+ ),
490
+ },
491
+ "auto_create": {
492
+ "type": "boolean",
493
+ "default": True,
494
+ "description": (
495
+ "Automatically create this demo "
496
+ "user if it doesn't exist "
497
+ "(default: true)"
498
+ ),
499
+ },
500
+ "link_to_platform": {
501
+ "type": "boolean",
502
+ "default": False,
503
+ "description": (
504
+ "Link this demo user to platform "
505
+ "demo user (if platform demo "
506
+ "exists, default: false)"
507
+ ),
508
+ },
509
+ "extra_data": {
510
+ "type": "object",
511
+ "description": (
512
+ "Additional data to store with "
513
+ "demo user (e.g., store_id, "
514
+ "preferences, etc.)"
515
+ ),
516
+ },
517
+ },
518
+ "required": [],
519
+ },
520
+ "description": (
521
+ "Array of demo users to automatically "
522
+ "create/link for this app. If empty, "
523
+ "automatically uses platform demo user if "
524
+ "available."
525
+ ),
526
+ },
527
+ "auto_link_platform_demo": {
528
+ "type": "boolean",
529
+ "default": True,
530
+ "description": (
531
+ "Automatically link platform demo user to "
532
+ "experiment demo user if platform demo exists "
533
+ "(default: true). Works in combination with "
534
+ "link_platform_users and demo_users."
535
+ ),
536
+ },
537
+ "demo_user_seed_strategy": {
538
+ "type": "string",
539
+ "enum": ["auto", "manual", "disabled"],
540
+ "default": "auto",
541
+ "description": (
542
+ "Strategy for demo user seeding: 'auto' = "
543
+ "automatically create/link on first access or "
544
+ "actor init, 'manual' = require explicit "
545
+ "creation via API, 'disabled' = no automatic "
546
+ "demo user handling (default: 'auto')"
547
+ ),
548
+ },
549
+ "allow_demo_access": {
550
+ "type": "boolean",
551
+ "default": False,
552
+ "description": (
553
+ "Enable automatic demo user access. When "
554
+ "enabled, unauthenticated users are "
555
+ "automatically logged in as demo user, "
556
+ "providing seamless demo experience. "
557
+ "Requires demo users to be configured via "
558
+ "demo_users or auto_link_platform_demo. "
559
+ "(default: false)"
560
+ ),
561
+ },
562
+ },
563
+ "additionalProperties": False,
564
+ "description": (
565
+ "App-level user management configuration. Enables apps "
566
+ "to have their own user accounts and sessions "
567
+ "independent of platform authentication."
568
+ ),
569
+ },
570
+ },
571
+ "additionalProperties": False,
572
+ "description": (
573
+ "Authentication and authorization configuration. Combines "
574
+ "authorization policy (who can access) and user management "
575
+ "(user accounts and sessions)."
576
+ ),
577
+ },
578
+ "token_management": {
579
+ "type": "object",
580
+ "properties": {
581
+ "enabled": {
582
+ "type": "boolean",
583
+ "default": True,
584
+ "description": (
585
+ "Enable enhanced token management features "
586
+ "(refresh tokens, blacklist, sessions). Default: true."
587
+ ),
588
+ },
589
+ "access_token_ttl": {
590
+ "type": "integer",
591
+ "minimum": 60,
592
+ "default": 900,
593
+ "description": (
594
+ "Access token TTL in seconds " "(default: 900 = 15 minutes)."
595
+ ),
596
+ },
597
+ "refresh_token_ttl": {
598
+ "type": "integer",
599
+ "minimum": 3600,
600
+ "default": 604800,
601
+ "description": (
602
+ "Refresh token TTL in seconds " "(default: 604800 = 7 days)."
603
+ ),
604
+ },
605
+ "token_rotation": {
606
+ "type": "boolean",
607
+ "default": True,
608
+ "description": (
609
+ "Enable refresh token rotation "
610
+ "(new refresh token on each use). Default: true."
611
+ ),
612
+ },
613
+ "max_sessions_per_user": {
614
+ "type": "integer",
615
+ "minimum": 1,
616
+ "default": 10,
617
+ "description": (
618
+ "Maximum number of concurrent sessions per user "
619
+ "(default: 10)."
620
+ ),
621
+ },
622
+ "session_inactivity_timeout": {
623
+ "type": "integer",
624
+ "minimum": 60,
625
+ "default": 1800,
626
+ "description": (
627
+ "Session inactivity timeout in seconds before "
628
+ "automatic cleanup (default: 1800 = 30 minutes)."
629
+ ),
630
+ },
631
+ "security": {
632
+ "type": "object",
633
+ "properties": {
634
+ "require_https": {
635
+ "type": "boolean",
636
+ "default": False,
637
+ "description": (
638
+ "Require HTTPS in production "
639
+ "(default: false, auto-detected)."
640
+ ),
641
+ },
642
+ "cookie_secure": {
643
+ "type": "string",
644
+ "enum": ["auto", "true", "false"],
645
+ "default": "auto",
646
+ "description": (
647
+ "Secure cookie flag: 'auto' = detect from "
648
+ "request, 'true' = always secure, "
649
+ "'false' = never secure (default: 'auto')."
650
+ ),
651
+ },
652
+ "cookie_samesite": {
653
+ "type": "string",
654
+ "enum": ["strict", "lax", "none"],
655
+ "default": "lax",
656
+ "description": "SameSite cookie attribute (default: 'lax').",
657
+ },
658
+ "cookie_httponly": {
659
+ "type": "boolean",
660
+ "default": True,
661
+ "description": ("HttpOnly cookie flag (default: true)."),
662
+ },
663
+ "csrf_protection": {
664
+ "type": "boolean",
665
+ "default": True,
666
+ "description": ("Enable CSRF protection (default: true)."),
667
+ },
668
+ "rate_limiting": {
669
+ "type": "object",
670
+ "properties": {
671
+ "login": {
672
+ "type": "object",
673
+ "properties": {
674
+ "max_attempts": {
675
+ "type": "integer",
676
+ "minimum": 1,
677
+ "default": 5,
678
+ },
679
+ "window_seconds": {
680
+ "type": "integer",
681
+ "minimum": 1,
682
+ "default": 300,
683
+ },
684
+ },
685
+ "additionalProperties": False,
686
+ },
687
+ "register": {
688
+ "type": "object",
689
+ "properties": {
690
+ "max_attempts": {
691
+ "type": "integer",
692
+ "minimum": 1,
693
+ "default": 3,
694
+ },
695
+ "window_seconds": {
696
+ "type": "integer",
697
+ "minimum": 1,
698
+ "default": 600,
699
+ },
700
+ },
701
+ "additionalProperties": False,
702
+ },
703
+ "refresh": {
704
+ "type": "object",
705
+ "properties": {
706
+ "max_attempts": {
707
+ "type": "integer",
708
+ "minimum": 1,
709
+ "default": 10,
710
+ },
711
+ "window_seconds": {
712
+ "type": "integer",
713
+ "minimum": 1,
714
+ "default": 60,
715
+ },
716
+ },
717
+ "additionalProperties": False,
718
+ },
719
+ },
720
+ "additionalProperties": False,
721
+ "description": (
722
+ "Rate limiting configuration per endpoint type."
723
+ ),
724
+ },
725
+ "password_policy": {
726
+ "type": "object",
727
+ "properties": {
728
+ "allow_plain_text": {
729
+ "type": "boolean",
730
+ "default": False,
731
+ "description": (
732
+ "Allow plain text passwords "
733
+ "(NOT recommended, default: false)"
734
+ ),
735
+ },
736
+ "min_length": {
737
+ "type": "integer",
738
+ "minimum": 1,
739
+ "default": 8,
740
+ "description": "Minimum password length (default: 8)",
741
+ },
742
+ "require_uppercase": {
743
+ "type": "boolean",
744
+ "default": True,
745
+ "description": "Require uppercase letters (default: true)",
746
+ },
747
+ "require_lowercase": {
748
+ "type": "boolean",
749
+ "default": True,
750
+ "description": (
751
+ "Require lowercase letters " "(default: true)"
752
+ ),
753
+ },
754
+ "require_numbers": {
755
+ "type": "boolean",
756
+ "default": True,
757
+ "description": ("Require numbers (default: true)"),
758
+ },
759
+ "require_special": {
760
+ "type": "boolean",
761
+ "default": False,
762
+ "description": (
763
+ "Require special characters " "(default: false)"
764
+ ),
765
+ },
766
+ },
767
+ "additionalProperties": False,
768
+ "description": ("Password policy configuration"),
769
+ },
770
+ "session_fingerprinting": {
771
+ "type": "object",
772
+ "properties": {
773
+ "enabled": {
774
+ "type": "boolean",
775
+ "default": True,
776
+ "description": (
777
+ "Enable session fingerprinting "
778
+ "(default: true)"
779
+ ),
780
+ },
781
+ "validate_on_login": {
782
+ "type": "boolean",
783
+ "default": True,
784
+ "description": (
785
+ "Validate fingerprint on login "
786
+ "(default: true)"
787
+ ),
788
+ },
789
+ "validate_on_refresh": {
790
+ "type": "boolean",
791
+ "default": True,
792
+ "description": (
793
+ "Validate fingerprint on token refresh "
794
+ "(default: true)"
795
+ ),
796
+ },
797
+ "validate_on_request": {
798
+ "type": "boolean",
799
+ "default": False,
800
+ "description": (
801
+ "Validate fingerprint on every request "
802
+ "(default: false, may impact performance)"
803
+ ),
804
+ },
805
+ "strict_mode": {
806
+ "type": "boolean",
807
+ "default": False,
808
+ "description": (
809
+ "Strict mode: reject requests if "
810
+ "fingerprint doesn't match "
811
+ "(default: false)"
812
+ ),
813
+ },
814
+ },
815
+ "additionalProperties": False,
816
+ "description": "Session fingerprinting configuration for security",
817
+ },
818
+ "account_lockout": {
819
+ "type": "object",
820
+ "properties": {
821
+ "enabled": {
822
+ "type": "boolean",
823
+ "default": True,
824
+ "description": "Enable account lockout (default: true)",
825
+ },
826
+ "max_failed_attempts": {
827
+ "type": "integer",
828
+ "minimum": 1,
829
+ "default": 5,
830
+ "description": (
831
+ "Maximum failed login attempts before "
832
+ "lockout (default: 5)"
833
+ ),
834
+ },
835
+ "lockout_duration_seconds": {
836
+ "type": "integer",
837
+ "minimum": 1,
838
+ "default": 900,
839
+ "description": (
840
+ "Lockout duration in seconds "
841
+ "(default: 900 = 15 minutes)"
842
+ ),
843
+ },
844
+ "reset_on_success": {
845
+ "type": "boolean",
846
+ "default": True,
847
+ "description": (
848
+ "Reset failed attempts counter on "
849
+ "successful login (default: true)"
850
+ ),
851
+ },
852
+ },
853
+ "additionalProperties": False,
854
+ "description": "Account lockout configuration",
855
+ },
856
+ "ip_validation": {
857
+ "type": "object",
858
+ "properties": {
859
+ "enabled": {
860
+ "type": "boolean",
861
+ "default": False,
862
+ "description": (
863
+ "Enable IP address validation "
864
+ "(default: false)"
865
+ ),
866
+ },
867
+ "strict": {
868
+ "type": "boolean",
869
+ "default": False,
870
+ "description": (
871
+ "Strict mode: reject requests if IP "
872
+ "changes (default: false)"
873
+ ),
874
+ },
875
+ "allow_ip_change": {
876
+ "type": "boolean",
877
+ "default": True,
878
+ "description": (
879
+ "Allow IP address changes during session "
880
+ "(default: true)"
881
+ ),
882
+ },
883
+ },
884
+ "additionalProperties": False,
885
+ "description": "IP address validation configuration",
886
+ },
887
+ "token_fingerprinting": {
888
+ "type": "object",
889
+ "properties": {
890
+ "enabled": {
891
+ "type": "boolean",
892
+ "default": True,
893
+ "description": (
894
+ "Enable token fingerprinting " "(default: true)"
895
+ ),
896
+ },
897
+ "bind_to_device": {
898
+ "type": "boolean",
899
+ "default": True,
900
+ "description": (
901
+ "Bind tokens to device ID " "(default: true)"
902
+ ),
903
+ },
904
+ },
905
+ "additionalProperties": False,
906
+ "description": ("Token fingerprinting configuration"),
907
+ },
908
+ },
909
+ "additionalProperties": False,
910
+ "description": ("Security settings for token management."),
911
+ },
912
+ "auto_setup": {
913
+ "type": "boolean",
914
+ "default": True,
915
+ "description": (
916
+ "Automatically set up token management on app startup "
917
+ "(default: true)."
918
+ ),
919
+ },
920
+ },
921
+ "additionalProperties": False,
922
+ "description": "Token management configuration for enhanced authentication features.",
923
+ },
924
+ "data_scope": {
925
+ "type": "array",
926
+ "items": {"type": "string"},
927
+ "minItems": 1,
928
+ "default": ["self"],
929
+ "description": "List of app slugs whose data this app can access",
930
+ },
931
+ "pip_deps": {
932
+ "type": "array",
933
+ "items": {"type": "string"},
934
+ "description": "List of pip dependencies for isolated environment",
935
+ },
936
+ "managed_indexes": {
937
+ "type": "object",
938
+ "patternProperties": {
939
+ "^[a-zA-Z0-9_]+$": {
940
+ "type": "array",
941
+ "items": {"$ref": "#/definitions/indexDefinition"},
942
+ "minItems": 1,
943
+ }
944
+ },
945
+ "description": "Collection name -> list of index definitions",
946
+ },
947
+ "collection_settings": {
948
+ "type": "object",
949
+ "patternProperties": {
950
+ "^[a-zA-Z0-9_]+$": {"$ref": "#/definitions/collectionSettings"}
951
+ },
952
+ "description": "Collection name -> collection settings",
953
+ },
954
+ "websockets": {
955
+ "type": "object",
956
+ "patternProperties": {
957
+ "^[a-zA-Z0-9_-]+$": {
958
+ "type": "object",
959
+ "properties": {
960
+ "path": {
961
+ "type": "string",
962
+ "pattern": "^/[a-zA-Z0-9_/-]+$",
963
+ "description": (
964
+ "WebSocket path (e.g., '/ws', '/events', "
965
+ "'/realtime'). Must start with '/'. "
966
+ "Routes are automatically registered."
967
+ ),
968
+ },
969
+ "auth": {
970
+ "type": "object",
971
+ "properties": {
972
+ "required": {
973
+ "type": "boolean",
974
+ "default": True,
975
+ "description": (
976
+ "Whether authentication is required "
977
+ "(default: true). Uses app's auth.policy "
978
+ "if not specified."
979
+ ),
980
+ },
981
+ "allow_anonymous": {
982
+ "type": "boolean",
983
+ "default": False,
984
+ "description": (
985
+ "Allow anonymous connections even if "
986
+ "auth is required (default: false)"
987
+ ),
988
+ },
989
+ },
990
+ "additionalProperties": False,
991
+ "description": (
992
+ "Authentication configuration. If not specified, "
993
+ "uses app's auth.policy settings."
994
+ ),
995
+ },
996
+ "description": {
997
+ "type": "string",
998
+ "description": (
999
+ "Description of what this WebSocket endpoint "
1000
+ "is used for"
1001
+ ),
1002
+ },
1003
+ "ping_interval": {
1004
+ "type": "integer",
1005
+ "minimum": 5,
1006
+ "maximum": 300,
1007
+ "default": 30,
1008
+ "description": (
1009
+ "Ping interval in seconds to keep connection "
1010
+ "alive (default: 30, min: 5, max: 300)"
1011
+ ),
1012
+ },
1013
+ },
1014
+ "required": ["path"],
1015
+ "additionalProperties": False,
1016
+ "description": (
1017
+ "WebSocket endpoint configuration. Each endpoint is "
1018
+ "automatically isolated to this app. Only 'path' is "
1019
+ "required - all other settings have sensible defaults."
1020
+ ),
1021
+ }
1022
+ },
1023
+ "description": (
1024
+ "WebSocket endpoints configuration. Super simple setup - "
1025
+ "just specify the path! Each endpoint is automatically "
1026
+ "scoped and isolated to this app. Key is the endpoint name "
1027
+ "(e.g., 'realtime', 'events'), value contains path and "
1028
+ "optional settings. Routes are automatically registered with "
1029
+ "FastAPI during app registration."
1030
+ ),
1031
+ },
1032
+ "embedding_config": {
1033
+ "type": "object",
1034
+ "properties": {
1035
+ "enabled": {
1036
+ "type": "boolean",
1037
+ "default": False,
1038
+ "description": (
1039
+ "Enable semantic text splitting and embedding service. "
1040
+ "When enabled, EmbeddingService will be available for "
1041
+ "chunking text and generating embeddings."
1042
+ ),
1043
+ },
1044
+ "max_tokens_per_chunk": {
1045
+ "type": "integer",
1046
+ "minimum": 100,
1047
+ "maximum": 10000,
1048
+ "default": 1000,
1049
+ "description": (
1050
+ "Maximum tokens per chunk when splitting text. The "
1051
+ "semantic-text-splitter ensures chunks never exceed "
1052
+ "this limit while preserving semantic boundaries."
1053
+ ),
1054
+ },
1055
+ "tokenizer_model": {
1056
+ "type": "string",
1057
+ "default": "gpt-3.5-turbo",
1058
+ "description": (
1059
+ "Optional: Tokenizer model name for counting tokens "
1060
+ "during chunking (e.g., 'gpt-3.5-turbo', 'gpt-4', "
1061
+ "'gpt-4o'). Must be a valid OpenAI model name. "
1062
+ "Defaults to 'gpt-3.5-turbo' (uses cl100k_base "
1063
+ "encoding internally, which works for GPT-3.5, GPT-4, "
1064
+ "and most models). This is ONLY for token counting, "
1065
+ "NOT for embeddings. You typically don't need to set "
1066
+ "this - the platform default works for most cases."
1067
+ ),
1068
+ },
1069
+ "default_embedding_model": {
1070
+ "type": "string",
1071
+ "default": "text-embedding-3-small",
1072
+ "description": (
1073
+ "Default embedding model for chunk embeddings "
1074
+ "(e.g., 'text-embedding-3-small', "
1075
+ "'text-embedding-ada-002'). Examples should implement "
1076
+ "their own embedding clients."
1077
+ ),
1078
+ },
1079
+ },
1080
+ "additionalProperties": False,
1081
+ "description": (
1082
+ "Semantic text splitting and embedding configuration. "
1083
+ "Enables intelligent chunking with Rust-based "
1084
+ "semantic-text-splitter. Examples should implement their own "
1085
+ "embedding clients. Perfect for RAG (Retrieval Augmented "
1086
+ "Generation) applications."
1087
+ ),
1088
+ },
1089
+ "memory_config": {
1090
+ "type": "object",
1091
+ "properties": {
1092
+ "enabled": {
1093
+ "type": "boolean",
1094
+ "default": False,
1095
+ "description": (
1096
+ "Enable Mem0 memory service for this app. When "
1097
+ "enabled, Mem0MemoryService will be initialized and "
1098
+ "available for intelligent memory management using "
1099
+ "MongoDB as the vector store. mem0 handles embeddings "
1100
+ "and LLM via environment variables (.env)."
1101
+ ),
1102
+ },
1103
+ "collection_name": {
1104
+ "type": "string",
1105
+ "pattern": "^[a-zA-Z0-9_]+$",
1106
+ "description": (
1107
+ "MongoDB collection name for storing memories "
1108
+ "(defaults to '{app_slug}_memories'). Will be prefixed "
1109
+ "with app slug if not already prefixed."
1110
+ ),
1111
+ },
1112
+ "embedding_model_dims": {
1113
+ "type": "integer",
1114
+ "minimum": 128,
1115
+ "maximum": 4096,
1116
+ "default": 1536,
1117
+ "description": (
1118
+ "Dimensions of the embedding vectors (OPTIONAL - "
1119
+ "auto-detected by embedding a test string). Only "
1120
+ "specify if you need to override auto-detection. "
1121
+ "Default: 1536. The system will automatically detect "
1122
+ "the correct dimensions from your embedding model."
1123
+ ),
1124
+ },
1125
+ "enable_graph": {
1126
+ "type": "boolean",
1127
+ "default": False,
1128
+ "description": (
1129
+ "Enable knowledge graph construction for entity "
1130
+ "relationships. When enabled, Mem0 will build a graph "
1131
+ "of connected entities from memories."
1132
+ ),
1133
+ },
1134
+ "infer": {
1135
+ "type": "boolean",
1136
+ "default": True,
1137
+ "description": (
1138
+ "Whether to infer memories from conversations "
1139
+ "(default: true). If false, stores messages as-is "
1140
+ "without inference. Requires LLM configured via "
1141
+ "environment variables if true."
1142
+ ),
1143
+ },
1144
+ "embedding_model": {
1145
+ "type": "string",
1146
+ "description": (
1147
+ "Embedding model name (e.g., 'text-embedding-3-small'). "
1148
+ "If not provided, mem0 will use environment variables "
1149
+ "or defaults."
1150
+ ),
1151
+ },
1152
+ "chat_model": {
1153
+ "type": "string",
1154
+ "description": (
1155
+ "Chat model name for inference (e.g., 'gpt-4o'). "
1156
+ "If not provided, mem0 will use environment variables "
1157
+ "or defaults."
1158
+ ),
1159
+ },
1160
+ "temperature": {
1161
+ "type": "number",
1162
+ "minimum": 0.0,
1163
+ "maximum": 2.0,
1164
+ "default": 0.0,
1165
+ "description": (
1166
+ "Temperature for LLM inference "
1167
+ "(0.0 = deterministic, 2.0 = creative). "
1168
+ "Only used if infer=true."
1169
+ ),
1170
+ },
1171
+ "async_mode": {
1172
+ "type": "boolean",
1173
+ "default": True,
1174
+ "description": (
1175
+ "Whether to process memories asynchronously "
1176
+ "(default: true). Enables better performance for "
1177
+ "memory ingestion."
1178
+ ),
1179
+ },
1180
+ },
1181
+ "additionalProperties": False,
1182
+ "description": (
1183
+ "Mem0 memory service configuration. Enables intelligent memory "
1184
+ "management that automatically extracts, stores, and retrieves "
1185
+ "user memories. Uses MongoDB as the vector store (native "
1186
+ "integration with mdb-engine). mem0 handles embeddings and LLM "
1187
+ "via environment variables (.env). Configure "
1188
+ "AZURE_OPENAI_API_KEY/AZURE_OPENAI_ENDPOINT or OPENAI_API_KEY "
1189
+ "in your .env file."
1190
+ ),
1191
+ },
1192
+ "cors": {
1193
+ "type": "object",
1194
+ "properties": {
1195
+ "enabled": {
1196
+ "type": "boolean",
1197
+ "default": False,
1198
+ "description": "Enable CORS for this app (default: false)",
1199
+ },
1200
+ "allow_origins": {
1201
+ "type": "array",
1202
+ "items": {"type": "string"},
1203
+ "default": ["*"],
1204
+ "description": (
1205
+ "List of allowed origins (use ['*'] for all origins, "
1206
+ "not recommended for production)"
1207
+ ),
1208
+ },
1209
+ "allow_credentials": {
1210
+ "type": "boolean",
1211
+ "default": False,
1212
+ "description": (
1213
+ "Allow credentials (cookies, authorization headers) "
1214
+ "in CORS requests"
1215
+ ),
1216
+ },
1217
+ "allow_methods": {
1218
+ "type": "array",
1219
+ "items": {
1220
+ "type": "string",
1221
+ "enum": [
1222
+ "GET",
1223
+ "POST",
1224
+ "PUT",
1225
+ "DELETE",
1226
+ "PATCH",
1227
+ "OPTIONS",
1228
+ "HEAD",
1229
+ "*",
1230
+ ],
1231
+ },
1232
+ "default": ["GET", "POST", "PUT", "DELETE", "PATCH"],
1233
+ "description": (
1234
+ "List of allowed HTTP methods. Use ['*'] to allow all "
1235
+ "methods (not recommended for production)"
1236
+ ),
1237
+ },
1238
+ "allow_headers": {
1239
+ "type": "array",
1240
+ "items": {"type": "string"},
1241
+ "default": ["*"],
1242
+ "description": "List of allowed headers (use ['*'] for all headers)",
1243
+ },
1244
+ "expose_headers": {
1245
+ "type": "array",
1246
+ "items": {"type": "string"},
1247
+ "description": "List of headers to expose to the client",
1248
+ },
1249
+ "max_age": {
1250
+ "type": "integer",
1251
+ "minimum": 0,
1252
+ "default": 3600,
1253
+ "description": "Max age for preflight requests in seconds (default: 3600)",
1254
+ },
1255
+ },
1256
+ "additionalProperties": False,
1257
+ "description": "CORS (Cross-Origin Resource Sharing) configuration for web apps",
1258
+ },
1259
+ "observability": {
1260
+ "type": "object",
1261
+ "properties": {
1262
+ "health_checks": {
1263
+ "type": "object",
1264
+ "properties": {
1265
+ "enabled": {
1266
+ "type": "boolean",
1267
+ "default": True,
1268
+ "description": "Enable health check endpoint (default: true)",
1269
+ },
1270
+ "endpoint": {
1271
+ "type": "string",
1272
+ "pattern": "^/[a-zA-Z0-9_/-]*$",
1273
+ "default": "/health",
1274
+ "description": "Health check endpoint path (default: '/health')",
1275
+ },
1276
+ "interval_seconds": {
1277
+ "type": "integer",
1278
+ "minimum": 5,
1279
+ "default": 30,
1280
+ "description": "Health check interval in seconds (default: 30)",
1281
+ },
1282
+ },
1283
+ "additionalProperties": False,
1284
+ "description": "Health check configuration",
1285
+ },
1286
+ "metrics": {
1287
+ "type": "object",
1288
+ "properties": {
1289
+ "enabled": {
1290
+ "type": "boolean",
1291
+ "default": True,
1292
+ "description": "Enable metrics collection (default: true)",
1293
+ },
1294
+ "collect_operation_metrics": {
1295
+ "type": "boolean",
1296
+ "default": True,
1297
+ "description": (
1298
+ "Collect operation-level metrics "
1299
+ "(duration, errors, etc.)"
1300
+ ),
1301
+ },
1302
+ "collect_performance_metrics": {
1303
+ "type": "boolean",
1304
+ "default": True,
1305
+ "description": "Collect performance metrics (memory, CPU, etc.)",
1306
+ },
1307
+ "custom_metrics": {
1308
+ "type": "array",
1309
+ "items": {"type": "string"},
1310
+ "description": "List of custom metric names to track",
1311
+ },
1312
+ },
1313
+ "additionalProperties": False,
1314
+ "description": "Metrics collection configuration",
1315
+ },
1316
+ "logging": {
1317
+ "type": "object",
1318
+ "properties": {
1319
+ "level": {
1320
+ "type": "string",
1321
+ "enum": ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
1322
+ "default": "INFO",
1323
+ "description": "Logging level (default: 'INFO')",
1324
+ },
1325
+ "format": {
1326
+ "type": "string",
1327
+ "enum": ["json", "text"],
1328
+ "default": "json",
1329
+ "description": "Log format (default: 'json')",
1330
+ },
1331
+ "include_request_id": {
1332
+ "type": "boolean",
1333
+ "default": True,
1334
+ "description": "Include request ID in logs (default: true)",
1335
+ },
1336
+ "log_sensitive_data": {
1337
+ "type": "boolean",
1338
+ "default": False,
1339
+ "description": (
1340
+ "Log sensitive data (passwords, tokens, etc.) - "
1341
+ "NOT recommended for production (default: false)"
1342
+ ),
1343
+ },
1344
+ },
1345
+ "additionalProperties": False,
1346
+ "description": "Logging configuration",
1347
+ },
1348
+ },
1349
+ "additionalProperties": False,
1350
+ "description": "Observability configuration (health checks, metrics, logging)",
1351
+ },
1352
+ "initial_data": {
1353
+ "type": "object",
1354
+ "patternProperties": {
1355
+ "^[a-zA-Z0-9_]+$": {
1356
+ "type": "array",
1357
+ "items": {"type": "object"},
1358
+ "description": "Collection name -> array of documents to seed",
1359
+ }
1360
+ },
1361
+ "description": (
1362
+ "Initial data to seed into collections. Only seeds if "
1363
+ "collection is empty (idempotent). Each key is a collection "
1364
+ "name, value is an array of documents to insert."
1365
+ ),
1366
+ },
1367
+ "developer_id": {
1368
+ "type": "string",
1369
+ "format": "email",
1370
+ "description": "Email of the developer who owns this app",
1371
+ },
1372
+ },
1373
+ "required": ["slug", "name"],
1374
+ "definitions": {
1375
+ "indexDefinition": {
1376
+ "type": "object",
1377
+ "properties": {
1378
+ "name": {
1379
+ "type": "string",
1380
+ "pattern": "^[a-zA-Z0-9_]+$",
1381
+ "minLength": 1,
1382
+ "description": "Base index name (will be prefixed with slug)",
1383
+ },
1384
+ "type": {
1385
+ "type": "string",
1386
+ "enum": [
1387
+ "regular",
1388
+ "vectorSearch",
1389
+ "search",
1390
+ "text",
1391
+ "geospatial",
1392
+ "ttl",
1393
+ "partial",
1394
+ "hybrid",
1395
+ ],
1396
+ "description": (
1397
+ "Index type. 'hybrid' creates both vector and text "
1398
+ "indexes for hybrid search with $rankFusion."
1399
+ ),
1400
+ },
1401
+ "keys": {
1402
+ "oneOf": [
1403
+ {
1404
+ "type": "object",
1405
+ "patternProperties": {
1406
+ "^[a-zA-Z0-9_.]+$": {
1407
+ "oneOf": [
1408
+ {"type": "integer", "enum": [1, -1]},
1409
+ {
1410
+ "type": "string",
1411
+ "enum": [
1412
+ "text",
1413
+ "2dsphere",
1414
+ "2d",
1415
+ "geoHaystack",
1416
+ "hashed",
1417
+ ],
1418
+ },
1419
+ ]
1420
+ }
1421
+ },
1422
+ },
1423
+ {
1424
+ "type": "array",
1425
+ "items": {
1426
+ "type": "array",
1427
+ "minItems": 2,
1428
+ "maxItems": 2,
1429
+ "prefixItems": [
1430
+ {"type": "string"},
1431
+ {
1432
+ "oneOf": [
1433
+ {"type": "integer", "enum": [1, -1]},
1434
+ {
1435
+ "type": "string",
1436
+ "enum": [
1437
+ "text",
1438
+ "2dsphere",
1439
+ "2d",
1440
+ "geoHaystack",
1441
+ "hashed",
1442
+ ],
1443
+ },
1444
+ ]
1445
+ },
1446
+ ],
1447
+ "items": False,
1448
+ },
1449
+ },
1450
+ ],
1451
+ "description": (
1452
+ "Index keys (required for regular, text, geospatial, "
1453
+ "ttl, partial indexes)"
1454
+ ),
1455
+ },
1456
+ "definition": {
1457
+ "type": "object",
1458
+ "description": (
1459
+ "Index definition (required for vectorSearch and "
1460
+ "search indexes)"
1461
+ ),
1462
+ },
1463
+ "hybrid": {
1464
+ "type": "object",
1465
+ "properties": {
1466
+ "vector_index": {
1467
+ "type": "object",
1468
+ "properties": {
1469
+ "name": {
1470
+ "type": "string",
1471
+ "pattern": "^[a-zA-Z0-9_]+$",
1472
+ "description": (
1473
+ "Name for the vector index "
1474
+ "(defaults to '{name}_vector')"
1475
+ ),
1476
+ },
1477
+ "definition": {
1478
+ "type": "object",
1479
+ "description": (
1480
+ "Vector index definition with "
1481
+ "mappings.fields containing knnVector "
1482
+ "fields"
1483
+ ),
1484
+ },
1485
+ },
1486
+ "required": ["definition"],
1487
+ "additionalProperties": False,
1488
+ },
1489
+ "text_index": {
1490
+ "type": "object",
1491
+ "properties": {
1492
+ "name": {
1493
+ "type": "string",
1494
+ "pattern": "^[a-zA-Z0-9_]+$",
1495
+ "description": (
1496
+ "Name for the text index "
1497
+ "(defaults to '{name}_text')"
1498
+ ),
1499
+ },
1500
+ "definition": {
1501
+ "type": "object",
1502
+ "description": (
1503
+ "Text index definition with mappings "
1504
+ "for full-text search"
1505
+ ),
1506
+ },
1507
+ },
1508
+ "required": ["definition"],
1509
+ "additionalProperties": False,
1510
+ },
1511
+ },
1512
+ "required": ["vector_index", "text_index"],
1513
+ "additionalProperties": False,
1514
+ "description": (
1515
+ "Hybrid search configuration (required when type is "
1516
+ "'hybrid'). Defines both vector and text indexes for "
1517
+ "$rankFusion."
1518
+ ),
1519
+ },
1520
+ "options": {
1521
+ "type": "object",
1522
+ "properties": {
1523
+ "unique": {"type": "boolean"},
1524
+ "sparse": {"type": "boolean"},
1525
+ "background": {"type": "boolean"},
1526
+ "name": {"type": "string"},
1527
+ "partialFilterExpression": {
1528
+ "type": "object",
1529
+ "description": "Filter expression for partial indexes",
1530
+ },
1531
+ "expireAfterSeconds": {
1532
+ "type": "integer",
1533
+ "minimum": 1,
1534
+ "description": "TTL in seconds (required for TTL indexes)",
1535
+ },
1536
+ "weights": {
1537
+ "type": "object",
1538
+ "patternProperties": {
1539
+ "^[a-zA-Z0-9_.]+$": {"type": "integer", "minimum": 1}
1540
+ },
1541
+ "description": "Field weights for text indexes",
1542
+ },
1543
+ "default_language": {
1544
+ "type": "string",
1545
+ "description": "Default language for text indexes",
1546
+ },
1547
+ "language_override": {
1548
+ "type": "string",
1549
+ "description": "Language override field for text indexes",
1550
+ },
1551
+ },
1552
+ "description": "Index options (varies by index type)",
1553
+ },
1554
+ },
1555
+ "required": ["name", "type"],
1556
+ "allOf": [
1557
+ {
1558
+ "if": {"properties": {"type": {"const": "regular"}}},
1559
+ "then": {"required": ["keys"]},
1560
+ },
1561
+ {
1562
+ "if": {"properties": {"type": {"const": "text"}}},
1563
+ "then": {"required": ["keys"]},
1564
+ },
1565
+ {
1566
+ "if": {"properties": {"type": {"const": "geospatial"}}},
1567
+ "then": {"required": ["keys"]},
1568
+ },
1569
+ {
1570
+ "if": {"properties": {"type": {"const": "ttl"}}},
1571
+ "then": {"required": ["keys"]},
1572
+ },
1573
+ {
1574
+ "if": {"properties": {"type": {"const": "partial"}}},
1575
+ "then": {"required": ["keys", "options"]},
1576
+ "else": {
1577
+ "properties": {
1578
+ "options": {
1579
+ "not": {"required": ["partialFilterExpression"]}
1580
+ }
1581
+ }
1582
+ },
1583
+ },
1584
+ {
1585
+ "if": {"properties": {"type": {"const": "vectorSearch"}}},
1586
+ "then": {"required": ["definition"]},
1587
+ },
1588
+ {
1589
+ "if": {"properties": {"type": {"const": "search"}}},
1590
+ "then": {"required": ["definition"]},
1591
+ },
1592
+ {
1593
+ "if": {"properties": {"type": {"const": "hybrid"}}},
1594
+ "then": {"required": ["hybrid"]},
1595
+ },
1596
+ ],
1597
+ },
1598
+ "collectionSettings": {
1599
+ "type": "object",
1600
+ "properties": {
1601
+ "validation": {
1602
+ "type": "object",
1603
+ "properties": {
1604
+ "validator": {"type": "object"},
1605
+ "validationLevel": {
1606
+ "type": "string",
1607
+ "enum": ["off", "strict", "moderate"],
1608
+ },
1609
+ "validationAction": {
1610
+ "type": "string",
1611
+ "enum": ["error", "warn"],
1612
+ },
1613
+ },
1614
+ },
1615
+ "collation": {
1616
+ "type": "object",
1617
+ "properties": {
1618
+ "locale": {"type": "string"},
1619
+ "caseLevel": {"type": "boolean"},
1620
+ "caseFirst": {"type": "string"},
1621
+ "strength": {"type": "integer"},
1622
+ "numericOrdering": {"type": "boolean"},
1623
+ "alternate": {"type": "string"},
1624
+ "maxVariable": {"type": "string"},
1625
+ "normalization": {"type": "boolean"},
1626
+ "backwards": {"type": "boolean"},
1627
+ },
1628
+ },
1629
+ "capped": {"type": "boolean"},
1630
+ "size": {
1631
+ "type": "integer",
1632
+ "minimum": 1,
1633
+ "description": "Maximum size in bytes for capped collection",
1634
+ },
1635
+ "max": {
1636
+ "type": "integer",
1637
+ "minimum": 1,
1638
+ "description": "Maximum number of documents for capped collection",
1639
+ },
1640
+ "timeseries": {
1641
+ "type": "object",
1642
+ "properties": {
1643
+ "timeField": {"type": "string"},
1644
+ "metaField": {"type": "string"},
1645
+ "granularity": {
1646
+ "type": "string",
1647
+ "enum": ["seconds", "minutes", "hours"],
1648
+ },
1649
+ },
1650
+ "required": ["timeField"],
1651
+ },
1652
+ },
1653
+ },
1654
+ },
1655
+ }
1656
+
1657
# Schema for Version 1.0 (backward compatibility - simplified)
# Version 1.0 had: slug, name, description, status, auth_required,
# data_scope, pip_deps, managed_indexes
#
# Notes for maintainers:
# - Only "slug" and "name" are required; every other property is optional.
# - No "additionalProperties" restriction is declared at the top level, so
#   keys that are not listed under "properties" are not rejected by this schema.
# - "developer_id" is accepted here as well, although the summary above does
#   not list it.
MANIFEST_SCHEMA_V1 = {
    "type": "object",
    "properties": {
        # "pattern" and "const" both pin the value to the literal string "1.0".
        "schema_version": {"type": "string", "pattern": "^1\\.0$", "const": "1.0"},
        "slug": {
            "type": "string",
            "pattern": "^[a-z0-9_-]+$",
            "description": "App slug (lowercase alphanumeric, underscores, hyphens)",
        },
        "name": {
            "type": "string",
            "minLength": 1,
            "description": "Human-readable app name",
        },
        "description": {"type": "string", "description": "App description"},
        "status": {
            "type": "string",
            "enum": ["active", "draft", "archived", "inactive"],
            "default": "draft",
            "description": "App status",
        },
        "auth_required": {
            "type": "boolean",
            "default": False,
            "description": "Whether authentication is required for this app",
        },
        "data_scope": {
            "type": "array",
            "items": {"type": "string"},
            "minItems": 1,
            "default": ["self"],
            "description": "List of app slugs whose data this app can access",
        },
        "pip_deps": {
            "type": "array",
            "items": {"type": "string"},
            "description": "List of pip dependencies for isolated environment",
        },
        "managed_indexes": {
            "type": "object",
            # Keys are collection names; each maps to a non-empty list of
            # index definitions resolved through "definitions" below.
            "patternProperties": {
                "^[a-zA-Z0-9_]+$": {
                    "type": "array",
                    "items": {"$ref": "#/definitions/indexDefinition"},
                    "minItems": 1,
                }
            },
            "description": "Collection name -> list of index definitions",
        },
        "developer_id": {
            "type": "string",
            "format": "email",
            "description": "Email of the developer who owns this app",
        },
    },
    "required": ["slug", "name"],
    "definitions": {
        # Reuse same indexDefinition from V2. This is the very same dict object
        # (not a copy), so mutating it through either schema affects both.
        "indexDefinition": MANIFEST_SCHEMA_V2["definitions"]["indexDefinition"]
    },
}
1721
+
1722
# Register schemas (use constants for version strings).
# get_schema_for_version() looks schemas up in this registry by version string.
# NOTE(review): DEFAULT_SCHEMA_VERSION / CURRENT_SCHEMA_VERSION are defined
# outside this section; presumably "1.0" and "2.0" respectively - confirm.
SCHEMA_REGISTRY[DEFAULT_SCHEMA_VERSION] = MANIFEST_SCHEMA_V1
SCHEMA_REGISTRY[CURRENT_SCHEMA_VERSION] = MANIFEST_SCHEMA_V2
# Also register as default/legacy: the "default" key points at the V2 schema.
SCHEMA_REGISTRY["default"] = MANIFEST_SCHEMA_V2
# Alias kept so existing references to MANIFEST_SCHEMA keep working.
MANIFEST_SCHEMA = MANIFEST_SCHEMA_V2  # Backward compatibility
1728
+
1729
+
1730
def get_schema_version(manifest_data: Dict[str, Any]) -> str:
    """
    Detect schema version from manifest.

    An explicit, truthy ``schema_version`` wins and is returned after a format
    check. Without one, the version is guessed from the keys present: any of
    the 2.0-era keys (``auth``, ``collection_settings``) or the deprecated
    ``auth_policy`` / ``sub_auth`` keys selects "2.0"; otherwise the module
    default is returned.

    Args:
        manifest_data: Manifest dictionary

    Returns:
        Schema version string (e.g., "1.0", "2.0")

    Raises:
        ValueError: If ``schema_version`` is present but is not a string made
            of dot-separated groups of ASCII digits (e.g. "1..0", ".1", "1.",
            "v2" and non-string values such as ``2.0`` are rejected)
    """
    version: Optional[str] = manifest_data.get("schema_version")
    if version:
        # Check every dot-separated component on its own. Stripping the dots
        # and testing the remainder would wrongly accept "1..0", ".1" or "1.",
        # and str.isdigit() alone would accept non-ASCII digit characters.
        # The number of components is deliberately not restricted so values
        # that were accepted before (e.g. "2", "1.0.0") keep working.
        components = version.split(".") if isinstance(version, str) else []
        well_formed = bool(components) and all(
            part and all(ch in "0123456789" for ch in part) for part in components
        )
        if not well_formed:
            raise ValueError(
                f"Invalid schema_version format: {version}. Expected format: 'major.minor'"
            )
        return version

    # Heuristic: If manifest has new fields, assume 2.0, otherwise 1.0.
    # The deprecated auth_policy/sub_auth keys also count, for backward
    # compatibility with the old 2.0 layout.
    v2_markers = ("auth", "collection_settings", "auth_policy", "sub_auth")
    if any(marker in manifest_data for marker in v2_markers):
        return "2.0"

    return DEFAULT_SCHEMA_VERSION
1762
+
1763
+
1764
def migrate_manifest(
    manifest_data: Dict[str, Any], target_version: str = CURRENT_SCHEMA_VERSION
) -> Dict[str, Any]:
    """
    Migrate manifest from one schema version to another.

    The input dictionary is never modified: the result is a shallow copy, and
    the nested ``auth`` section is copied before it is written to.

    Args:
        manifest_data: Manifest dictionary to migrate
        target_version: Target schema version (default: current)

    Returns:
        Migrated manifest dictionary. When the detected version already equals
        ``target_version`` an unchanged shallow copy is returned; otherwise the
        copy has ``schema_version`` set to ``target_version``.

    Raises:
        ValueError: Propagated from get_schema_version() when the manifest's
            ``schema_version`` is malformed
    """
    current_version = get_schema_version(manifest_data)
    migrated = manifest_data.copy()

    if current_version == target_version:
        return migrated

    # Migration path: 1.0 -> 2.0
    if current_version == "1.0" and target_version == "2.0":
        # Migrate old auth_policy/sub_auth format to new auth.policy/auth.users format
        if "auth_policy" in migrated or "sub_auth" in migrated:
            logger.warning(
                f"Manifest {migrated.get('slug', 'unknown')} uses deprecated "
                f"'auth_policy'/'sub_auth' format. "
                f"Consider migrating to 'auth.policy'/'auth.users' format."
            )
            # `migrated` is only a shallow copy, so an existing "auth" dict is
            # still shared with the caller's manifest. Copy it before writing,
            # otherwise the migration would leak into the input.
            auth_section = dict(migrated["auth"]) if "auth" in migrated else {}
            if "auth_policy" in migrated:
                auth_section["policy"] = migrated.pop("auth_policy")
            if "sub_auth" in migrated:
                auth_section["users"] = migrated.pop("sub_auth")
            migrated["auth"] = auth_section

        # No data transformation needed - V2.0 is backward compatible
        # New fields (auth, etc.) are optional
        logger.debug(
            f"Migrated manifest from 1.0 to 2.0: {migrated.get('slug', 'unknown')}"
        )
    else:
        # No transformation exists for this pair; the version stamp below is
        # still applied (unchanged behaviour), but make that visible.
        logger.warning(
            f"No migration path from schema version {current_version} to "
            f"{target_version} for manifest {migrated.get('slug', 'unknown')}; "
            f"only schema_version was updated"
        )

    # NOTE(review): a manifest without an explicit schema_version that carries
    # auth_policy/sub_auth is detected as "2.0" by get_schema_version(), so it
    # takes the early return above and its legacy keys are not rewritten -
    # confirm whether that is intended.

    # Future: Add more migration paths as needed
    # Example: 2.0 -> 3.0, etc.

    migrated["schema_version"] = target_version
    return migrated
1815
+
1816
+
1817
def get_schema_for_version(version: str) -> Dict[str, Any]:
    """
    Get schema definition for a specific version.

    Lookup order:
      1. exact match in SCHEMA_REGISTRY;
      2. the numerically highest registered version sharing the same major
         component (a warning is logged);
      3. the schema registered for CURRENT_SCHEMA_VERSION (a warning is logged).

    Args:
        version: Schema version string

    Returns:
        Schema definition dictionary
    """
    if version in SCHEMA_REGISTRY:
        return SCHEMA_REGISTRY[version]

    def _numeric_key(candidate: str) -> Tuple[int, ...]:
        # Compare versions component-wise as integers so that "1.10" ranks
        # above "1.9"; plain string ordering would get that backwards.
        # Components that are not integers sort lowest.
        parts = []
        for piece in candidate.split("."):
            try:
                parts.append(int(piece))
            except ValueError:
                parts.append(-1)
        return tuple(parts)

    # Try to find compatible version (same major component)
    major_prefix = version.split(".")[0] + "."
    compatible = [
        registered for registered in SCHEMA_REGISTRY if registered.startswith(major_prefix)
    ]
    if compatible:
        reg_version = max(compatible, key=_numeric_key)
        logger.warning(
            f"Schema version {version} not found, using compatible version {reg_version}"
        )
        return SCHEMA_REGISTRY[reg_version]

    # Fallback to current
    logger.warning(
        f"Schema version {version} not found, using current version "
        f"{CURRENT_SCHEMA_VERSION}"
    )
    return SCHEMA_REGISTRY[CURRENT_SCHEMA_VERSION]
1848
+
1849
+
1850
async def _validate_manifest_async(
    manifest_data: Dict[str, Any], use_cache: bool = True
) -> Tuple[bool, Optional[str], Optional[List[str]]]:
    """
    Validate a manifest against the JSON Schema with versioning and caching support.

    This function:
    1. Detects the schema version via get_schema_version()
    2. Validates against the schema returned by get_schema_for_version()
    3. Caches the outcome (success or failure) keyed by manifest hash + version

    Failures are reported through the returned tuple rather than raised. In
    particular, a malformed ``schema_version`` yields a failure tuple whether
    or not caching is enabled.

    Args:
        manifest_data: The manifest data to validate
        use_cache: Whether to use validation cache (default: True, set False to force re-validation)

    Returns:
        Tuple of (is_valid, error_message, error_paths)
        - is_valid: True if valid, False otherwise
        - error_message: Human-readable error message (None if valid)
        - error_paths: List of JSON paths with errors (None if valid or when
          no path is available)

    Note: This function does NOT validate developer_id against the database.
    Use validate_manifest_with_db() for database validation.
    """
    # Stays None unless caching is requested AND both version detection and
    # hashing succeeded; a None key means "do not cache this outcome".
    cache_key: Optional[str] = None

    try:
        version = get_schema_version(manifest_data)

        # Check cache first. This lives inside the try block so that a bad
        # schema_version is reported the same way with and without caching.
        if use_cache:
            cache_key = _get_manifest_hash(manifest_data) + "_" + version
            if cache_key in _validation_cache:
                return _validation_cache[cache_key]

        schema = get_schema_for_version(version)

        # Note: Tuple-to-list conversion should happen at the API boundary (register_app),
        # not here. This keeps validation logic clean and schema-agnostic.
        validate(instance=manifest_data, schema=schema)
        result = (True, None, None)

    except ValidationError as e:
        top_level_path = list(e.absolute_path)
        error_paths = [
            ".".join(str(p) for p in top_level_path) if top_level_path else "root"
        ]
        error_messages = [e.message]

        # Include the first level of nested errors (e.g. the individual
        # branches of a failed oneOf/anyOf); deeper levels are not followed.
        for suberror in e.context or []:
            nested_path = list(suberror.absolute_path)
            if nested_path:
                error_paths.append(".".join(str(p) for p in nested_path))
            error_messages.append(suberror.message)

        # Deduplicate while keeping first-seen order, so the message is
        # identical from run to run (a set would reorder it arbitrarily).
        error_message = "; ".join(dict.fromkeys(error_messages))
        result = (False, error_message, error_paths)

    except SchemaError as e:
        result = (False, f"Invalid schema definition: {e.message}", ["schema"])

    except (TypeError, ValueError, KeyError) as e:
        # Malformed input (e.g. invalid schema_version) or a programming error.
        logger.exception("Unexpected error during manifest validation")
        result = (False, f"Manifest structure error: {str(e)}", None)

    if cache_key is not None:
        _validation_cache[cache_key] = result

    return result
1984
+
1985
+
1986
def clear_validation_cache():
    """Drop every cached validation result (e.g. between tests or after a schema change)."""
    # The cache object is emptied in place, never rebound, so no `global`
    # declaration is required.
    _validation_cache.clear()
    logger.debug("Validation cache cleared")
1991
+
1992
+
1993
async def validate_manifests_parallel(
    manifests: List[Dict[str, Any]], use_cache: bool = True
) -> List[Tuple[bool, Optional[str], Optional[List[str]], Optional[str]]]:
    """
    Validate multiple manifests concurrently via asyncio.gather.

    A failure while validating one manifest never aborts the batch: the
    exception is converted into a failure tuple for that entry, including
    entries that are not dictionaries at all.

    Args:
        manifests: List of manifest dictionaries to validate
        use_cache: Whether to use validation cache

    Returns:
        List of tuples: (is_valid, error_message, error_paths, slug)
        Each tuple corresponds to the manifest at the same index. ``slug`` is
        "unknown" when the manifest has no "slug" key or is not a mapping.
    """

    def _slug_of(manifest: Any) -> Optional[str]:
        # A malformed entry (None, list, ...) must not break slug lookup,
        # otherwise error reporting for that entry would itself raise.
        try:
            return manifest.get("slug", "unknown")
        except AttributeError:
            return "unknown"

    async def validate_one(
        manifest: Dict[str, Any]
    ) -> Tuple[bool, Optional[str], Optional[List[str]], Optional[str]]:
        is_valid, error, paths = await _validate_manifest_async(
            manifest, use_cache=use_cache
        )
        return (is_valid, error, paths, _slug_of(manifest))

    # return_exceptions=True hands back raised exceptions as results, in
    # input order, instead of propagating the first one.
    outcomes = await asyncio.gather(
        *[validate_one(m) for m in manifests], return_exceptions=True
    )

    # Convert exceptions into failure tuples
    validated_results = []
    for manifest, outcome in zip(manifests, outcomes):
        if isinstance(outcome, Exception):
            validated_results.append(
                (False, f"Validation error: {str(outcome)}", None, _slug_of(manifest))
            )
        else:
            validated_results.append(outcome)

    return validated_results
2034
+
2035
+
2036
async def validate_developer_id(
    developer_id: str, db_validator: Optional[Callable[[str], Awaitable[bool]]] = None
) -> Tuple[bool, Optional[str]]:
    """
    Check a developer_id locally and, when a validator is supplied, against the database.

    Local checks, in order: the value must be truthy, must be a ``str``, and
    must contain both "@" and "." (a deliberately loose email sanity check).

    Args:
        developer_id: The developer email to validate
        db_validator: Optional async callable receiving the developer_id and
                      returning True when the user exists with the developer
                      role; skipped when not provided

    Returns:
        Tuple of (is_valid, error_message)
        - is_valid: True if every check passed, False otherwise
        - error_message: Human-readable reason for the failure (None if valid)
    """
    if not developer_id:
        return False, "developer_id cannot be empty"

    if not isinstance(developer_id, str):
        return False, "developer_id must be a string (email)"

    # Basic email format check (JSON schema will also validate format)
    looks_like_email = all(marker in developer_id for marker in ("@", "."))
    if not looks_like_email:
        return (
            False,
            f"developer_id '{developer_id}' does not appear to be a valid email",
        )

    # Without a validator there is nothing further to verify.
    if not db_validator:
        return True, None

    try:
        if not await db_validator(developer_id):
            return (
                False,
                f"developer_id '{developer_id}' does not exist or does not have developer role",
            )
    except (ValueError, TypeError, AttributeError) as e:
        # Only these error types are turned into a failure result; anything
        # else raised by the validator propagates to the caller.
        logger.exception(
            f"Validation error validating developer_id '{developer_id}'"
        )
        return False, f"Error validating developer_id: {e}"

    return True, None
2081
+
2082
+
2083
async def validate_manifest_with_db(
    manifest_data: Dict[str, Any],
    db_validator: Callable[[str], Awaitable[bool]],
    use_cache: bool = True,
) -> Tuple[bool, Optional[str], Optional[List[str]]]:
    """
    Run manifest validation, then verify the manifest's developer_id (when
    one is present) through the supplied database validator.

    Args:
        manifest_data: The manifest data to validate
        db_validator: Async callable that accepts a developer_id (str) and
            returns a bool
        use_cache: Whether to use validation cache (default: True)

    Returns:
        Tuple of (is_valid, error_message, error_paths)
        - is_valid: True if valid, False otherwise
        - error_message: Human-readable error message (None if valid)
        - error_paths: List of JSON paths with errors (None if valid);
          ["developer_id"] when the developer_id check is what failed
    """
    # Manifest validation runs first; its failure is returned untouched.
    schema_ok, schema_error, schema_paths = await _validate_manifest_async(
        manifest_data, use_cache=use_cache
    )
    if not schema_ok:
        return False, schema_error, schema_paths

    # A manifest without a developer_id has nothing further to check.
    if "developer_id" not in manifest_data:
        return True, None, None

    developer_ok, developer_error = await validate_developer_id(
        manifest_data.get("developer_id"), db_validator
    )
    if developer_ok:
        return True, None, None

    return (
        False,
        f"developer_id validation failed: {developer_error}",
        ["developer_id"],
    )
2123
+
2124
+
2125
+ # Public API: Synchronous wrapper for backward compatibility
2126
+ # Most callers use this synchronously, so we provide a sync wrapper
2127
def validate_manifest(
    manifest_data: Dict[str, Any], use_cache: bool = True
) -> Tuple[bool, Optional[str], Optional[List[str]]]:
    """
    Validate a manifest synchronously by driving the async validator to
    completion.

    In async code, await _validate_manifest_async() directly instead: when
    this wrapper is called while an event loop is running in the current
    thread it has to block that loop until a helper thread finishes.

    Args:
        manifest_data: The manifest data to validate
        use_cache: Whether to use validation cache (default: True)

    Returns:
        Tuple of (is_valid, error_message, error_paths)
        - is_valid: True if valid, False otherwise
        - error_message: Human-readable error message (None if valid)
        - error_paths: List of JSON paths with errors (None if valid)
    """
    import asyncio
    import concurrent.futures

    def run_in_fresh_loop() -> Tuple[bool, Optional[str], Optional[List[str]]]:
        return asyncio.run(_validate_manifest_async(manifest_data, use_cache))

    # Only loop *discovery* is guarded by ``except RuntimeError``. A
    # RuntimeError raised by the validation itself must reach the caller
    # rather than silently triggering a second validation run.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_running_here = False
    else:
        loop_running_here = True

    if loop_running_here:
        # A running loop cannot be re-entered from its own thread, so the
        # coroutine is executed on a private loop in a worker thread.
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            return executor.submit(run_in_fresh_loop).result()

    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = None

    # No usable current loop (none set, already closed, or being run by
    # another thread): fall back to a private one.
    if loop is None or loop.is_closed() or loop.is_running():
        return run_in_fresh_loop()

    # Reuse the thread's existing idle loop.
    return loop.run_until_complete(
        _validate_manifest_async(manifest_data, use_cache)
    )
2169
+
2170
+
2171
+ def _validate_regular_index(
2172
+ index_def: Dict[str, Any], collection_name: str, index_name: str
2173
+ ) -> Tuple[bool, Optional[str]]:
2174
+ """Validate a regular index definition."""
2175
+ if "keys" not in index_def:
2176
+ return (
2177
+ False,
2178
+ f"Regular index '{index_name}' in collection "
2179
+ f"'{collection_name}' requires 'keys' field",
2180
+ )
2181
+ keys = index_def.get("keys")
2182
+ if (
2183
+ not keys
2184
+ or (isinstance(keys, dict) and len(keys) == 0)
2185
+ or (isinstance(keys, list) and len(keys) == 0)
2186
+ ):
2187
+ return (
2188
+ False,
2189
+ f"Regular index '{index_name}' in collection "
2190
+ f"'{collection_name}' has empty 'keys'",
2191
+ )
2192
+
2193
+ # Check for _id index
2194
+ is_id_index = False
2195
+ if isinstance(keys, dict):
2196
+ is_id_index = len(keys) == 1 and "_id" in keys
2197
+ elif isinstance(keys, list):
2198
+ is_id_index = len(keys) == 1 and len(keys[0]) >= 1 and keys[0][0] == "_id"
2199
+
2200
+ if is_id_index:
2201
+ return (
2202
+ False,
2203
+ f"Index '{index_name}' in collection '{collection_name}' "
2204
+ f"cannot target '_id' field (MongoDB creates _id indexes "
2205
+ f"automatically)",
2206
+ )
2207
+ return True, None
2208
+
2209
+
2210
+ def _validate_ttl_index(
2211
+ index_def: Dict[str, Any], collection_name: str, index_name: str
2212
+ ) -> Tuple[bool, Optional[str]]:
2213
+ """Validate a TTL index definition."""
2214
+ if "keys" not in index_def:
2215
+ return (
2216
+ False,
2217
+ f"TTL index '{index_name}' in collection '{collection_name}' "
2218
+ f"requires 'keys' field",
2219
+ )
2220
+ options = index_def.get("options", {})
2221
+ if "expireAfterSeconds" not in options:
2222
+ return (
2223
+ False,
2224
+ f"TTL index '{index_name}' in collection '{collection_name}' "
2225
+ f"requires 'expireAfterSeconds' in options",
2226
+ )
2227
+ expire_after = options.get("expireAfterSeconds")
2228
+ if not isinstance(expire_after, int) or expire_after < MIN_TTL_SECONDS:
2229
+ return (
2230
+ False,
2231
+ f"TTL index '{index_name}' in collection '{collection_name}' "
2232
+ f"requires 'expireAfterSeconds' to be >= {MIN_TTL_SECONDS}",
2233
+ )
2234
+ if expire_after > MAX_TTL_SECONDS:
2235
+ return (
2236
+ False,
2237
+ f"TTL index '{index_name}' in collection '{collection_name}' "
2238
+ f"has 'expireAfterSeconds' too large ({expire_after}). "
2239
+ f"Maximum recommended is {MAX_TTL_SECONDS} (1 year). "
2240
+ f"Consider if this is intentional.",
2241
+ )
2242
+ return True, None
2243
+
2244
+
2245
+ def _validate_partial_index(
2246
+ index_def: Dict[str, Any], collection_name: str, index_name: str
2247
+ ) -> Tuple[bool, Optional[str]]:
2248
+ """Validate a partial index definition."""
2249
+ if "keys" not in index_def:
2250
+ return (
2251
+ False,
2252
+ f"Partial index '{index_name}' in collection "
2253
+ f"'{collection_name}' requires 'keys' field",
2254
+ )
2255
+ options = index_def.get("options", {})
2256
+ if "partialFilterExpression" not in options:
2257
+ return (
2258
+ False,
2259
+ f"Partial index '{index_name}' in collection "
2260
+ f"'{collection_name}' requires 'partialFilterExpression' in "
2261
+ f"options",
2262
+ )
2263
+ return True, None
2264
+
2265
+
2266
+ def _validate_text_index(
2267
+ index_def: Dict[str, Any], collection_name: str, index_name: str
2268
+ ) -> Tuple[bool, Optional[str]]:
2269
+ """Validate a text index definition."""
2270
+ if "keys" not in index_def:
2271
+ return (
2272
+ False,
2273
+ f"Text index '{index_name}' in collection '{collection_name}' "
2274
+ f"requires 'keys' field",
2275
+ )
2276
+ keys = index_def.get("keys")
2277
+ # Text indexes should have text type in keys
2278
+ has_text = False
2279
+ if isinstance(keys, dict):
2280
+ has_text = any(v == "text" or v == "TEXT" for v in keys.values())
2281
+ elif isinstance(keys, list):
2282
+ has_text = any(len(k) >= 2 and (k[1] == "text" or k[1] == "TEXT") for k in keys)
2283
+ if not has_text:
2284
+ return (
2285
+ False,
2286
+ f"Text index '{index_name}' in collection '{collection_name}' "
2287
+ f"must have at least one field with 'text' type in keys",
2288
+ )
2289
+ return True, None
2290
+
2291
+
2292
+ def _validate_geospatial_index(
2293
+ index_def: Dict[str, Any], collection_name: str, index_name: str
2294
+ ) -> Tuple[bool, Optional[str]]:
2295
+ """Validate a geospatial index definition."""
2296
+ if "keys" not in index_def:
2297
+ return (
2298
+ False,
2299
+ f"Geospatial index '{index_name}' in collection "
2300
+ f"'{collection_name}' requires 'keys' field",
2301
+ )
2302
+ keys = index_def.get("keys")
2303
+ # Geospatial indexes should have geospatial type in keys
2304
+ has_geo = False
2305
+ if isinstance(keys, dict):
2306
+ has_geo = any(v in ["2dsphere", "2d", "geoHaystack"] for v in keys.values())
2307
+ elif isinstance(keys, list):
2308
+ has_geo = any(
2309
+ len(k) >= 2 and k[1] in ["2dsphere", "2d", "geoHaystack"] for k in keys
2310
+ )
2311
+ if not has_geo:
2312
+ return (
2313
+ False,
2314
+ f"Geospatial index '{index_name}' in collection "
2315
+ f"'{collection_name}' must have at least one field with "
2316
+ f"geospatial type ('2dsphere', '2d', or 'geoHaystack') in keys",
2317
+ )
2318
+ return True, None
2319
+
2320
+
2321
+ def _validate_vector_search_index(
2322
+ index_def: Dict[str, Any], collection_name: str, index_name: str, index_type: str
2323
+ ) -> Tuple[bool, Optional[str]]:
2324
+ """Validate a vectorSearch or search index definition."""
2325
+ if "definition" not in index_def:
2326
+ return (
2327
+ False,
2328
+ f"{index_type} index '{index_name}' in collection "
2329
+ f"'{collection_name}' requires 'definition' field",
2330
+ )
2331
+ definition = index_def.get("definition")
2332
+ if not isinstance(definition, dict):
2333
+ return (
2334
+ False,
2335
+ f"{index_type} index '{index_name}' in collection "
2336
+ f"'{collection_name}' requires 'definition' to be an object",
2337
+ )
2338
+
2339
+ # Additional validation for vectorSearch indexes
2340
+ if index_type == "vectorSearch":
2341
+ fields = definition.get("fields", [])
2342
+ if not isinstance(fields, list) or len(fields) == 0:
2343
+ return (
2344
+ False,
2345
+ f"VectorSearch index '{index_name}' in collection "
2346
+ f"'{collection_name}' requires 'definition.fields' to be "
2347
+ f"a non-empty array",
2348
+ )
2349
+
2350
+ # Validate vector field dimensions
2351
+ for field in fields:
2352
+ if isinstance(field, dict) and field.get("type") == "vector":
2353
+ num_dims = field.get("numDimensions")
2354
+ if (
2355
+ not isinstance(num_dims, int)
2356
+ or num_dims < MIN_VECTOR_DIMENSIONS
2357
+ or num_dims > MAX_VECTOR_DIMENSIONS
2358
+ ):
2359
+ return (
2360
+ False,
2361
+ f"VectorSearch index '{index_name}' in collection "
2362
+ f"'{collection_name}' requires 'numDimensions' "
2363
+ f"to be between {MIN_VECTOR_DIMENSIONS} and "
2364
+ f"{MAX_VECTOR_DIMENSIONS}, got: {num_dims}",
2365
+ )
2366
+ return True, None
2367
+
2368
+
2369
+ def _validate_hybrid_index(
2370
+ index_def: Dict[str, Any], collection_name: str, index_name: str
2371
+ ) -> Tuple[bool, Optional[str]]:
2372
+ """Validate a hybrid index definition."""
2373
+ if "hybrid" not in index_def:
2374
+ return (
2375
+ False,
2376
+ f"Hybrid index '{index_name}' in collection '{collection_name}' "
2377
+ f"requires 'hybrid' field",
2378
+ )
2379
+ hybrid = index_def.get("hybrid")
2380
+ if not isinstance(hybrid, dict):
2381
+ return (
2382
+ False,
2383
+ f"Hybrid index '{index_name}' in collection '{collection_name}' "
2384
+ f"requires 'hybrid' to be an object",
2385
+ )
2386
+
2387
+ # Validate vector_index
2388
+ vector_index = hybrid.get("vector_index")
2389
+ if not vector_index or not isinstance(vector_index, dict):
2390
+ return (
2391
+ False,
2392
+ f"Hybrid index '{index_name}' in collection '{collection_name}' "
2393
+ f"requires 'hybrid.vector_index' to be an object",
2394
+ )
2395
+ if "definition" not in vector_index:
2396
+ return (
2397
+ False,
2398
+ f"Hybrid index '{index_name}' in collection '{collection_name}' "
2399
+ f"requires 'hybrid.vector_index.definition' field",
2400
+ )
2401
+ vector_def = vector_index.get("definition")
2402
+ if not isinstance(vector_def, dict):
2403
+ return (
2404
+ False,
2405
+ f"Hybrid index '{index_name}' in collection '{collection_name}' "
2406
+ f"requires 'hybrid.vector_index.definition' to be an object",
2407
+ )
2408
+
2409
+ # Validate text_index
2410
+ text_index = hybrid.get("text_index")
2411
+ if not text_index or not isinstance(text_index, dict):
2412
+ return (
2413
+ False,
2414
+ f"Hybrid index '{index_name}' in collection '{collection_name}' "
2415
+ f"requires 'hybrid.text_index' to be an object",
2416
+ )
2417
+ if "definition" not in text_index:
2418
+ return (
2419
+ False,
2420
+ f"Hybrid index '{index_name}' in collection '{collection_name}' "
2421
+ f"requires 'hybrid.text_index.definition' field",
2422
+ )
2423
+ text_def = text_index.get("definition")
2424
+ if not isinstance(text_def, dict):
2425
+ return (
2426
+ False,
2427
+ f"Hybrid index '{index_name}' in collection '{collection_name}' "
2428
+ f"requires 'hybrid.text_index.definition' to be an object",
2429
+ )
2430
+ return True, None
2431
+
2432
+
2433
def validate_index_definition(
    index_def: Dict[str, Any], collection_name: str, index_name: str
) -> Tuple[bool, Optional[str]]:
    """
    Validate one index definition by dispatching on its 'type' field.

    Args:
        index_def: The index definition to validate
        collection_name: Name of the collection (for error context)
        index_name: Name of the index (for error context)

    Returns:
        Tuple of (is_valid, error_message)
    """
    index_type = index_def.get("type")
    if not index_type:
        return (
            False,
            f"Index '{index_name}' in collection '{collection_name}' "
            f"is missing 'type' field",
        )

    context = (index_def, collection_name, index_name)
    # Lazy dispatch table: only the thunk for the requested type runs.
    checks = {
        "regular": lambda: _validate_regular_index(*context),
        "ttl": lambda: _validate_ttl_index(*context),
        "partial": lambda: _validate_partial_index(*context),
        "text": lambda: _validate_text_index(*context),
        "geospatial": lambda: _validate_geospatial_index(*context),
        "vectorSearch": lambda: _validate_vector_search_index(*context, index_type),
        "search": lambda: _validate_vector_search_index(*context, index_type),
        "hybrid": lambda: _validate_hybrid_index(*context),
    }

    # Non-string types (possibly unhashable) can never name a known type, so
    # they skip the lookup and land in the "unknown" branch.
    check = checks.get(index_type) if isinstance(index_type, str) else None
    if check is not None:
        return check()

    return (
        False,
        f"Unknown index type '{index_type}' for index '{index_name}' "
        f"in collection '{collection_name}'",
    )
2478
+
2479
+
2480
def validate_managed_indexes(
    managed_indexes: Dict[str, List[Dict[str, Any]]]
) -> Tuple[bool, Optional[str]]:
    """
    Validate every index of every collection in a managed_indexes mapping.

    Validation stops at the first problem found; collections and their
    indexes are visited in iteration order.

    Args:
        managed_indexes: Mapping of collection name to a list of index
            definitions

    Returns:
        Tuple of (is_valid, error_message)
    """
    if not isinstance(managed_indexes, dict):
        return (
            False,
            "'managed_indexes' must be an object mapping collection names to index arrays",
        )

    for collection_name, indexes in managed_indexes.items():
        name_ok = isinstance(collection_name, str) and collection_name
        if not name_ok:
            return (
                False,
                f"Collection name must be a non-empty string, got: {collection_name}",
            )

        if not isinstance(indexes, list):
            return False, f"Indexes for collection '{collection_name}' must be an array"

        if not indexes:
            return False, f"Collection '{collection_name}' has an empty indexes array"

        for position, index_def in enumerate(indexes):
            if not isinstance(index_def, dict):
                return (
                    False,
                    f"Index #{position} in collection '{collection_name}' must be an object",
                )

            # Unnamed indexes are referred to by position in error messages.
            index_name = index_def.get("name", f"index_{position}")
            index_ok, index_error = validate_index_definition(
                index_def, collection_name, index_name
            )
            if not index_ok:
                return False, index_error

    return True, None
2526
+
2527
+
2528
+ # ============================================================================
2529
+ # CLASS-BASED API (Enterprise-ready)
2530
+ # ============================================================================
2531
+
2532
+
2533
class ManifestValidator:
    """
    Class-based facade over the module's manifest validation functions.

    Every operation is a static method that forwards to the functional
    API, so it can be called on the class or on an instance.
    """

    def __init__(self, use_cache: bool = True):
        """
        Initialize validator.

        Args:
            use_cache: Whether to use validation cache (default: True).
                Stored on the instance; the static methods below take their
                own ``use_cache`` argument and do not read this attribute.
        """
        self.use_cache = use_cache

    @staticmethod
    def validate(
        manifest: Dict[str, Any], use_cache: bool = True
    ) -> Tuple[bool, Optional[str], Optional[List[str]]]:
        """
        Validate a manifest synchronously.

        Args:
            manifest: Manifest dictionary to validate
            use_cache: Whether to use validation cache

        Returns:
            Tuple of (is_valid, error_message, error_paths)
        """
        outcome = validate_manifest(manifest_data=manifest, use_cache=use_cache)
        return outcome

    @staticmethod
    async def validate_async(
        manifest: Dict[str, Any], use_cache: bool = True
    ) -> Tuple[bool, Optional[str], Optional[List[str]]]:
        """
        Validate a manifest from async code.

        Args:
            manifest: Manifest dictionary to validate
            use_cache: Whether to use validation cache

        Returns:
            Tuple of (is_valid, error_message, error_paths)
        """
        outcome = await _validate_manifest_async(manifest, use_cache=use_cache)
        return outcome

    @staticmethod
    async def validate_with_db(
        manifest: Dict[str, Any],
        db_validator: Callable[[str], Awaitable[bool]],
        use_cache: bool = True,
    ) -> Tuple[bool, Optional[str], Optional[List[str]]]:
        """
        Validate a manifest and check its developer_id with a database
        validator.

        Args:
            manifest: Manifest dictionary to validate
            db_validator: Async function that checks if developer_id exists
            use_cache: Whether to use validation cache

        Returns:
            Tuple of (is_valid, error_message, error_paths)
        """
        outcome = await validate_manifest_with_db(
            manifest_data=manifest, db_validator=db_validator, use_cache=use_cache
        )
        return outcome

    @staticmethod
    def validate_managed_indexes(
        managed_indexes: Dict[str, List[Dict[str, Any]]]
    ) -> Tuple[bool, Optional[str]]:
        """
        Validate a managed indexes configuration.

        Args:
            managed_indexes: Managed indexes dictionary

        Returns:
            Tuple of (is_valid, error_message)
        """
        # Inside a method body this name resolves to the module-level
        # function, not to this static method.
        return validate_managed_indexes(managed_indexes=managed_indexes)

    @staticmethod
    def validate_index_definition(
        index_def: Dict[str, Any], collection_name: str, index_name: str
    ) -> Tuple[bool, Optional[str]]:
        """
        Validate a single index definition.

        Args:
            index_def: Index definition dictionary
            collection_name: Collection name for context
            index_name: Index name for context

        Returns:
            Tuple of (is_valid, error_message)
        """
        return validate_index_definition(
            index_def=index_def,
            collection_name=collection_name,
            index_name=index_name,
        )

    @staticmethod
    def get_schema_version(manifest: Dict[str, Any]) -> str:
        """
        Return the schema version of a manifest.

        Args:
            manifest: Manifest dictionary

        Returns:
            Schema version string (e.g., "1.0", "2.0")
        """
        version = get_schema_version(manifest)
        return version

    @staticmethod
    def migrate(
        manifest: Dict[str, Any], target_version: str = CURRENT_SCHEMA_VERSION
    ) -> Dict[str, Any]:
        """
        Migrate a manifest to a target schema version.

        Args:
            manifest: Manifest dictionary to migrate
            target_version: Target schema version

        Returns:
            Migrated manifest dictionary
        """
        migrated = migrate_manifest(manifest, target_version)
        return migrated

    @staticmethod
    def clear_cache() -> None:
        """Clear validation cache."""
        clear_validation_cache()
2672
+
2673
+
2674
class ManifestParser:
    """
    Loader for manifests held in files, JSON strings or dictionaries.

    Each loader can validate what it loaded and raises ValueError when the
    manifest is rejected. All loaders are static coroutines; validation is
    awaited directly on the caller's event loop.
    """

    def __init__(self, validator: Optional[ManifestValidator] = None):
        """
        Initialize parser.

        Args:
            validator: Optional ManifestValidator instance (creates default
                if None). Stored on the instance; the static loaders below
                call ManifestValidator directly.
        """
        self.validator = validator or ManifestValidator()

    @staticmethod
    async def _ensure_valid(manifest_data: Dict[str, Any]) -> None:
        """Raise ValueError describing the failure if the manifest is invalid."""
        # Await the async validator: the synchronous wrapper would start a
        # helper thread with its own event loop and block the running loop
        # until that thread finished.
        is_valid, error, paths = await ManifestValidator.validate_async(manifest_data)
        if is_valid:
            return
        # At most the first three failing paths are included in the message.
        error_path_str = f" (errors in: {', '.join(paths[:3])})" if paths else ""
        raise ValueError(f"Manifest validation failed: {error}{error_path_str}")

    @staticmethod
    async def load_from_file(path: Any, validate: bool = True) -> Dict[str, Any]:
        """
        Load and validate manifest from file.

        The file is read synchronously as UTF-8 text and parsed as JSON.

        Args:
            path: Path to manifest.json file (Path object or string)
            validate: Whether to validate after loading (default: True)

        Returns:
            Manifest dictionary

        Raises:
            FileNotFoundError: If file doesn't exist
            json.JSONDecodeError: If the file content is not valid JSON
            ValueError: If validation fails
        """
        import json
        from pathlib import Path

        path_obj = path if isinstance(path, Path) else Path(path)

        if not path_obj.exists():
            raise FileNotFoundError(f"Manifest file not found: {path_obj}")

        manifest_data = json.loads(path_obj.read_text(encoding="utf-8"))

        if validate:
            await ManifestParser._ensure_valid(manifest_data)

        return manifest_data

    @staticmethod
    async def load_from_dict(
        data: Dict[str, Any], validate: bool = True
    ) -> Dict[str, Any]:
        """
        Load and validate manifest from dictionary.

        Args:
            data: Manifest dictionary
            validate: Whether to validate (default: True)

        Returns:
            Shallow copy of the (validated) manifest dictionary

        Raises:
            ValueError: If validation fails
        """
        if validate:
            await ManifestParser._ensure_valid(data)

        return data.copy()

    @staticmethod
    async def load_from_string(content: str, validate: bool = True) -> Dict[str, Any]:
        """
        Load and validate manifest from JSON string.

        Args:
            content: JSON string content
            validate: Whether to validate (default: True)

        Returns:
            Manifest dictionary

        Raises:
            json.JSONDecodeError: If JSON is invalid
            ValueError: If validation fails
        """
        import json

        manifest_data = json.loads(content)
        return await ManifestParser.load_from_dict(manifest_data, validate=validate)

    @staticmethod
    async def load_and_migrate(
        manifest: Dict[str, Any], target_version: str = CURRENT_SCHEMA_VERSION
    ) -> Dict[str, Any]:
        """
        Migrate a manifest to the target schema version.

        No validation is performed here; the manifest is passed straight to
        ManifestValidator.migrate.

        Args:
            manifest: Manifest dictionary
            target_version: Target schema version

        Returns:
            Migrated manifest dictionary
        """
        return ManifestValidator.migrate(manifest, target_version)