mdb-engine 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdb_engine/README.md +144 -0
- mdb_engine/__init__.py +37 -0
- mdb_engine/auth/README.md +631 -0
- mdb_engine/auth/__init__.py +128 -0
- mdb_engine/auth/casbin_factory.py +199 -0
- mdb_engine/auth/casbin_models.py +46 -0
- mdb_engine/auth/config_defaults.py +71 -0
- mdb_engine/auth/config_helpers.py +213 -0
- mdb_engine/auth/cookie_utils.py +158 -0
- mdb_engine/auth/decorators.py +350 -0
- mdb_engine/auth/dependencies.py +747 -0
- mdb_engine/auth/helpers.py +64 -0
- mdb_engine/auth/integration.py +578 -0
- mdb_engine/auth/jwt.py +225 -0
- mdb_engine/auth/middleware.py +241 -0
- mdb_engine/auth/oso_factory.py +323 -0
- mdb_engine/auth/provider.py +570 -0
- mdb_engine/auth/restrictions.py +271 -0
- mdb_engine/auth/session_manager.py +477 -0
- mdb_engine/auth/token_lifecycle.py +213 -0
- mdb_engine/auth/token_store.py +289 -0
- mdb_engine/auth/users.py +1516 -0
- mdb_engine/auth/utils.py +614 -0
- mdb_engine/cli/__init__.py +13 -0
- mdb_engine/cli/commands/__init__.py +7 -0
- mdb_engine/cli/commands/generate.py +105 -0
- mdb_engine/cli/commands/migrate.py +83 -0
- mdb_engine/cli/commands/show.py +70 -0
- mdb_engine/cli/commands/validate.py +63 -0
- mdb_engine/cli/main.py +41 -0
- mdb_engine/cli/utils.py +92 -0
- mdb_engine/config.py +217 -0
- mdb_engine/constants.py +160 -0
- mdb_engine/core/README.md +542 -0
- mdb_engine/core/__init__.py +42 -0
- mdb_engine/core/app_registration.py +392 -0
- mdb_engine/core/connection.py +243 -0
- mdb_engine/core/engine.py +749 -0
- mdb_engine/core/index_management.py +162 -0
- mdb_engine/core/manifest.py +2793 -0
- mdb_engine/core/seeding.py +179 -0
- mdb_engine/core/service_initialization.py +355 -0
- mdb_engine/core/types.py +413 -0
- mdb_engine/database/README.md +522 -0
- mdb_engine/database/__init__.py +31 -0
- mdb_engine/database/abstraction.py +635 -0
- mdb_engine/database/connection.py +387 -0
- mdb_engine/database/scoped_wrapper.py +1721 -0
- mdb_engine/embeddings/README.md +184 -0
- mdb_engine/embeddings/__init__.py +62 -0
- mdb_engine/embeddings/dependencies.py +193 -0
- mdb_engine/embeddings/service.py +759 -0
- mdb_engine/exceptions.py +167 -0
- mdb_engine/indexes/README.md +651 -0
- mdb_engine/indexes/__init__.py +21 -0
- mdb_engine/indexes/helpers.py +145 -0
- mdb_engine/indexes/manager.py +895 -0
- mdb_engine/memory/README.md +451 -0
- mdb_engine/memory/__init__.py +30 -0
- mdb_engine/memory/service.py +1285 -0
- mdb_engine/observability/README.md +515 -0
- mdb_engine/observability/__init__.py +42 -0
- mdb_engine/observability/health.py +296 -0
- mdb_engine/observability/logging.py +161 -0
- mdb_engine/observability/metrics.py +297 -0
- mdb_engine/routing/README.md +462 -0
- mdb_engine/routing/__init__.py +73 -0
- mdb_engine/routing/websockets.py +813 -0
- mdb_engine/utils/__init__.py +7 -0
- mdb_engine-0.1.6.dist-info/METADATA +213 -0
- mdb_engine-0.1.6.dist-info/RECORD +75 -0
- mdb_engine-0.1.6.dist-info/WHEEL +5 -0
- mdb_engine-0.1.6.dist-info/entry_points.txt +2 -0
- mdb_engine-0.1.6.dist-info/licenses/LICENSE +661 -0
- mdb_engine-0.1.6.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,2793 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Manifest validation and parsing system.
|
|
3
|
+
|
|
4
|
+
This module provides:
|
|
5
|
+
- Multi-version schema support for backward compatibility
|
|
6
|
+
- Schema migration functions for upgrading manifests
|
|
7
|
+
- Optimized validation with caching for scale
|
|
8
|
+
- Parallel manifest processing capabilities
|
|
9
|
+
|
|
10
|
+
This module is part of MDB_ENGINE - MongoDB Engine.
|
|
11
|
+
|
|
12
|
+
SCHEMA VERSIONING STRATEGY
|
|
13
|
+
==========================
|
|
14
|
+
|
|
15
|
+
Versions:
|
|
16
|
+
- 1.0: Initial schema (default for manifests without version field)
|
|
17
|
+
- 2.0: Current schema with all features (auth.policy, auth.users, managed_indexes, etc.)
|
|
18
|
+
|
|
19
|
+
Migration Strategy:
|
|
20
|
+
- Automatically detects schema version from manifest
|
|
21
|
+
- Migrates older versions to current schema if needed
|
|
22
|
+
- Maintains backward compatibility
|
|
23
|
+
- Allows apps to specify target schema version
|
|
24
|
+
|
|
25
|
+
For Scale:
|
|
26
|
+
- Schema validation results are cached
|
|
27
|
+
- Supports parallel manifest processing
|
|
28
|
+
- Lazy schema loading for multiple apps
|
|
29
|
+
- Optimized validation paths for common cases
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
import asyncio
|
|
33
|
+
import hashlib
|
|
34
|
+
import logging
|
|
35
|
+
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
|
|
36
|
+
|
|
37
|
+
from jsonschema import SchemaError, ValidationError, validate
|
|
38
|
+
|
|
39
|
+
from ..constants import (CURRENT_SCHEMA_VERSION, DEFAULT_SCHEMA_VERSION,
|
|
40
|
+
MAX_TTL_SECONDS, MAX_VECTOR_DIMENSIONS,
|
|
41
|
+
MIN_TTL_SECONDS, MIN_VECTOR_DIMENSIONS)
|
|
42
|
+
|
|
43
|
+
logger = logging.getLogger(__name__)
|
|
44
|
+
|
|
45
|
+
# Schema registry: maps version -> schema definition
|
|
46
|
+
SCHEMA_REGISTRY: Dict[str, Dict[str, Any]] = {}
|
|
47
|
+
|
|
48
|
+
# Validation cache: maps (manifest_hash, version) -> validation_result
|
|
49
|
+
_validation_cache: Dict[str, Tuple[bool, Optional[str], Optional[List[str]]]] = {}
|
|
50
|
+
_cache_lock = asyncio.Lock()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _convert_tuples_to_lists(obj: Any) -> Any:
    """
    Recursively convert tuples to lists for JSON schema compatibility.

    This function normalizes Python data structures to be JSON schema compliant.
    JSON schema expects lists (arrays), but Python code often uses tuples for
    immutable sequences (e.g., index keys: [("field1", 1), ("field2", -1)]).

    This normalization should happen at the API boundary (e.g., in register_app)
    before validation, keeping the validation layer schema-agnostic.

    The input is never mutated: dicts, lists and tuples are rebuilt as new
    containers, while any other value is returned as-is.

    Args:
        obj: Object to convert (dict, list, tuple, or primitive)

    Returns:
        Object with all tuples converted to lists (preserves structure),
        including tuples nested inside other tuples, lists, or dict values.

    Example:
        >>> _convert_tuples_to_lists({"keys": [("field1", 1)]})
        {'keys': [['field1', 1]]}
        >>> _convert_tuples_to_lists((("field1", 1), ("field2", -1)))
        [['field1', 1], ['field2', -1]]
    """
    if isinstance(obj, dict):
        # Only values are normalized; keys are kept untouched.
        return {key: _convert_tuples_to_lists(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        # Tuples must be walked element by element just like lists: a plain
        # list(obj) would leave tuples nested inside a tuple unconverted.
        return [_convert_tuples_to_lists(item) for item in obj]
    return obj
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _get_manifest_hash(manifest_data: Dict[str, Any]) -> str:
    """
    Generate a short, order-independent hash of a manifest for caching.

    The top-level keys "_id", "_updated", "_created" and "url" are dropped
    before hashing; keys with the same names nested deeper in the manifest
    are kept. The remaining data is serialized to JSON with sorted keys, so
    two manifests that differ only in key order produce the same hash.

    Args:
        manifest_data: Manifest dictionary. The values that remain after
            filtering must be JSON-serializable.

    Returns:
        The first 16 hexadecimal characters of the SHA-256 digest of the
        serialized manifest.
    """
    import json

    # Top-level fields that are left out of the hash.
    ignored_keys = ("_id", "_updated", "_created", "url")

    payload = {}
    for field, value in manifest_data.items():
        if field in ignored_keys:
            continue
        payload[field] = value

    serialized = json.dumps(payload, sort_keys=True)
    digest = hashlib.sha256(serialized.encode())
    return digest.hexdigest()[:16]
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# JSON Schema definition for manifest.json (Version 2.0 - Current)
|
|
99
|
+
MANIFEST_SCHEMA_V2 = {
|
|
100
|
+
"type": "object",
|
|
101
|
+
"properties": {
|
|
102
|
+
"schema_version": {
|
|
103
|
+
"type": "string",
|
|
104
|
+
"pattern": "^\\d+\\.\\d+$",
|
|
105
|
+
"default": "2.0",
|
|
106
|
+
"description": (
|
|
107
|
+
"Schema version for this manifest (format: 'major.minor'). "
|
|
108
|
+
"Defaults to 2.0 if not specified."
|
|
109
|
+
),
|
|
110
|
+
},
|
|
111
|
+
"slug": {
|
|
112
|
+
"type": "string",
|
|
113
|
+
"pattern": "^[a-z0-9_-]+$",
|
|
114
|
+
"description": "App slug (lowercase alphanumeric, underscores, hyphens)",
|
|
115
|
+
},
|
|
116
|
+
"name": {
|
|
117
|
+
"type": "string",
|
|
118
|
+
"minLength": 1,
|
|
119
|
+
"description": "Human-readable app name",
|
|
120
|
+
},
|
|
121
|
+
"description": {"type": "string", "description": "App description"},
|
|
122
|
+
"status": {
|
|
123
|
+
"type": "string",
|
|
124
|
+
"enum": ["active", "draft", "archived", "inactive"],
|
|
125
|
+
"default": "draft",
|
|
126
|
+
"description": "App status",
|
|
127
|
+
},
|
|
128
|
+
"auth_required": {
|
|
129
|
+
"type": "boolean",
|
|
130
|
+
"default": False,
|
|
131
|
+
"description": (
|
|
132
|
+
"Whether authentication is required for this app "
|
|
133
|
+
"(backward compatibility). If auth.policy is provided, "
|
|
134
|
+
"this is ignored."
|
|
135
|
+
),
|
|
136
|
+
},
|
|
137
|
+
"auth": {
|
|
138
|
+
"type": "object",
|
|
139
|
+
"properties": {
|
|
140
|
+
"policy": {
|
|
141
|
+
"type": "object",
|
|
142
|
+
"properties": {
|
|
143
|
+
"required": {
|
|
144
|
+
"type": "boolean",
|
|
145
|
+
"default": True,
|
|
146
|
+
"description": (
|
|
147
|
+
"Whether authentication is required "
|
|
148
|
+
"(default: true). If false, allows anonymous "
|
|
149
|
+
"access but still checks other policies."
|
|
150
|
+
),
|
|
151
|
+
},
|
|
152
|
+
"provider": {
|
|
153
|
+
"type": "string",
|
|
154
|
+
"enum": ["casbin", "oso", "custom"],
|
|
155
|
+
"default": "casbin",
|
|
156
|
+
"description": (
|
|
157
|
+
"Authorization provider to use. 'casbin' "
|
|
158
|
+
"(default) auto-creates Casbin with MongoDB "
|
|
159
|
+
"adapter, 'oso' uses OSO/Polar, 'custom' "
|
|
160
|
+
"expects manual provider setup."
|
|
161
|
+
),
|
|
162
|
+
},
|
|
163
|
+
"authorization": {
|
|
164
|
+
"type": "object",
|
|
165
|
+
"properties": {
|
|
166
|
+
# Casbin-specific properties
|
|
167
|
+
"model": {
|
|
168
|
+
"type": "string",
|
|
169
|
+
"default": "rbac",
|
|
170
|
+
"description": (
|
|
171
|
+
"Casbin model type or path. Use 'rbac' "
|
|
172
|
+
"for default RBAC model, or provide "
|
|
173
|
+
"path to custom model file. Only used "
|
|
174
|
+
"when provider is 'casbin'."
|
|
175
|
+
),
|
|
176
|
+
},
|
|
177
|
+
"policies_collection": {
|
|
178
|
+
"type": "string",
|
|
179
|
+
"pattern": "^[a-zA-Z0-9_]+$",
|
|
180
|
+
"default": "casbin_policies",
|
|
181
|
+
"description": (
|
|
182
|
+
"MongoDB collection name for storing "
|
|
183
|
+
"Casbin policies (default: "
|
|
184
|
+
"'casbin_policies'). Only used when "
|
|
185
|
+
"provider is 'casbin'."
|
|
186
|
+
),
|
|
187
|
+
},
|
|
188
|
+
"link_users_roles": {
|
|
189
|
+
"type": "boolean",
|
|
190
|
+
"default": True,
|
|
191
|
+
"description": (
|
|
192
|
+
"If true, automatically assign Casbin "
|
|
193
|
+
"roles to app-level users when they are "
|
|
194
|
+
"created or updated. Only used when "
|
|
195
|
+
"provider is 'casbin'."
|
|
196
|
+
),
|
|
197
|
+
},
|
|
198
|
+
"default_roles": {
|
|
199
|
+
"type": "array",
|
|
200
|
+
"items": {"type": "string"},
|
|
201
|
+
"description": (
|
|
202
|
+
"List of default roles to create in "
|
|
203
|
+
"Casbin (e.g., ['user', 'admin']). "
|
|
204
|
+
"These roles are created automatically "
|
|
205
|
+
"when the provider is initialized. "
|
|
206
|
+
"Only used when provider is 'casbin'."
|
|
207
|
+
),
|
|
208
|
+
},
|
|
209
|
+
# OSO Cloud-specific properties
|
|
210
|
+
"api_key": {
|
|
211
|
+
"type": ["string", "null"],
|
|
212
|
+
"description": (
|
|
213
|
+
"OSO Cloud API key. If not provided, "
|
|
214
|
+
"reads from OSO_AUTH environment "
|
|
215
|
+
"variable. Only used when provider is "
|
|
216
|
+
"'oso'."
|
|
217
|
+
),
|
|
218
|
+
},
|
|
219
|
+
"url": {
|
|
220
|
+
"type": ["string", "null"],
|
|
221
|
+
"description": (
|
|
222
|
+
"OSO Cloud URL. If not provided, "
|
|
223
|
+
"reads from OSO_URL environment "
|
|
224
|
+
"variable. Only used when provider is "
|
|
225
|
+
"'oso'."
|
|
226
|
+
),
|
|
227
|
+
},
|
|
228
|
+
"initial_roles": {
|
|
229
|
+
"type": "array",
|
|
230
|
+
"items": {
|
|
231
|
+
"type": "object",
|
|
232
|
+
"properties": {
|
|
233
|
+
"user": {
|
|
234
|
+
"type": "string",
|
|
235
|
+
"format": "email",
|
|
236
|
+
},
|
|
237
|
+
"role": {"type": "string"},
|
|
238
|
+
"resource": {
|
|
239
|
+
"type": "string",
|
|
240
|
+
"default": "app",
|
|
241
|
+
},
|
|
242
|
+
},
|
|
243
|
+
"required": ["user", "role"],
|
|
244
|
+
"additionalProperties": False,
|
|
245
|
+
},
|
|
246
|
+
"description": (
|
|
247
|
+
"Initial role assignments to set up in "
|
|
248
|
+
"OSO Cloud on startup. Only used when "
|
|
249
|
+
"provider is 'oso'. Example: "
|
|
250
|
+
'[{"user": "admin@example.com", '
|
|
251
|
+
'"role": "admin"}]'
|
|
252
|
+
),
|
|
253
|
+
},
|
|
254
|
+
"initial_policies": {
|
|
255
|
+
"type": "array",
|
|
256
|
+
"items": {
|
|
257
|
+
"type": "object",
|
|
258
|
+
"properties": {
|
|
259
|
+
"role": {"type": "string"},
|
|
260
|
+
"resource": {
|
|
261
|
+
"type": "string",
|
|
262
|
+
"default": "documents",
|
|
263
|
+
},
|
|
264
|
+
"action": {"type": "string"},
|
|
265
|
+
},
|
|
266
|
+
"required": ["role", "action"],
|
|
267
|
+
"additionalProperties": False,
|
|
268
|
+
},
|
|
269
|
+
"description": (
|
|
270
|
+
"Initial permission policies to set up "
|
|
271
|
+
"in OSO Cloud on startup. Only used "
|
|
272
|
+
"when provider is 'oso'. Example: "
|
|
273
|
+
'[{"role": "admin", "resource": '
|
|
274
|
+
'"documents", "action": "read"}]'
|
|
275
|
+
),
|
|
276
|
+
},
|
|
277
|
+
},
|
|
278
|
+
"additionalProperties": False,
|
|
279
|
+
"description": (
|
|
280
|
+
"Authorization configuration. For Casbin "
|
|
281
|
+
"provider: use 'model', 'policies_collection', "
|
|
282
|
+
"'default_roles'. For OSO Cloud provider: use "
|
|
283
|
+
"'api_key' (or env var), 'url' (or env var), "
|
|
284
|
+
"'initial_roles', 'initial_policies'."
|
|
285
|
+
),
|
|
286
|
+
},
|
|
287
|
+
"allowed_roles": {
|
|
288
|
+
"type": "array",
|
|
289
|
+
"items": {"type": "string"},
|
|
290
|
+
"description": (
|
|
291
|
+
"List of roles that can access this app "
|
|
292
|
+
"(e.g., ['admin', 'developer']). Users must "
|
|
293
|
+
"have at least one of these roles."
|
|
294
|
+
),
|
|
295
|
+
},
|
|
296
|
+
"allowed_users": {
|
|
297
|
+
"type": "array",
|
|
298
|
+
"items": {"type": "string", "format": "email"},
|
|
299
|
+
"description": (
|
|
300
|
+
"List of specific user emails that can access "
|
|
301
|
+
"this app (whitelist). If provided, only "
|
|
302
|
+
"these users can access regardless of roles."
|
|
303
|
+
),
|
|
304
|
+
},
|
|
305
|
+
"denied_users": {
|
|
306
|
+
"type": "array",
|
|
307
|
+
"items": {"type": "string", "format": "email"},
|
|
308
|
+
"description": (
|
|
309
|
+
"List of user emails that are explicitly "
|
|
310
|
+
"denied access (blacklist). Takes precedence "
|
|
311
|
+
"over allowed_users and allowed_roles."
|
|
312
|
+
),
|
|
313
|
+
},
|
|
314
|
+
"required_permissions": {
|
|
315
|
+
"type": "array",
|
|
316
|
+
"items": {"type": "string"},
|
|
317
|
+
"description": (
|
|
318
|
+
"List of required permissions (format: "
|
|
319
|
+
"'resource:action', e.g., ['apps:view', "
|
|
320
|
+
"'apps:manage_own']). User must have all "
|
|
321
|
+
"listed permissions."
|
|
322
|
+
),
|
|
323
|
+
},
|
|
324
|
+
"custom_resource": {
|
|
325
|
+
"type": "string",
|
|
326
|
+
"pattern": "^[a-z0-9_:]+$",
|
|
327
|
+
"description": (
|
|
328
|
+
"Custom Casbin resource name (e.g., "
|
|
329
|
+
"'app:storyweaver'). If not provided, "
|
|
330
|
+
"defaults to 'app:{slug}'."
|
|
331
|
+
),
|
|
332
|
+
},
|
|
333
|
+
"custom_actions": {
|
|
334
|
+
"type": "array",
|
|
335
|
+
"items": {
|
|
336
|
+
"type": "string",
|
|
337
|
+
"enum": ["access", "read", "write", "admin"],
|
|
338
|
+
},
|
|
339
|
+
"description": (
|
|
340
|
+
"Custom actions to check (defaults to "
|
|
341
|
+
"['access']). Used with custom_resource for "
|
|
342
|
+
"fine-grained permission checks."
|
|
343
|
+
),
|
|
344
|
+
},
|
|
345
|
+
"allow_anonymous": {
|
|
346
|
+
"type": "boolean",
|
|
347
|
+
"default": False,
|
|
348
|
+
"description": (
|
|
349
|
+
"If true, allows anonymous (unauthenticated) "
|
|
350
|
+
"access. Only applies if required is false or "
|
|
351
|
+
"not specified."
|
|
352
|
+
),
|
|
353
|
+
},
|
|
354
|
+
"owner_can_access": {
|
|
355
|
+
"type": "boolean",
|
|
356
|
+
"default": True,
|
|
357
|
+
"description": (
|
|
358
|
+
"If true (default), the app owner "
|
|
359
|
+
"(developer_id) can always access the app."
|
|
360
|
+
),
|
|
361
|
+
},
|
|
362
|
+
},
|
|
363
|
+
"additionalProperties": False,
|
|
364
|
+
"description": (
|
|
365
|
+
"Intelligent authorization policy for app-level access "
|
|
366
|
+
"control. Supports role-based, user-based, and "
|
|
367
|
+
"permission-based access. Takes precedence over "
|
|
368
|
+
"auth_required."
|
|
369
|
+
),
|
|
370
|
+
},
|
|
371
|
+
"users": {
|
|
372
|
+
"type": "object",
|
|
373
|
+
"properties": {
|
|
374
|
+
"enabled": {
|
|
375
|
+
"type": "boolean",
|
|
376
|
+
"default": False,
|
|
377
|
+
"description": (
|
|
378
|
+
"Enable app-level user management. When "
|
|
379
|
+
"enabled, app manages its own users separate "
|
|
380
|
+
"from platform users."
|
|
381
|
+
),
|
|
382
|
+
},
|
|
383
|
+
"strategy": {
|
|
384
|
+
"type": "string",
|
|
385
|
+
"enum": [
|
|
386
|
+
"app_users",
|
|
387
|
+
"anonymous_session",
|
|
388
|
+
],
|
|
389
|
+
"default": "app_users",
|
|
390
|
+
"description": (
|
|
391
|
+
"User management strategy. 'app_users' = "
|
|
392
|
+
"app-specific user accounts, "
|
|
393
|
+
"'anonymous_session' = session-based anonymous "
|
|
394
|
+
"users."
|
|
395
|
+
),
|
|
396
|
+
},
|
|
397
|
+
"collection_name": {
|
|
398
|
+
"type": "string",
|
|
399
|
+
"pattern": "^[a-zA-Z0-9_]+$",
|
|
400
|
+
"default": "users",
|
|
401
|
+
"description": (
|
|
402
|
+
"Collection name for app-specific users "
|
|
403
|
+
"(default: 'users'). Will be prefixed with "
|
|
404
|
+
"app slug."
|
|
405
|
+
),
|
|
406
|
+
},
|
|
407
|
+
"session_cookie_name": {
|
|
408
|
+
"type": "string",
|
|
409
|
+
"pattern": "^[a-z0-9_-]+$",
|
|
410
|
+
"default": "app_session",
|
|
411
|
+
"description": (
|
|
412
|
+
"Cookie name for app-specific session "
|
|
413
|
+
"(default: 'app_session'). Will be suffixed "
|
|
414
|
+
"with app slug."
|
|
415
|
+
),
|
|
416
|
+
},
|
|
417
|
+
"session_ttl_seconds": {
|
|
418
|
+
"type": "integer",
|
|
419
|
+
"minimum": 60,
|
|
420
|
+
"default": 86400,
|
|
421
|
+
"description": (
|
|
422
|
+
"Session TTL in seconds (default: 86400 = "
|
|
423
|
+
"24 hours). Used for app-specific sessions."
|
|
424
|
+
),
|
|
425
|
+
},
|
|
426
|
+
"allow_registration": {
|
|
427
|
+
"type": "boolean",
|
|
428
|
+
"default": False,
|
|
429
|
+
"description": (
|
|
430
|
+
"Allow users to self-register in the app "
|
|
431
|
+
"(when strategy is 'app_users')."
|
|
432
|
+
),
|
|
433
|
+
},
|
|
434
|
+
"link_platform_users": {
|
|
435
|
+
"type": "boolean",
|
|
436
|
+
"default": True,
|
|
437
|
+
"description": (
|
|
438
|
+
"Link app users to platform users. Allows platform users "
|
|
439
|
+
"to have app-specific profiles."
|
|
440
|
+
),
|
|
441
|
+
},
|
|
442
|
+
"anonymous_user_prefix": {
|
|
443
|
+
"type": "string",
|
|
444
|
+
"default": "guest",
|
|
445
|
+
"description": (
|
|
446
|
+
"Prefix for anonymous user IDs (default: "
|
|
447
|
+
"'guest'). Used for anonymous_session strategy."
|
|
448
|
+
),
|
|
449
|
+
},
|
|
450
|
+
"user_id_field": {
|
|
451
|
+
"type": "string",
|
|
452
|
+
"default": "app_user_id",
|
|
453
|
+
"description": (
|
|
454
|
+
"Field name in platform user JWT "
|
|
455
|
+
"for storing app user ID "
|
|
456
|
+
"(default: 'app_user_id'). "
|
|
457
|
+
"Used for linking."
|
|
458
|
+
),
|
|
459
|
+
},
|
|
460
|
+
"demo_users": {
|
|
461
|
+
"type": "array",
|
|
462
|
+
"items": {
|
|
463
|
+
"type": "object",
|
|
464
|
+
"properties": {
|
|
465
|
+
"email": {
|
|
466
|
+
"type": "string",
|
|
467
|
+
"format": "email",
|
|
468
|
+
"description": (
|
|
469
|
+
"Email address for demo user "
|
|
470
|
+
"(defaults to platform demo email "
|
|
471
|
+
"if not specified)"
|
|
472
|
+
),
|
|
473
|
+
},
|
|
474
|
+
"password": {
|
|
475
|
+
"type": "string",
|
|
476
|
+
"description": (
|
|
477
|
+
"Password for demo user (defaults "
|
|
478
|
+
"to platform demo password if not "
|
|
479
|
+
"specified, or plain text for demo "
|
|
480
|
+
"purposes)"
|
|
481
|
+
),
|
|
482
|
+
},
|
|
483
|
+
"role": {
|
|
484
|
+
"type": "string",
|
|
485
|
+
"default": "user",
|
|
486
|
+
"description": (
|
|
487
|
+
"Role for demo user in app "
|
|
488
|
+
"(default: 'user')"
|
|
489
|
+
),
|
|
490
|
+
},
|
|
491
|
+
"auto_create": {
|
|
492
|
+
"type": "boolean",
|
|
493
|
+
"default": True,
|
|
494
|
+
"description": (
|
|
495
|
+
"Automatically create this demo "
|
|
496
|
+
"user if it doesn't exist "
|
|
497
|
+
"(default: true)"
|
|
498
|
+
),
|
|
499
|
+
},
|
|
500
|
+
"link_to_platform": {
|
|
501
|
+
"type": "boolean",
|
|
502
|
+
"default": False,
|
|
503
|
+
"description": (
|
|
504
|
+
"Link this demo user to platform "
|
|
505
|
+
"demo user (if platform demo "
|
|
506
|
+
"exists, default: false)"
|
|
507
|
+
),
|
|
508
|
+
},
|
|
509
|
+
"extra_data": {
|
|
510
|
+
"type": "object",
|
|
511
|
+
"description": (
|
|
512
|
+
"Additional data to store with "
|
|
513
|
+
"demo user (e.g., store_id, "
|
|
514
|
+
"preferences, etc.)"
|
|
515
|
+
),
|
|
516
|
+
},
|
|
517
|
+
},
|
|
518
|
+
"required": [],
|
|
519
|
+
},
|
|
520
|
+
"description": (
|
|
521
|
+
"Array of demo users to automatically "
|
|
522
|
+
"create/link for this app. If empty, "
|
|
523
|
+
"automatically uses platform demo user if "
|
|
524
|
+
"available."
|
|
525
|
+
),
|
|
526
|
+
},
|
|
527
|
+
"auto_link_platform_demo": {
|
|
528
|
+
"type": "boolean",
|
|
529
|
+
"default": True,
|
|
530
|
+
"description": (
|
|
531
|
+
"Automatically link platform demo user to "
|
|
532
|
+
"experiment demo user if platform demo exists "
|
|
533
|
+
"(default: true). Works in combination with "
|
|
534
|
+
"link_platform_users and demo_users."
|
|
535
|
+
),
|
|
536
|
+
},
|
|
537
|
+
"demo_user_seed_strategy": {
|
|
538
|
+
"type": "string",
|
|
539
|
+
"enum": ["auto", "manual", "disabled"],
|
|
540
|
+
"default": "auto",
|
|
541
|
+
"description": (
|
|
542
|
+
"Strategy for demo user seeding: 'auto' = "
|
|
543
|
+
"automatically create/link on first access or "
|
|
544
|
+
"actor init, 'manual' = require explicit "
|
|
545
|
+
"creation via API, 'disabled' = no automatic "
|
|
546
|
+
"demo user handling (default: 'auto')"
|
|
547
|
+
),
|
|
548
|
+
},
|
|
549
|
+
"allow_demo_access": {
|
|
550
|
+
"type": "boolean",
|
|
551
|
+
"default": False,
|
|
552
|
+
"description": (
|
|
553
|
+
"Enable automatic demo user access. When "
|
|
554
|
+
"enabled, unauthenticated users are "
|
|
555
|
+
"automatically logged in as demo user, "
|
|
556
|
+
"providing seamless demo experience. "
|
|
557
|
+
"Requires demo users to be configured via "
|
|
558
|
+
"demo_users or auto_link_platform_demo. "
|
|
559
|
+
"(default: false)"
|
|
560
|
+
),
|
|
561
|
+
},
|
|
562
|
+
},
|
|
563
|
+
"additionalProperties": False,
|
|
564
|
+
"description": (
|
|
565
|
+
"App-level user management configuration. Enables apps "
|
|
566
|
+
"to have their own user accounts and sessions "
|
|
567
|
+
"independent of platform authentication."
|
|
568
|
+
),
|
|
569
|
+
},
|
|
570
|
+
},
|
|
571
|
+
"additionalProperties": False,
|
|
572
|
+
"description": (
|
|
573
|
+
"Authentication and authorization configuration. Combines "
|
|
574
|
+
"authorization policy (who can access) and user management "
|
|
575
|
+
"(user accounts and sessions)."
|
|
576
|
+
),
|
|
577
|
+
},
|
|
578
|
+
"token_management": {
|
|
579
|
+
"type": "object",
|
|
580
|
+
"properties": {
|
|
581
|
+
"enabled": {
|
|
582
|
+
"type": "boolean",
|
|
583
|
+
"default": True,
|
|
584
|
+
"description": (
|
|
585
|
+
"Enable enhanced token management features "
|
|
586
|
+
"(refresh tokens, blacklist, sessions). Default: true."
|
|
587
|
+
),
|
|
588
|
+
},
|
|
589
|
+
"access_token_ttl": {
|
|
590
|
+
"type": "integer",
|
|
591
|
+
"minimum": 60,
|
|
592
|
+
"default": 900,
|
|
593
|
+
"description": (
|
|
594
|
+
"Access token TTL in seconds " "(default: 900 = 15 minutes)."
|
|
595
|
+
),
|
|
596
|
+
},
|
|
597
|
+
"refresh_token_ttl": {
|
|
598
|
+
"type": "integer",
|
|
599
|
+
"minimum": 3600,
|
|
600
|
+
"default": 604800,
|
|
601
|
+
"description": (
|
|
602
|
+
"Refresh token TTL in seconds " "(default: 604800 = 7 days)."
|
|
603
|
+
),
|
|
604
|
+
},
|
|
605
|
+
"token_rotation": {
|
|
606
|
+
"type": "boolean",
|
|
607
|
+
"default": True,
|
|
608
|
+
"description": (
|
|
609
|
+
"Enable refresh token rotation "
|
|
610
|
+
"(new refresh token on each use). Default: true."
|
|
611
|
+
),
|
|
612
|
+
},
|
|
613
|
+
"max_sessions_per_user": {
|
|
614
|
+
"type": "integer",
|
|
615
|
+
"minimum": 1,
|
|
616
|
+
"default": 10,
|
|
617
|
+
"description": (
|
|
618
|
+
"Maximum number of concurrent sessions per user "
|
|
619
|
+
"(default: 10)."
|
|
620
|
+
),
|
|
621
|
+
},
|
|
622
|
+
"session_inactivity_timeout": {
|
|
623
|
+
"type": "integer",
|
|
624
|
+
"minimum": 60,
|
|
625
|
+
"default": 1800,
|
|
626
|
+
"description": (
|
|
627
|
+
"Session inactivity timeout in seconds before "
|
|
628
|
+
"automatic cleanup (default: 1800 = 30 minutes)."
|
|
629
|
+
),
|
|
630
|
+
},
|
|
631
|
+
"security": {
|
|
632
|
+
"type": "object",
|
|
633
|
+
"properties": {
|
|
634
|
+
"require_https": {
|
|
635
|
+
"type": "boolean",
|
|
636
|
+
"default": False,
|
|
637
|
+
"description": (
|
|
638
|
+
"Require HTTPS in production "
|
|
639
|
+
"(default: false, auto-detected)."
|
|
640
|
+
),
|
|
641
|
+
},
|
|
642
|
+
"cookie_secure": {
|
|
643
|
+
"type": "string",
|
|
644
|
+
"enum": ["auto", "true", "false"],
|
|
645
|
+
"default": "auto",
|
|
646
|
+
"description": (
|
|
647
|
+
"Secure cookie flag: 'auto' = detect from "
|
|
648
|
+
"request, 'true' = always secure, "
|
|
649
|
+
"'false' = never secure (default: 'auto')."
|
|
650
|
+
),
|
|
651
|
+
},
|
|
652
|
+
"cookie_samesite": {
|
|
653
|
+
"type": "string",
|
|
654
|
+
"enum": ["strict", "lax", "none"],
|
|
655
|
+
"default": "lax",
|
|
656
|
+
"description": "SameSite cookie attribute (default: 'lax').",
|
|
657
|
+
},
|
|
658
|
+
"cookie_httponly": {
|
|
659
|
+
"type": "boolean",
|
|
660
|
+
"default": True,
|
|
661
|
+
"description": ("HttpOnly cookie flag (default: true)."),
|
|
662
|
+
},
|
|
663
|
+
"csrf_protection": {
|
|
664
|
+
"type": "boolean",
|
|
665
|
+
"default": True,
|
|
666
|
+
"description": ("Enable CSRF protection (default: true)."),
|
|
667
|
+
},
|
|
668
|
+
"rate_limiting": {
|
|
669
|
+
"type": "object",
|
|
670
|
+
"properties": {
|
|
671
|
+
"login": {
|
|
672
|
+
"type": "object",
|
|
673
|
+
"properties": {
|
|
674
|
+
"max_attempts": {
|
|
675
|
+
"type": "integer",
|
|
676
|
+
"minimum": 1,
|
|
677
|
+
"default": 5,
|
|
678
|
+
},
|
|
679
|
+
"window_seconds": {
|
|
680
|
+
"type": "integer",
|
|
681
|
+
"minimum": 1,
|
|
682
|
+
"default": 300,
|
|
683
|
+
},
|
|
684
|
+
},
|
|
685
|
+
"additionalProperties": False,
|
|
686
|
+
},
|
|
687
|
+
"register": {
|
|
688
|
+
"type": "object",
|
|
689
|
+
"properties": {
|
|
690
|
+
"max_attempts": {
|
|
691
|
+
"type": "integer",
|
|
692
|
+
"minimum": 1,
|
|
693
|
+
"default": 3,
|
|
694
|
+
},
|
|
695
|
+
"window_seconds": {
|
|
696
|
+
"type": "integer",
|
|
697
|
+
"minimum": 1,
|
|
698
|
+
"default": 600,
|
|
699
|
+
},
|
|
700
|
+
},
|
|
701
|
+
"additionalProperties": False,
|
|
702
|
+
},
|
|
703
|
+
"refresh": {
|
|
704
|
+
"type": "object",
|
|
705
|
+
"properties": {
|
|
706
|
+
"max_attempts": {
|
|
707
|
+
"type": "integer",
|
|
708
|
+
"minimum": 1,
|
|
709
|
+
"default": 10,
|
|
710
|
+
},
|
|
711
|
+
"window_seconds": {
|
|
712
|
+
"type": "integer",
|
|
713
|
+
"minimum": 1,
|
|
714
|
+
"default": 60,
|
|
715
|
+
},
|
|
716
|
+
},
|
|
717
|
+
"additionalProperties": False,
|
|
718
|
+
},
|
|
719
|
+
},
|
|
720
|
+
"additionalProperties": False,
|
|
721
|
+
"description": (
|
|
722
|
+
"Rate limiting configuration per endpoint type."
|
|
723
|
+
),
|
|
724
|
+
},
|
|
725
|
+
"password_policy": {
|
|
726
|
+
"type": "object",
|
|
727
|
+
"properties": {
|
|
728
|
+
"allow_plain_text": {
|
|
729
|
+
"type": "boolean",
|
|
730
|
+
"default": False,
|
|
731
|
+
"description": (
|
|
732
|
+
"Allow plain text passwords "
|
|
733
|
+
"(NOT recommended, default: false)"
|
|
734
|
+
),
|
|
735
|
+
},
|
|
736
|
+
"min_length": {
|
|
737
|
+
"type": "integer",
|
|
738
|
+
"minimum": 1,
|
|
739
|
+
"default": 8,
|
|
740
|
+
"description": "Minimum password length (default: 8)",
|
|
741
|
+
},
|
|
742
|
+
"require_uppercase": {
|
|
743
|
+
"type": "boolean",
|
|
744
|
+
"default": True,
|
|
745
|
+
"description": "Require uppercase letters (default: true)",
|
|
746
|
+
},
|
|
747
|
+
"require_lowercase": {
|
|
748
|
+
"type": "boolean",
|
|
749
|
+
"default": True,
|
|
750
|
+
"description": (
|
|
751
|
+
"Require lowercase letters " "(default: true)"
|
|
752
|
+
),
|
|
753
|
+
},
|
|
754
|
+
"require_numbers": {
|
|
755
|
+
"type": "boolean",
|
|
756
|
+
"default": True,
|
|
757
|
+
"description": ("Require numbers (default: true)"),
|
|
758
|
+
},
|
|
759
|
+
"require_special": {
|
|
760
|
+
"type": "boolean",
|
|
761
|
+
"default": False,
|
|
762
|
+
"description": (
|
|
763
|
+
"Require special characters " "(default: false)"
|
|
764
|
+
),
|
|
765
|
+
},
|
|
766
|
+
},
|
|
767
|
+
"additionalProperties": False,
|
|
768
|
+
"description": ("Password policy configuration"),
|
|
769
|
+
},
|
|
770
|
+
"session_fingerprinting": {
|
|
771
|
+
"type": "object",
|
|
772
|
+
"properties": {
|
|
773
|
+
"enabled": {
|
|
774
|
+
"type": "boolean",
|
|
775
|
+
"default": True,
|
|
776
|
+
"description": (
|
|
777
|
+
"Enable session fingerprinting "
|
|
778
|
+
"(default: true)"
|
|
779
|
+
),
|
|
780
|
+
},
|
|
781
|
+
"validate_on_login": {
|
|
782
|
+
"type": "boolean",
|
|
783
|
+
"default": True,
|
|
784
|
+
"description": (
|
|
785
|
+
"Validate fingerprint on login "
|
|
786
|
+
"(default: true)"
|
|
787
|
+
),
|
|
788
|
+
},
|
|
789
|
+
"validate_on_refresh": {
|
|
790
|
+
"type": "boolean",
|
|
791
|
+
"default": True,
|
|
792
|
+
"description": (
|
|
793
|
+
"Validate fingerprint on token refresh "
|
|
794
|
+
"(default: true)"
|
|
795
|
+
),
|
|
796
|
+
},
|
|
797
|
+
"validate_on_request": {
|
|
798
|
+
"type": "boolean",
|
|
799
|
+
"default": False,
|
|
800
|
+
"description": (
|
|
801
|
+
"Validate fingerprint on every request "
|
|
802
|
+
"(default: false, may impact performance)"
|
|
803
|
+
),
|
|
804
|
+
},
|
|
805
|
+
"strict_mode": {
|
|
806
|
+
"type": "boolean",
|
|
807
|
+
"default": False,
|
|
808
|
+
"description": (
|
|
809
|
+
"Strict mode: reject requests if "
|
|
810
|
+
"fingerprint doesn't match "
|
|
811
|
+
"(default: false)"
|
|
812
|
+
),
|
|
813
|
+
},
|
|
814
|
+
},
|
|
815
|
+
"additionalProperties": False,
|
|
816
|
+
"description": "Session fingerprinting configuration for security",
|
|
817
|
+
},
|
|
818
|
+
"account_lockout": {
|
|
819
|
+
"type": "object",
|
|
820
|
+
"properties": {
|
|
821
|
+
"enabled": {
|
|
822
|
+
"type": "boolean",
|
|
823
|
+
"default": True,
|
|
824
|
+
"description": "Enable account lockout (default: true)",
|
|
825
|
+
},
|
|
826
|
+
"max_failed_attempts": {
|
|
827
|
+
"type": "integer",
|
|
828
|
+
"minimum": 1,
|
|
829
|
+
"default": 5,
|
|
830
|
+
"description": (
|
|
831
|
+
"Maximum failed login attempts before "
|
|
832
|
+
"lockout (default: 5)"
|
|
833
|
+
),
|
|
834
|
+
},
|
|
835
|
+
"lockout_duration_seconds": {
|
|
836
|
+
"type": "integer",
|
|
837
|
+
"minimum": 1,
|
|
838
|
+
"default": 900,
|
|
839
|
+
"description": (
|
|
840
|
+
"Lockout duration in seconds "
|
|
841
|
+
"(default: 900 = 15 minutes)"
|
|
842
|
+
),
|
|
843
|
+
},
|
|
844
|
+
"reset_on_success": {
|
|
845
|
+
"type": "boolean",
|
|
846
|
+
"default": True,
|
|
847
|
+
"description": (
|
|
848
|
+
"Reset failed attempts counter on "
|
|
849
|
+
"successful login (default: true)"
|
|
850
|
+
),
|
|
851
|
+
},
|
|
852
|
+
},
|
|
853
|
+
"additionalProperties": False,
|
|
854
|
+
"description": "Account lockout configuration",
|
|
855
|
+
},
|
|
856
|
+
"ip_validation": {
|
|
857
|
+
"type": "object",
|
|
858
|
+
"properties": {
|
|
859
|
+
"enabled": {
|
|
860
|
+
"type": "boolean",
|
|
861
|
+
"default": False,
|
|
862
|
+
"description": (
|
|
863
|
+
"Enable IP address validation "
|
|
864
|
+
"(default: false)"
|
|
865
|
+
),
|
|
866
|
+
},
|
|
867
|
+
"strict": {
|
|
868
|
+
"type": "boolean",
|
|
869
|
+
"default": False,
|
|
870
|
+
"description": (
|
|
871
|
+
"Strict mode: reject requests if IP "
|
|
872
|
+
"changes (default: false)"
|
|
873
|
+
),
|
|
874
|
+
},
|
|
875
|
+
"allow_ip_change": {
|
|
876
|
+
"type": "boolean",
|
|
877
|
+
"default": True,
|
|
878
|
+
"description": (
|
|
879
|
+
"Allow IP address changes during session "
|
|
880
|
+
"(default: true)"
|
|
881
|
+
),
|
|
882
|
+
},
|
|
883
|
+
},
|
|
884
|
+
"additionalProperties": False,
|
|
885
|
+
"description": "IP address validation configuration",
|
|
886
|
+
},
|
|
887
|
+
"token_fingerprinting": {
|
|
888
|
+
"type": "object",
|
|
889
|
+
"properties": {
|
|
890
|
+
"enabled": {
|
|
891
|
+
"type": "boolean",
|
|
892
|
+
"default": True,
|
|
893
|
+
"description": (
|
|
894
|
+
"Enable token fingerprinting " "(default: true)"
|
|
895
|
+
),
|
|
896
|
+
},
|
|
897
|
+
"bind_to_device": {
|
|
898
|
+
"type": "boolean",
|
|
899
|
+
"default": True,
|
|
900
|
+
"description": (
|
|
901
|
+
"Bind tokens to device ID " "(default: true)"
|
|
902
|
+
),
|
|
903
|
+
},
|
|
904
|
+
},
|
|
905
|
+
"additionalProperties": False,
|
|
906
|
+
"description": ("Token fingerprinting configuration"),
|
|
907
|
+
},
|
|
908
|
+
},
|
|
909
|
+
"additionalProperties": False,
|
|
910
|
+
"description": ("Security settings for token management."),
|
|
911
|
+
},
|
|
912
|
+
"auto_setup": {
|
|
913
|
+
"type": "boolean",
|
|
914
|
+
"default": True,
|
|
915
|
+
"description": (
|
|
916
|
+
"Automatically set up token management on app startup "
|
|
917
|
+
"(default: true)."
|
|
918
|
+
),
|
|
919
|
+
},
|
|
920
|
+
},
|
|
921
|
+
"additionalProperties": False,
|
|
922
|
+
"description": "Token management configuration for enhanced authentication features.",
|
|
923
|
+
},
|
|
924
|
+
"data_scope": {
|
|
925
|
+
"type": "array",
|
|
926
|
+
"items": {"type": "string"},
|
|
927
|
+
"minItems": 1,
|
|
928
|
+
"default": ["self"],
|
|
929
|
+
"description": "List of app slugs whose data this app can access",
|
|
930
|
+
},
|
|
931
|
+
"pip_deps": {
|
|
932
|
+
"type": "array",
|
|
933
|
+
"items": {"type": "string"},
|
|
934
|
+
"description": "List of pip dependencies for isolated environment",
|
|
935
|
+
},
|
|
936
|
+
"managed_indexes": {
|
|
937
|
+
"type": "object",
|
|
938
|
+
"patternProperties": {
|
|
939
|
+
"^[a-zA-Z0-9_]+$": {
|
|
940
|
+
"type": "array",
|
|
941
|
+
"items": {"$ref": "#/definitions/indexDefinition"},
|
|
942
|
+
"minItems": 1,
|
|
943
|
+
}
|
|
944
|
+
},
|
|
945
|
+
"description": "Collection name -> list of index definitions",
|
|
946
|
+
},
|
|
947
|
+
"collection_settings": {
|
|
948
|
+
"type": "object",
|
|
949
|
+
"patternProperties": {
|
|
950
|
+
"^[a-zA-Z0-9_]+$": {"$ref": "#/definitions/collectionSettings"}
|
|
951
|
+
},
|
|
952
|
+
"description": "Collection name -> collection settings",
|
|
953
|
+
},
|
|
954
|
+
"websockets": {
|
|
955
|
+
"type": "object",
|
|
956
|
+
"patternProperties": {
|
|
957
|
+
"^[a-zA-Z0-9_-]+$": {
|
|
958
|
+
"type": "object",
|
|
959
|
+
"properties": {
|
|
960
|
+
"path": {
|
|
961
|
+
"type": "string",
|
|
962
|
+
"pattern": "^/[a-zA-Z0-9_/-]+$",
|
|
963
|
+
"description": (
|
|
964
|
+
"WebSocket path (e.g., '/ws', '/events', "
|
|
965
|
+
"'/realtime'). Must start with '/'. "
|
|
966
|
+
"Routes are automatically registered."
|
|
967
|
+
),
|
|
968
|
+
},
|
|
969
|
+
"auth": {
|
|
970
|
+
"type": "object",
|
|
971
|
+
"properties": {
|
|
972
|
+
"required": {
|
|
973
|
+
"type": "boolean",
|
|
974
|
+
"default": True,
|
|
975
|
+
"description": (
|
|
976
|
+
"Whether authentication is required "
|
|
977
|
+
"(default: true). Uses app's auth.policy "
|
|
978
|
+
"if not specified."
|
|
979
|
+
),
|
|
980
|
+
},
|
|
981
|
+
"allow_anonymous": {
|
|
982
|
+
"type": "boolean",
|
|
983
|
+
"default": False,
|
|
984
|
+
"description": (
|
|
985
|
+
"Allow anonymous connections even if "
|
|
986
|
+
"auth is required (default: false)"
|
|
987
|
+
),
|
|
988
|
+
},
|
|
989
|
+
},
|
|
990
|
+
"additionalProperties": False,
|
|
991
|
+
"description": (
|
|
992
|
+
"Authentication configuration. If not specified, "
|
|
993
|
+
"uses app's auth.policy settings."
|
|
994
|
+
),
|
|
995
|
+
},
|
|
996
|
+
"description": {
|
|
997
|
+
"type": "string",
|
|
998
|
+
"description": (
|
|
999
|
+
"Description of what this WebSocket endpoint "
|
|
1000
|
+
"is used for"
|
|
1001
|
+
),
|
|
1002
|
+
},
|
|
1003
|
+
"ping_interval": {
|
|
1004
|
+
"type": "integer",
|
|
1005
|
+
"minimum": 5,
|
|
1006
|
+
"maximum": 300,
|
|
1007
|
+
"default": 30,
|
|
1008
|
+
"description": (
|
|
1009
|
+
"Ping interval in seconds to keep connection "
|
|
1010
|
+
"alive (default: 30, min: 5, max: 300)"
|
|
1011
|
+
),
|
|
1012
|
+
},
|
|
1013
|
+
},
|
|
1014
|
+
"required": ["path"],
|
|
1015
|
+
"additionalProperties": False,
|
|
1016
|
+
"description": (
|
|
1017
|
+
"WebSocket endpoint configuration. Each endpoint is "
|
|
1018
|
+
"automatically isolated to this app. Only 'path' is "
|
|
1019
|
+
"required - all other settings have sensible defaults."
|
|
1020
|
+
),
|
|
1021
|
+
}
|
|
1022
|
+
},
|
|
1023
|
+
"description": (
|
|
1024
|
+
"WebSocket endpoints configuration. Super simple setup - "
|
|
1025
|
+
"just specify the path! Each endpoint is automatically "
|
|
1026
|
+
"scoped and isolated to this app. Key is the endpoint name "
|
|
1027
|
+
"(e.g., 'realtime', 'events'), value contains path and "
|
|
1028
|
+
"optional settings. Routes are automatically registered with "
|
|
1029
|
+
"FastAPI during app registration."
|
|
1030
|
+
),
|
|
1031
|
+
},
|
|
1032
|
+
"embedding_config": {
|
|
1033
|
+
"type": "object",
|
|
1034
|
+
"properties": {
|
|
1035
|
+
"enabled": {
|
|
1036
|
+
"type": "boolean",
|
|
1037
|
+
"default": False,
|
|
1038
|
+
"description": (
|
|
1039
|
+
"Enable semantic text splitting and embedding service. "
|
|
1040
|
+
"When enabled, EmbeddingService will be available for "
|
|
1041
|
+
"chunking text and generating embeddings."
|
|
1042
|
+
),
|
|
1043
|
+
},
|
|
1044
|
+
"max_tokens_per_chunk": {
|
|
1045
|
+
"type": "integer",
|
|
1046
|
+
"minimum": 100,
|
|
1047
|
+
"maximum": 10000,
|
|
1048
|
+
"default": 1000,
|
|
1049
|
+
"description": (
|
|
1050
|
+
"Maximum tokens per chunk when splitting text. The "
|
|
1051
|
+
"semantic-text-splitter ensures chunks never exceed "
|
|
1052
|
+
"this limit while preserving semantic boundaries."
|
|
1053
|
+
),
|
|
1054
|
+
},
|
|
1055
|
+
"tokenizer_model": {
|
|
1056
|
+
"type": "string",
|
|
1057
|
+
"default": "gpt-3.5-turbo",
|
|
1058
|
+
"description": (
|
|
1059
|
+
"Optional: Tokenizer model name for counting tokens "
|
|
1060
|
+
"during chunking (e.g., 'gpt-3.5-turbo', 'gpt-4', "
|
|
1061
|
+
"'gpt-4o'). Must be a valid OpenAI model name. "
|
|
1062
|
+
"Defaults to 'gpt-3.5-turbo' (uses cl100k_base "
|
|
1063
|
+
"encoding internally, which works for GPT-3.5, GPT-4, "
|
|
1064
|
+
"and most models). This is ONLY for token counting, "
|
|
1065
|
+
"NOT for embeddings. You typically don't need to set "
|
|
1066
|
+
"this - the platform default works for most cases."
|
|
1067
|
+
),
|
|
1068
|
+
},
|
|
1069
|
+
"default_embedding_model": {
|
|
1070
|
+
"type": "string",
|
|
1071
|
+
"default": "text-embedding-3-small",
|
|
1072
|
+
"description": (
|
|
1073
|
+
"Default embedding model for chunk embeddings "
|
|
1074
|
+
"(e.g., 'text-embedding-3-small', "
|
|
1075
|
+
"'text-embedding-ada-002'). Examples should implement "
|
|
1076
|
+
"their own embedding clients."
|
|
1077
|
+
),
|
|
1078
|
+
},
|
|
1079
|
+
},
|
|
1080
|
+
"additionalProperties": False,
|
|
1081
|
+
"description": (
|
|
1082
|
+
"Semantic text splitting and embedding configuration. "
|
|
1083
|
+
"Enables intelligent chunking with Rust-based "
|
|
1084
|
+
"semantic-text-splitter. Examples should implement their own "
|
|
1085
|
+
"embedding clients. Perfect for RAG (Retrieval Augmented "
|
|
1086
|
+
"Generation) applications."
|
|
1087
|
+
),
|
|
1088
|
+
},
|
|
1089
|
+
"memory_config": {
|
|
1090
|
+
"type": "object",
|
|
1091
|
+
"properties": {
|
|
1092
|
+
"enabled": {
|
|
1093
|
+
"type": "boolean",
|
|
1094
|
+
"default": False,
|
|
1095
|
+
"description": (
|
|
1096
|
+
"Enable Mem0 memory service for this app. When "
|
|
1097
|
+
"enabled, Mem0MemoryService will be initialized and "
|
|
1098
|
+
"available for intelligent memory management using "
|
|
1099
|
+
"MongoDB as the vector store. mem0 handles embeddings "
|
|
1100
|
+
"and LLM via environment variables (.env)."
|
|
1101
|
+
),
|
|
1102
|
+
},
|
|
1103
|
+
"collection_name": {
|
|
1104
|
+
"type": "string",
|
|
1105
|
+
"pattern": "^[a-zA-Z0-9_]+$",
|
|
1106
|
+
"description": (
|
|
1107
|
+
"MongoDB collection name for storing memories "
|
|
1108
|
+
"(defaults to '{app_slug}_memories'). Will be prefixed "
|
|
1109
|
+
"with app slug if not already prefixed."
|
|
1110
|
+
),
|
|
1111
|
+
},
|
|
1112
|
+
"embedding_model_dims": {
|
|
1113
|
+
"type": "integer",
|
|
1114
|
+
"minimum": 128,
|
|
1115
|
+
"maximum": 4096,
|
|
1116
|
+
"default": 1536,
|
|
1117
|
+
"description": (
|
|
1118
|
+
"Dimensions of the embedding vectors (OPTIONAL - "
|
|
1119
|
+
"auto-detected by embedding a test string). Only "
|
|
1120
|
+
"specify if you need to override auto-detection. "
|
|
1121
|
+
"Default: 1536. The system will automatically detect "
|
|
1122
|
+
"the correct dimensions from your embedding model."
|
|
1123
|
+
),
|
|
1124
|
+
},
|
|
1125
|
+
"enable_graph": {
|
|
1126
|
+
"type": "boolean",
|
|
1127
|
+
"default": False,
|
|
1128
|
+
"description": (
|
|
1129
|
+
"Enable knowledge graph construction for entity "
|
|
1130
|
+
"relationships. When enabled, Mem0 will build a graph "
|
|
1131
|
+
"of connected entities from memories."
|
|
1132
|
+
),
|
|
1133
|
+
},
|
|
1134
|
+
"infer": {
|
|
1135
|
+
"type": "boolean",
|
|
1136
|
+
"default": True,
|
|
1137
|
+
"description": (
|
|
1138
|
+
"Whether to infer memories from conversations "
|
|
1139
|
+
"(default: true). If false, stores messages as-is "
|
|
1140
|
+
"without inference. Requires LLM configured via "
|
|
1141
|
+
"environment variables if true."
|
|
1142
|
+
),
|
|
1143
|
+
},
|
|
1144
|
+
"embedding_model": {
|
|
1145
|
+
"type": "string",
|
|
1146
|
+
"description": (
|
|
1147
|
+
"Embedding model name (e.g., 'text-embedding-3-small'). "
|
|
1148
|
+
"If not provided, mem0 will use environment variables "
|
|
1149
|
+
"or defaults."
|
|
1150
|
+
),
|
|
1151
|
+
},
|
|
1152
|
+
"chat_model": {
|
|
1153
|
+
"type": "string",
|
|
1154
|
+
"description": (
|
|
1155
|
+
"Chat model name for inference (e.g., 'gpt-4o'). "
|
|
1156
|
+
"If not provided, mem0 will use environment variables "
|
|
1157
|
+
"or defaults."
|
|
1158
|
+
),
|
|
1159
|
+
},
|
|
1160
|
+
"temperature": {
|
|
1161
|
+
"type": "number",
|
|
1162
|
+
"minimum": 0.0,
|
|
1163
|
+
"maximum": 2.0,
|
|
1164
|
+
"default": 0.0,
|
|
1165
|
+
"description": (
|
|
1166
|
+
"Temperature for LLM inference "
|
|
1167
|
+
"(0.0 = deterministic, 2.0 = creative). "
|
|
1168
|
+
"Only used if infer=true."
|
|
1169
|
+
),
|
|
1170
|
+
},
|
|
1171
|
+
"async_mode": {
|
|
1172
|
+
"type": "boolean",
|
|
1173
|
+
"default": True,
|
|
1174
|
+
"description": (
|
|
1175
|
+
"Whether to process memories asynchronously "
|
|
1176
|
+
"(default: true). Enables better performance for "
|
|
1177
|
+
"memory ingestion."
|
|
1178
|
+
),
|
|
1179
|
+
},
|
|
1180
|
+
},
|
|
1181
|
+
"additionalProperties": False,
|
|
1182
|
+
"description": (
|
|
1183
|
+
"Mem0 memory service configuration. Enables intelligent memory "
|
|
1184
|
+
"management that automatically extracts, stores, and retrieves "
|
|
1185
|
+
"user memories. Uses MongoDB as the vector store (native "
|
|
1186
|
+
"integration with mdb-engine). mem0 handles embeddings and LLM "
|
|
1187
|
+
"via environment variables (.env). Configure "
|
|
1188
|
+
"AZURE_OPENAI_API_KEY/AZURE_OPENAI_ENDPOINT or OPENAI_API_KEY "
|
|
1189
|
+
"in your .env file."
|
|
1190
|
+
),
|
|
1191
|
+
},
|
|
1192
|
+
"cors": {
|
|
1193
|
+
"type": "object",
|
|
1194
|
+
"properties": {
|
|
1195
|
+
"enabled": {
|
|
1196
|
+
"type": "boolean",
|
|
1197
|
+
"default": False,
|
|
1198
|
+
"description": "Enable CORS for this app (default: false)",
|
|
1199
|
+
},
|
|
1200
|
+
"allow_origins": {
|
|
1201
|
+
"type": "array",
|
|
1202
|
+
"items": {"type": "string"},
|
|
1203
|
+
"default": ["*"],
|
|
1204
|
+
"description": (
|
|
1205
|
+
"List of allowed origins (use ['*'] for all origins, "
|
|
1206
|
+
"not recommended for production)"
|
|
1207
|
+
),
|
|
1208
|
+
},
|
|
1209
|
+
"allow_credentials": {
|
|
1210
|
+
"type": "boolean",
|
|
1211
|
+
"default": False,
|
|
1212
|
+
"description": (
|
|
1213
|
+
"Allow credentials (cookies, authorization headers) "
|
|
1214
|
+
"in CORS requests"
|
|
1215
|
+
),
|
|
1216
|
+
},
|
|
1217
|
+
"allow_methods": {
|
|
1218
|
+
"type": "array",
|
|
1219
|
+
"items": {
|
|
1220
|
+
"type": "string",
|
|
1221
|
+
"enum": [
|
|
1222
|
+
"GET",
|
|
1223
|
+
"POST",
|
|
1224
|
+
"PUT",
|
|
1225
|
+
"DELETE",
|
|
1226
|
+
"PATCH",
|
|
1227
|
+
"OPTIONS",
|
|
1228
|
+
"HEAD",
|
|
1229
|
+
"*",
|
|
1230
|
+
],
|
|
1231
|
+
},
|
|
1232
|
+
"default": ["GET", "POST", "PUT", "DELETE", "PATCH"],
|
|
1233
|
+
"description": (
|
|
1234
|
+
"List of allowed HTTP methods. Use ['*'] to allow all "
|
|
1235
|
+
"methods (not recommended for production)"
|
|
1236
|
+
),
|
|
1237
|
+
},
|
|
1238
|
+
"allow_headers": {
|
|
1239
|
+
"type": "array",
|
|
1240
|
+
"items": {"type": "string"},
|
|
1241
|
+
"default": ["*"],
|
|
1242
|
+
"description": "List of allowed headers (use ['*'] for all headers)",
|
|
1243
|
+
},
|
|
1244
|
+
"expose_headers": {
|
|
1245
|
+
"type": "array",
|
|
1246
|
+
"items": {"type": "string"},
|
|
1247
|
+
"description": "List of headers to expose to the client",
|
|
1248
|
+
},
|
|
1249
|
+
"max_age": {
|
|
1250
|
+
"type": "integer",
|
|
1251
|
+
"minimum": 0,
|
|
1252
|
+
"default": 3600,
|
|
1253
|
+
"description": "Max age for preflight requests in seconds (default: 3600)",
|
|
1254
|
+
},
|
|
1255
|
+
},
|
|
1256
|
+
"additionalProperties": False,
|
|
1257
|
+
"description": "CORS (Cross-Origin Resource Sharing) configuration for web apps",
|
|
1258
|
+
},
|
|
1259
|
+
"observability": {
|
|
1260
|
+
"type": "object",
|
|
1261
|
+
"properties": {
|
|
1262
|
+
"health_checks": {
|
|
1263
|
+
"type": "object",
|
|
1264
|
+
"properties": {
|
|
1265
|
+
"enabled": {
|
|
1266
|
+
"type": "boolean",
|
|
1267
|
+
"default": True,
|
|
1268
|
+
"description": "Enable health check endpoint (default: true)",
|
|
1269
|
+
},
|
|
1270
|
+
"endpoint": {
|
|
1271
|
+
"type": "string",
|
|
1272
|
+
"pattern": "^/[a-zA-Z0-9_/-]*$",
|
|
1273
|
+
"default": "/health",
|
|
1274
|
+
"description": "Health check endpoint path (default: '/health')",
|
|
1275
|
+
},
|
|
1276
|
+
"interval_seconds": {
|
|
1277
|
+
"type": "integer",
|
|
1278
|
+
"minimum": 5,
|
|
1279
|
+
"default": 30,
|
|
1280
|
+
"description": "Health check interval in seconds (default: 30)",
|
|
1281
|
+
},
|
|
1282
|
+
},
|
|
1283
|
+
"additionalProperties": False,
|
|
1284
|
+
"description": "Health check configuration",
|
|
1285
|
+
},
|
|
1286
|
+
"metrics": {
|
|
1287
|
+
"type": "object",
|
|
1288
|
+
"properties": {
|
|
1289
|
+
"enabled": {
|
|
1290
|
+
"type": "boolean",
|
|
1291
|
+
"default": True,
|
|
1292
|
+
"description": "Enable metrics collection (default: true)",
|
|
1293
|
+
},
|
|
1294
|
+
"collect_operation_metrics": {
|
|
1295
|
+
"type": "boolean",
|
|
1296
|
+
"default": True,
|
|
1297
|
+
"description": (
|
|
1298
|
+
"Collect operation-level metrics "
|
|
1299
|
+
"(duration, errors, etc.)"
|
|
1300
|
+
),
|
|
1301
|
+
},
|
|
1302
|
+
"collect_performance_metrics": {
|
|
1303
|
+
"type": "boolean",
|
|
1304
|
+
"default": True,
|
|
1305
|
+
"description": "Collect performance metrics (memory, CPU, etc.)",
|
|
1306
|
+
},
|
|
1307
|
+
"custom_metrics": {
|
|
1308
|
+
"type": "array",
|
|
1309
|
+
"items": {"type": "string"},
|
|
1310
|
+
"description": "List of custom metric names to track",
|
|
1311
|
+
},
|
|
1312
|
+
},
|
|
1313
|
+
"additionalProperties": False,
|
|
1314
|
+
"description": "Metrics collection configuration",
|
|
1315
|
+
},
|
|
1316
|
+
"logging": {
|
|
1317
|
+
"type": "object",
|
|
1318
|
+
"properties": {
|
|
1319
|
+
"level": {
|
|
1320
|
+
"type": "string",
|
|
1321
|
+
"enum": ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
|
1322
|
+
"default": "INFO",
|
|
1323
|
+
"description": "Logging level (default: 'INFO')",
|
|
1324
|
+
},
|
|
1325
|
+
"format": {
|
|
1326
|
+
"type": "string",
|
|
1327
|
+
"enum": ["json", "text"],
|
|
1328
|
+
"default": "json",
|
|
1329
|
+
"description": "Log format (default: 'json')",
|
|
1330
|
+
},
|
|
1331
|
+
"include_request_id": {
|
|
1332
|
+
"type": "boolean",
|
|
1333
|
+
"default": True,
|
|
1334
|
+
"description": "Include request ID in logs (default: true)",
|
|
1335
|
+
},
|
|
1336
|
+
"log_sensitive_data": {
|
|
1337
|
+
"type": "boolean",
|
|
1338
|
+
"default": False,
|
|
1339
|
+
"description": (
|
|
1340
|
+
"Log sensitive data (passwords, tokens, etc.) - "
|
|
1341
|
+
"NOT recommended for production (default: false)"
|
|
1342
|
+
),
|
|
1343
|
+
},
|
|
1344
|
+
},
|
|
1345
|
+
"additionalProperties": False,
|
|
1346
|
+
"description": "Logging configuration",
|
|
1347
|
+
},
|
|
1348
|
+
},
|
|
1349
|
+
"additionalProperties": False,
|
|
1350
|
+
"description": "Observability configuration (health checks, metrics, logging)",
|
|
1351
|
+
},
|
|
1352
|
+
"initial_data": {
|
|
1353
|
+
"type": "object",
|
|
1354
|
+
"patternProperties": {
|
|
1355
|
+
"^[a-zA-Z0-9_]+$": {
|
|
1356
|
+
"type": "array",
|
|
1357
|
+
"items": {"type": "object"},
|
|
1358
|
+
"description": "Collection name -> array of documents to seed",
|
|
1359
|
+
}
|
|
1360
|
+
},
|
|
1361
|
+
"description": (
|
|
1362
|
+
"Initial data to seed into collections. Only seeds if "
|
|
1363
|
+
"collection is empty (idempotent). Each key is a collection "
|
|
1364
|
+
"name, value is an array of documents to insert."
|
|
1365
|
+
),
|
|
1366
|
+
},
|
|
1367
|
+
"developer_id": {
|
|
1368
|
+
"type": "string",
|
|
1369
|
+
"format": "email",
|
|
1370
|
+
"description": "Email of the developer who owns this app",
|
|
1371
|
+
},
|
|
1372
|
+
},
|
|
1373
|
+
"required": ["slug", "name"],
|
|
1374
|
+
"definitions": {
|
|
1375
|
+
"indexDefinition": {
|
|
1376
|
+
"type": "object",
|
|
1377
|
+
"properties": {
|
|
1378
|
+
"name": {
|
|
1379
|
+
"type": "string",
|
|
1380
|
+
"pattern": "^[a-zA-Z0-9_]+$",
|
|
1381
|
+
"minLength": 1,
|
|
1382
|
+
"description": "Base index name (will be prefixed with slug)",
|
|
1383
|
+
},
|
|
1384
|
+
"type": {
|
|
1385
|
+
"type": "string",
|
|
1386
|
+
"enum": [
|
|
1387
|
+
"regular",
|
|
1388
|
+
"vectorSearch",
|
|
1389
|
+
"search",
|
|
1390
|
+
"text",
|
|
1391
|
+
"geospatial",
|
|
1392
|
+
"ttl",
|
|
1393
|
+
"partial",
|
|
1394
|
+
"hybrid",
|
|
1395
|
+
],
|
|
1396
|
+
"description": (
|
|
1397
|
+
"Index type. 'hybrid' creates both vector and text "
|
|
1398
|
+
"indexes for hybrid search with $rankFusion."
|
|
1399
|
+
),
|
|
1400
|
+
},
|
|
1401
|
+
"keys": {
|
|
1402
|
+
"oneOf": [
|
|
1403
|
+
{
|
|
1404
|
+
"type": "object",
|
|
1405
|
+
"patternProperties": {
|
|
1406
|
+
"^[a-zA-Z0-9_.]+$": {
|
|
1407
|
+
"oneOf": [
|
|
1408
|
+
{"type": "integer", "enum": [1, -1]},
|
|
1409
|
+
{
|
|
1410
|
+
"type": "string",
|
|
1411
|
+
"enum": [
|
|
1412
|
+
"text",
|
|
1413
|
+
"2dsphere",
|
|
1414
|
+
"2d",
|
|
1415
|
+
"geoHaystack",
|
|
1416
|
+
"hashed",
|
|
1417
|
+
],
|
|
1418
|
+
},
|
|
1419
|
+
]
|
|
1420
|
+
}
|
|
1421
|
+
},
|
|
1422
|
+
},
|
|
1423
|
+
{
|
|
1424
|
+
"type": "array",
|
|
1425
|
+
"items": {
|
|
1426
|
+
"type": "array",
|
|
1427
|
+
"minItems": 2,
|
|
1428
|
+
"maxItems": 2,
|
|
1429
|
+
"prefixItems": [
|
|
1430
|
+
{"type": "string"},
|
|
1431
|
+
{
|
|
1432
|
+
"oneOf": [
|
|
1433
|
+
{"type": "integer", "enum": [1, -1]},
|
|
1434
|
+
{
|
|
1435
|
+
"type": "string",
|
|
1436
|
+
"enum": [
|
|
1437
|
+
"text",
|
|
1438
|
+
"2dsphere",
|
|
1439
|
+
"2d",
|
|
1440
|
+
"geoHaystack",
|
|
1441
|
+
"hashed",
|
|
1442
|
+
],
|
|
1443
|
+
},
|
|
1444
|
+
]
|
|
1445
|
+
},
|
|
1446
|
+
],
|
|
1447
|
+
"items": False,
|
|
1448
|
+
},
|
|
1449
|
+
},
|
|
1450
|
+
],
|
|
1451
|
+
"description": (
|
|
1452
|
+
"Index keys (required for regular, text, geospatial, "
|
|
1453
|
+
"ttl, partial indexes)"
|
|
1454
|
+
),
|
|
1455
|
+
},
|
|
1456
|
+
"definition": {
|
|
1457
|
+
"type": "object",
|
|
1458
|
+
"description": (
|
|
1459
|
+
"Index definition (required for vectorSearch and "
|
|
1460
|
+
"search indexes)"
|
|
1461
|
+
),
|
|
1462
|
+
},
|
|
1463
|
+
"hybrid": {
|
|
1464
|
+
"type": "object",
|
|
1465
|
+
"properties": {
|
|
1466
|
+
"vector_index": {
|
|
1467
|
+
"type": "object",
|
|
1468
|
+
"properties": {
|
|
1469
|
+
"name": {
|
|
1470
|
+
"type": "string",
|
|
1471
|
+
"pattern": "^[a-zA-Z0-9_]+$",
|
|
1472
|
+
"description": (
|
|
1473
|
+
"Name for the vector index "
|
|
1474
|
+
"(defaults to '{name}_vector')"
|
|
1475
|
+
),
|
|
1476
|
+
},
|
|
1477
|
+
"definition": {
|
|
1478
|
+
"type": "object",
|
|
1479
|
+
"description": (
|
|
1480
|
+
"Vector index definition with "
|
|
1481
|
+
"mappings.fields containing knnVector "
|
|
1482
|
+
"fields"
|
|
1483
|
+
),
|
|
1484
|
+
},
|
|
1485
|
+
},
|
|
1486
|
+
"required": ["definition"],
|
|
1487
|
+
"additionalProperties": False,
|
|
1488
|
+
},
|
|
1489
|
+
"text_index": {
|
|
1490
|
+
"type": "object",
|
|
1491
|
+
"properties": {
|
|
1492
|
+
"name": {
|
|
1493
|
+
"type": "string",
|
|
1494
|
+
"pattern": "^[a-zA-Z0-9_]+$",
|
|
1495
|
+
"description": (
|
|
1496
|
+
"Name for the text index "
|
|
1497
|
+
"(defaults to '{name}_text')"
|
|
1498
|
+
),
|
|
1499
|
+
},
|
|
1500
|
+
"definition": {
|
|
1501
|
+
"type": "object",
|
|
1502
|
+
"description": (
|
|
1503
|
+
"Text index definition with mappings "
|
|
1504
|
+
"for full-text search"
|
|
1505
|
+
),
|
|
1506
|
+
},
|
|
1507
|
+
},
|
|
1508
|
+
"required": ["definition"],
|
|
1509
|
+
"additionalProperties": False,
|
|
1510
|
+
},
|
|
1511
|
+
},
|
|
1512
|
+
"required": ["vector_index", "text_index"],
|
|
1513
|
+
"additionalProperties": False,
|
|
1514
|
+
"description": (
|
|
1515
|
+
"Hybrid search configuration (required when type is "
|
|
1516
|
+
"'hybrid'). Defines both vector and text indexes for "
|
|
1517
|
+
"$rankFusion."
|
|
1518
|
+
),
|
|
1519
|
+
},
|
|
1520
|
+
"options": {
|
|
1521
|
+
"type": "object",
|
|
1522
|
+
"properties": {
|
|
1523
|
+
"unique": {"type": "boolean"},
|
|
1524
|
+
"sparse": {"type": "boolean"},
|
|
1525
|
+
"background": {"type": "boolean"},
|
|
1526
|
+
"name": {"type": "string"},
|
|
1527
|
+
"partialFilterExpression": {
|
|
1528
|
+
"type": "object",
|
|
1529
|
+
"description": "Filter expression for partial indexes",
|
|
1530
|
+
},
|
|
1531
|
+
"expireAfterSeconds": {
|
|
1532
|
+
"type": "integer",
|
|
1533
|
+
"minimum": 1,
|
|
1534
|
+
"description": "TTL in seconds (required for TTL indexes)",
|
|
1535
|
+
},
|
|
1536
|
+
"weights": {
|
|
1537
|
+
"type": "object",
|
|
1538
|
+
"patternProperties": {
|
|
1539
|
+
"^[a-zA-Z0-9_.]+$": {"type": "integer", "minimum": 1}
|
|
1540
|
+
},
|
|
1541
|
+
"description": "Field weights for text indexes",
|
|
1542
|
+
},
|
|
1543
|
+
"default_language": {
|
|
1544
|
+
"type": "string",
|
|
1545
|
+
"description": "Default language for text indexes",
|
|
1546
|
+
},
|
|
1547
|
+
"language_override": {
|
|
1548
|
+
"type": "string",
|
|
1549
|
+
"description": "Language override field for text indexes",
|
|
1550
|
+
},
|
|
1551
|
+
},
|
|
1552
|
+
"description": "Index options (varies by index type)",
|
|
1553
|
+
},
|
|
1554
|
+
},
|
|
1555
|
+
"required": ["name", "type"],
|
|
1556
|
+
"allOf": [
|
|
1557
|
+
{
|
|
1558
|
+
"if": {"properties": {"type": {"const": "regular"}}},
|
|
1559
|
+
"then": {"required": ["keys"]},
|
|
1560
|
+
},
|
|
1561
|
+
{
|
|
1562
|
+
"if": {"properties": {"type": {"const": "text"}}},
|
|
1563
|
+
"then": {"required": ["keys"]},
|
|
1564
|
+
},
|
|
1565
|
+
{
|
|
1566
|
+
"if": {"properties": {"type": {"const": "geospatial"}}},
|
|
1567
|
+
"then": {"required": ["keys"]},
|
|
1568
|
+
},
|
|
1569
|
+
{
|
|
1570
|
+
"if": {"properties": {"type": {"const": "ttl"}}},
|
|
1571
|
+
"then": {"required": ["keys"]},
|
|
1572
|
+
},
|
|
1573
|
+
{
|
|
1574
|
+
"if": {"properties": {"type": {"const": "partial"}}},
|
|
1575
|
+
"then": {"required": ["keys", "options"]},
|
|
1576
|
+
"else": {
|
|
1577
|
+
"properties": {
|
|
1578
|
+
"options": {
|
|
1579
|
+
"not": {"required": ["partialFilterExpression"]}
|
|
1580
|
+
}
|
|
1581
|
+
}
|
|
1582
|
+
},
|
|
1583
|
+
},
|
|
1584
|
+
{
|
|
1585
|
+
"if": {"properties": {"type": {"const": "vectorSearch"}}},
|
|
1586
|
+
"then": {"required": ["definition"]},
|
|
1587
|
+
},
|
|
1588
|
+
{
|
|
1589
|
+
"if": {"properties": {"type": {"const": "search"}}},
|
|
1590
|
+
"then": {"required": ["definition"]},
|
|
1591
|
+
},
|
|
1592
|
+
{
|
|
1593
|
+
"if": {"properties": {"type": {"const": "hybrid"}}},
|
|
1594
|
+
"then": {"required": ["hybrid"]},
|
|
1595
|
+
},
|
|
1596
|
+
],
|
|
1597
|
+
},
|
|
1598
|
+
"collectionSettings": {
|
|
1599
|
+
"type": "object",
|
|
1600
|
+
"properties": {
|
|
1601
|
+
"validation": {
|
|
1602
|
+
"type": "object",
|
|
1603
|
+
"properties": {
|
|
1604
|
+
"validator": {"type": "object"},
|
|
1605
|
+
"validationLevel": {
|
|
1606
|
+
"type": "string",
|
|
1607
|
+
"enum": ["off", "strict", "moderate"],
|
|
1608
|
+
},
|
|
1609
|
+
"validationAction": {
|
|
1610
|
+
"type": "string",
|
|
1611
|
+
"enum": ["error", "warn"],
|
|
1612
|
+
},
|
|
1613
|
+
},
|
|
1614
|
+
},
|
|
1615
|
+
"collation": {
|
|
1616
|
+
"type": "object",
|
|
1617
|
+
"properties": {
|
|
1618
|
+
"locale": {"type": "string"},
|
|
1619
|
+
"caseLevel": {"type": "boolean"},
|
|
1620
|
+
"caseFirst": {"type": "string"},
|
|
1621
|
+
"strength": {"type": "integer"},
|
|
1622
|
+
"numericOrdering": {"type": "boolean"},
|
|
1623
|
+
"alternate": {"type": "string"},
|
|
1624
|
+
"maxVariable": {"type": "string"},
|
|
1625
|
+
"normalization": {"type": "boolean"},
|
|
1626
|
+
"backwards": {"type": "boolean"},
|
|
1627
|
+
},
|
|
1628
|
+
},
|
|
1629
|
+
"capped": {"type": "boolean"},
|
|
1630
|
+
"size": {
|
|
1631
|
+
"type": "integer",
|
|
1632
|
+
"minimum": 1,
|
|
1633
|
+
"description": "Maximum size in bytes for capped collection",
|
|
1634
|
+
},
|
|
1635
|
+
"max": {
|
|
1636
|
+
"type": "integer",
|
|
1637
|
+
"minimum": 1,
|
|
1638
|
+
"description": "Maximum number of documents for capped collection",
|
|
1639
|
+
},
|
|
1640
|
+
"timeseries": {
|
|
1641
|
+
"type": "object",
|
|
1642
|
+
"properties": {
|
|
1643
|
+
"timeField": {"type": "string"},
|
|
1644
|
+
"metaField": {"type": "string"},
|
|
1645
|
+
"granularity": {
|
|
1646
|
+
"type": "string",
|
|
1647
|
+
"enum": ["seconds", "minutes", "hours"],
|
|
1648
|
+
},
|
|
1649
|
+
},
|
|
1650
|
+
"required": ["timeField"],
|
|
1651
|
+
},
|
|
1652
|
+
},
|
|
1653
|
+
},
|
|
1654
|
+
},
|
|
1655
|
+
}
|
|
1656
|
+
|
|
1657
|
+
# Manifest schema for version 1.0, retained for backward compatibility.
# It is a reduced schema covering: schema_version, slug, name, description,
# status, auth_required, data_scope, pip_deps, managed_indexes, developer_id.
MANIFEST_SCHEMA_V1 = {
    "type": "object",
    "properties": {
        # Optional; when present it must be exactly "1.0" (pattern and const agree).
        "schema_version": {"type": "string", "pattern": "^1\\.0$", "const": "1.0"},
        # --- Identity ---
        "slug": {
            "type": "string",
            "pattern": "^[a-z0-9_-]+$",
            "description": "App slug (lowercase alphanumeric, underscores, hyphens)",
        },
        "name": {
            "type": "string",
            "minLength": 1,
            "description": "Human-readable app name",
        },
        "description": {"type": "string", "description": "App description"},
        # --- Lifecycle and access ---
        "status": {
            "type": "string",
            "enum": ["active", "draft", "archived", "inactive"],
            "default": "draft",
            "description": "App status",
        },
        "auth_required": {
            "type": "boolean",
            "default": False,
            "description": "Whether authentication is required for this app",
        },
        "data_scope": {
            "type": "array",
            "items": {"type": "string"},
            "minItems": 1,
            "default": ["self"],
            "description": "List of app slugs whose data this app can access",
        },
        # --- Environment ---
        "pip_deps": {
            "type": "array",
            "items": {"type": "string"},
            "description": "List of pip dependencies for isolated environment",
        },
        # --- Indexes: every matching collection key needs at least one definition ---
        "managed_indexes": {
            "type": "object",
            "patternProperties": {
                "^[a-zA-Z0-9_]+$": {
                    "type": "array",
                    "items": {"$ref": "#/definitions/indexDefinition"},
                    "minItems": 1,
                }
            },
            "description": "Collection name -> list of index definitions",
        },
        # --- Ownership ---
        "developer_id": {
            "type": "string",
            "format": "email",
            "description": "Email of the developer who owns this app",
        },
    },
    "required": ["slug", "name"],
    "definitions": {
        # Shares the very same indexDefinition object as the V2 schema, so the
        # "$ref" above resolves to identical index rules in both versions.
        "indexDefinition": MANIFEST_SCHEMA_V2["definitions"]["indexDefinition"]
    },
}
|
|
1721
|
+
|
|
1722
|
+
# Populate the registry, keyed by the version constants rather than literals.
SCHEMA_REGISTRY[DEFAULT_SCHEMA_VERSION] = MANIFEST_SCHEMA_V1
# V2 is stored under its own version key and under the "default" alias, and is
# also exposed as MANIFEST_SCHEMA for backward compatibility. Chained targets
# are assigned left to right, so the registry insertion order is unchanged.
SCHEMA_REGISTRY[CURRENT_SCHEMA_VERSION] = SCHEMA_REGISTRY["default"] = MANIFEST_SCHEMA = (
    MANIFEST_SCHEMA_V2
)
|
|
1728
|
+
|
|
1729
|
+
|
|
1730
|
+
def get_schema_version(manifest_data: Dict[str, Any]) -> str:
    """
    Detect schema version from manifest.

    A truthy ``schema_version`` entry takes precedence and is validated.
    Otherwise the version is inferred from which top-level keys are present.

    Args:
        manifest_data: Manifest dictionary

    Returns:
        Schema version string (e.g., "1.0", "2.0")

    Raises:
        ValueError: If ``schema_version`` is set but is not a string of the
            form 'major.minor' (ASCII digits on both sides of a single dot)
    """
    import re  # function-scope import so the block does not rely on file-level imports

    version: Optional[str] = manifest_data.get("schema_version")
    if version:
        # Removing the dots and calling isdigit() would also let through values
        # such as "10", "1..0" or "1.0.0"; enforce exactly 'major.minor'.
        if not isinstance(version, str) or re.fullmatch(r"[0-9]+\.[0-9]+", version) is None:
            raise ValueError(
                f"Invalid schema_version format: {version}. Expected format: 'major.minor'"
            )
        return str(version)

    # Heuristic: a manifest carrying any 2.0-only key is treated as 2.0. The
    # deprecated 'auth_policy'/'sub_auth' keys are checked as well, for
    # backward compatibility with the old format.
    v2_markers = ("auth", "collection_settings", "auth_policy", "sub_auth")
    if any(marker in manifest_data for marker in v2_markers):
        return "2.0"

    return DEFAULT_SCHEMA_VERSION
|
|
1762
|
+
|
|
1763
|
+
|
|
1764
|
+
def migrate_manifest(
    manifest_data: Dict[str, Any], target_version: str = CURRENT_SCHEMA_VERSION
) -> Dict[str, Any]:
    """
    Migrate manifest from one schema version to another.

    The input manifest is never modified: the result is a shallow copy, and
    the nested ``auth`` mapping is copied before anything is written into it.

    Only the 1.0 -> 2.0 path performs a transformation (moving the deprecated
    ``auth_policy`` / ``sub_auth`` keys under ``auth``). For any other pair of
    versions the copy is only stamped with ``target_version``.

    Args:
        manifest_data: Manifest dictionary to migrate
        target_version: Target schema version (default: current)

    Returns:
        Migrated manifest dictionary

    Raises:
        ValueError: Propagated from get_schema_version() when the manifest
            carries a malformed ``schema_version``
    """
    current_version = get_schema_version(manifest_data)

    migrated = manifest_data.copy()
    if current_version == target_version:
        return migrated

    # Migration path: 1.0 -> 2.0
    if current_version == "1.0" and target_version == "2.0":
        # Inserted here (not only at the end) so key order in the result
        # matches what earlier releases produced.
        if "schema_version" not in migrated:
            migrated["schema_version"] = "2.0"

        # Migrate old auth_policy/sub_auth format to new auth.policy/auth.users format
        if "auth_policy" in migrated or "sub_auth" in migrated:
            logger.warning(
                f"Manifest {migrated.get('slug', 'unknown')} uses deprecated "
                f"'auth_policy'/'sub_auth' format. "
                f"Consider migrating to 'auth.policy'/'auth.users' format."
            )
            # dict.copy() above is shallow, so migrated["auth"] is still the
            # caller's object. Copy it before mutating; a missing or
            # non-mapping value starts from an empty mapping.
            existing_auth = migrated.get("auth")
            migrated["auth"] = (
                dict(existing_auth) if isinstance(existing_auth, dict) else {}
            )
            if "auth_policy" in migrated:
                migrated["auth"]["policy"] = migrated.pop("auth_policy")
            if "sub_auth" in migrated:
                migrated["auth"]["users"] = migrated.pop("sub_auth")

        # No other data transformation needed - V2.0 is backward compatible
        # and its new fields (auth, etc.) are optional.
        logger.debug(
            f"Migrated manifest from 1.0 to 2.0: {migrated.get('slug', 'unknown')}"
        )

    # Future: Add more migration paths as needed (e.g. 2.0 -> 3.0).

    migrated["schema_version"] = target_version
    return migrated
|
|
1815
|
+
|
|
1816
|
+
|
|
1817
|
+
def get_schema_for_version(version: str) -> Dict[str, Any]:
    """
    Get schema definition for a specific version.

    Lookup order:
    1. Exact match in SCHEMA_REGISTRY.
    2. The highest registered version sharing the same major component
       (compared numerically, so "2.10" ranks above "2.9").
    3. The schema registered for CURRENT_SCHEMA_VERSION.

    Steps 2 and 3 log a warning. This function does not raise for unknown
    versions; it always falls back.

    Args:
        version: Schema version string

    Returns:
        Schema definition dictionary
    """
    if version in SCHEMA_REGISTRY:
        return SCHEMA_REGISTRY[version]

    def _rank(candidate):
        # Numeric ordering of the dotted components; non-numeric parts sort
        # lowest. The raw string breaks ties deterministically.
        parts = tuple(
            int(part) if part.isdigit() else -1 for part in candidate.split(".")
        )
        return parts, candidate

    # Try to find compatible version (same major)
    major_prefix = version.split(".")[0] + "."
    compatible = [key for key in SCHEMA_REGISTRY if key.startswith(major_prefix)]
    if compatible:
        reg_version = max(compatible, key=_rank)
        logger.warning(
            f"Schema version {version} not found, using compatible version {reg_version}"
        )
        return SCHEMA_REGISTRY[reg_version]

    # Fallback to current
    logger.warning(
        f"Schema version {version} not found, using current version "
        f"{CURRENT_SCHEMA_VERSION}"
    )
    return SCHEMA_REGISTRY[CURRENT_SCHEMA_VERSION]
|
|
1848
|
+
|
|
1849
|
+
|
|
1850
|
+
async def _validate_manifest_async(
    manifest_data: Dict[str, Any], use_cache: bool = True
) -> Tuple[bool, Optional[str], Optional[List[str]]]:
    """
    Validate a manifest against the JSON Schema with versioning and caching support.

    This function:
    1. Detects schema version from manifest via get_schema_version()
    2. Uses the schema returned by get_schema_for_version() for validation
    3. Caches validation results (both successes and failures)

    Failures are always reported through the returned tuple. That includes a
    malformed ``schema_version`` and errors raised while computing the cache
    key, regardless of ``use_cache``.

    Args:
        manifest_data: The manifest data to validate
        use_cache: Whether to use validation cache (default: True, set False to force re-validation)

    Returns:
        Tuple of (is_valid, error_message, error_paths)
        - is_valid: True if valid, False otherwise
        - error_message: Human-readable error message (None if valid)
        - error_paths: List of JSON paths with errors (None if valid or when
          no path information is available)

    Note: This function does NOT validate developer_id against the database.
    Use validate_manifest_with_db() for database validation.
    """
    # Stays None until a key has been computed successfully. If computing it
    # fails, the result is returned but not cached.
    cache_key: Optional[str] = None

    try:
        version = get_schema_version(manifest_data)

        if use_cache:
            cache_key = _get_manifest_hash(manifest_data) + "_" + version
            if cache_key in _validation_cache:
                return _validation_cache[cache_key]

        schema = get_schema_for_version(version)

        # Note: Tuple-to-list conversion should happen at the API boundary
        # (register_app), not here. This keeps validation logic clean and
        # schema-agnostic.
        validate(instance=manifest_data, schema=schema)
        result = (True, None, None)

    except ValidationError as e:
        top_path = list(e.absolute_path)
        if top_path:
            error_paths = [".".join(str(p) for p in top_path)]
        else:
            error_paths = ["root"]
        error_messages = [e.message]

        # Only the first level of nested (context) errors is reported
        for suberror in getattr(e, "context", None) or ():
            sub_path = list(suberror.absolute_path)
            if sub_path:
                error_paths.append(".".join(str(p) for p in sub_path))
            error_messages.append(suberror.message)

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # message is deterministic (set() ordering is not).
        error_message = "; ".join(dict.fromkeys(error_messages))
        result = (False, error_message, error_paths)

    except SchemaError as e:
        result = (False, f"Invalid schema definition: {e.message}", ["schema"])

    except (TypeError, ValueError, KeyError) as e:
        # Malformed manifest structure (including an invalid schema_version)
        logger.exception("Unexpected error during manifest validation")
        result = (False, f"Manifest structure error: {str(e)}", None)

    if cache_key is not None:
        _validation_cache[cache_key] = result

    return result
|
|
1984
|
+
|
|
1985
|
+
|
|
1986
|
+
def clear_validation_cache():
    """Empty the module-level validation cache (e.g. in tests or after schemas change)."""
    # .clear() mutates the existing cache object in place; the name is never
    # rebound, so no ``global`` declaration is required.
    _validation_cache.clear()
    logger.debug("Validation cache cleared")
|
|
1991
|
+
|
|
1992
|
+
|
|
1993
|
+
async def validate_manifests_parallel(
    manifests: List[Dict[str, Any]], use_cache: bool = True
) -> List[Tuple[bool, Optional[str], Optional[List[str]], Optional[str]]]:
    """
    Validate multiple manifests concurrently via asyncio.gather.

    A failure while validating one manifest (including a malformed,
    non-dict entry) is reported in that manifest's result tuple and never
    aborts the rest of the batch.

    Args:
        manifests: List of manifest dictionaries to validate
        use_cache: Whether to use validation cache

    Returns:
        List of tuples: (is_valid, error_message, error_paths, slug)
        Each tuple corresponds to the manifest at the same index. ``slug``
        is "unknown" when the manifest has no "slug" key or is not a dict.
    """

    def _slug_of(manifest: Any) -> Any:
        # Must never raise: it is also used while reporting a failure
        if isinstance(manifest, dict):
            return manifest.get("slug", "unknown")
        return "unknown"

    async def validate_one(
        manifest: Dict[str, Any]
    ) -> Tuple[bool, Optional[str], Optional[List[str]], Optional[str]]:
        is_valid, error, paths = await _validate_manifest_async(
            manifest, use_cache=use_cache
        )
        return (is_valid, error, paths, _slug_of(manifest))

    # return_exceptions=True: failures come back as result values instead of
    # propagating out of gather()
    results = await asyncio.gather(
        *(validate_one(m) for m in manifests), return_exceptions=True
    )

    validated_results = []
    for manifest, result in zip(manifests, results):
        # BaseException rather than Exception: a cancelled child surfaces as
        # CancelledError, which does not derive from Exception on 3.8+.
        if isinstance(result, BaseException):
            validated_results.append(
                (False, f"Validation error: {str(result)}", None, _slug_of(manifest))
            )
        else:
            validated_results.append(result)

    return validated_results
|
|
2034
|
+
|
|
2035
|
+
|
|
2036
|
+
async def validate_developer_id(
    developer_id: str, db_validator: Optional[Callable[[str], Awaitable[bool]]] = None
) -> Tuple[bool, Optional[str]]:
    """
    Check a developer_id for basic shape and, optionally, against the database.

    The local checks are: non-empty, is a string, and contains both "@" and
    ".". When ``db_validator`` is supplied it is awaited with the id and its
    truthiness decides the outcome.

    Args:
        developer_id: The developer email to validate
        db_validator: Optional async callable taking the developer_id and
            returning True when the user exists with the developer role

    Returns:
        Tuple of (is_valid, error_message)
        - is_valid: True if valid, False otherwise
        - error_message: Human-readable error message (None if valid)
    """
    if not developer_id:
        return False, "developer_id cannot be empty"

    if not isinstance(developer_id, str):
        return False, "developer_id must be a string (email)"

    # Deliberately loose email check; the JSON schema validates the format too
    looks_like_email = "@" in developer_id and "." in developer_id
    if not looks_like_email:
        return (
            False,
            f"developer_id '{developer_id}' does not appear to be a valid email",
        )

    # Without a validator there is nothing more to check
    if not db_validator:
        return True, None

    try:
        found = await db_validator(developer_id)
        if not found:
            return (
                False,
                f"developer_id '{developer_id}' does not exist or does not have developer role",
            )
    except (ValueError, TypeError, AttributeError) as exc:
        logger.exception(
            f"Validation error validating developer_id '{developer_id}'"
        )
        return False, f"Error validating developer_id: {exc}"

    return True, None
|
|
2081
|
+
|
|
2082
|
+
|
|
2083
|
+
async def validate_manifest_with_db(
    manifest_data: Dict[str, Any],
    db_validator: Callable[[str], Awaitable[bool]],
    use_cache: bool = True,
) -> Tuple[bool, Optional[str], Optional[List[str]]]:
    """
    Run schema validation, then verify the manifest's developer_id (if any).

    Args:
        manifest_data: The manifest data to validate
        db_validator: Async callable taking a developer_id (str) and returning
            a bool; passed through to validate_developer_id()
        use_cache: Whether to use validation cache (default: True)

    Returns:
        Tuple of (is_valid, error_message, error_paths)
        - is_valid: True if valid, False otherwise
        - error_message: Human-readable error message (None if valid)
        - error_paths: List of JSON paths with errors (None if valid)
    """
    # Step 1: schema validation (versioned) via the async implementation
    schema_ok, schema_error, schema_paths = await _validate_manifest_async(
        manifest_data, use_cache=use_cache
    )
    if not schema_ok:
        return False, schema_error, schema_paths

    # Step 2: developer_id is only checked when the key is present
    if "developer_id" not in manifest_data:
        return True, None, None

    dev_ok, dev_error = await validate_developer_id(
        manifest_data.get("developer_id"), db_validator
    )
    if dev_ok:
        return True, None, None

    return (
        False,
        f"developer_id validation failed: {dev_error}",
        ["developer_id"],
    )
|
|
2123
|
+
|
|
2124
|
+
|
|
2125
|
+
# Public API: Synchronous wrapper for backward compatibility
|
|
2126
|
+
# Most callers use this synchronously, so we provide a sync wrapper
|
|
2127
|
+
def validate_manifest(
    manifest_data: Dict[str, Any], use_cache: bool = True
) -> Tuple[bool, Optional[str], Optional[List[str]]]:
    """
    Validate a manifest against the JSON Schema with versioning and caching
    support (synchronous wrapper).

    This function wraps the async validation for backward compatibility.
    In async contexts, use _validate_manifest_async() directly for better performance.

    How the coroutine is driven:
    - No event loop running in this thread: it runs on a private,
      short-lived loop that is closed afterwards. The thread's current
      event loop is not touched.
    - A loop is already running in this thread: it runs via asyncio.run()
      on a worker thread, because a running loop cannot be re-entered. The
      calling thread blocks until the result is ready.

    Args:
        manifest_data: The manifest data to validate
        use_cache: Whether to use validation cache (default: True)

    Returns:
        Tuple of (is_valid, error_message, error_paths)
        - is_valid: True if valid, False otherwise
        - error_message: Human-readable error message (None if valid)
        - error_paths: List of JSON paths with errors (None if valid)
    """
    import asyncio
    import concurrent.futures

    # Only the loop detection is guarded. A RuntimeError raised by the
    # validation itself must propagate rather than trigger a second run.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_is_running = False
    else:
        loop_is_running = True

    if loop_is_running:
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            future = executor.submit(
                lambda: asyncio.run(
                    _validate_manifest_async(manifest_data, use_cache)
                )
            )
            return future.result()

    private_loop = asyncio.new_event_loop()
    try:
        return private_loop.run_until_complete(
            _validate_manifest_async(manifest_data, use_cache)
        )
    finally:
        private_loop.close()
|
|
2169
|
+
|
|
2170
|
+
|
|
2171
|
+
def _validate_regular_index(
|
|
2172
|
+
index_def: Dict[str, Any], collection_name: str, index_name: str
|
|
2173
|
+
) -> Tuple[bool, Optional[str]]:
|
|
2174
|
+
"""Validate a regular index definition."""
|
|
2175
|
+
if "keys" not in index_def:
|
|
2176
|
+
return (
|
|
2177
|
+
False,
|
|
2178
|
+
f"Regular index '{index_name}' in collection "
|
|
2179
|
+
f"'{collection_name}' requires 'keys' field",
|
|
2180
|
+
)
|
|
2181
|
+
keys = index_def.get("keys")
|
|
2182
|
+
if (
|
|
2183
|
+
not keys
|
|
2184
|
+
or (isinstance(keys, dict) and len(keys) == 0)
|
|
2185
|
+
or (isinstance(keys, list) and len(keys) == 0)
|
|
2186
|
+
):
|
|
2187
|
+
return (
|
|
2188
|
+
False,
|
|
2189
|
+
f"Regular index '{index_name}' in collection "
|
|
2190
|
+
f"'{collection_name}' has empty 'keys'",
|
|
2191
|
+
)
|
|
2192
|
+
|
|
2193
|
+
# Check for _id index
|
|
2194
|
+
is_id_index = False
|
|
2195
|
+
if isinstance(keys, dict):
|
|
2196
|
+
is_id_index = len(keys) == 1 and "_id" in keys
|
|
2197
|
+
elif isinstance(keys, list):
|
|
2198
|
+
is_id_index = len(keys) == 1 and len(keys[0]) >= 1 and keys[0][0] == "_id"
|
|
2199
|
+
|
|
2200
|
+
if is_id_index:
|
|
2201
|
+
return (
|
|
2202
|
+
False,
|
|
2203
|
+
f"Index '{index_name}' in collection '{collection_name}' "
|
|
2204
|
+
f"cannot target '_id' field (MongoDB creates _id indexes "
|
|
2205
|
+
f"automatically)",
|
|
2206
|
+
)
|
|
2207
|
+
return True, None
|
|
2208
|
+
|
|
2209
|
+
|
|
2210
|
+
def _validate_ttl_index(
|
|
2211
|
+
index_def: Dict[str, Any], collection_name: str, index_name: str
|
|
2212
|
+
) -> Tuple[bool, Optional[str]]:
|
|
2213
|
+
"""Validate a TTL index definition."""
|
|
2214
|
+
if "keys" not in index_def:
|
|
2215
|
+
return (
|
|
2216
|
+
False,
|
|
2217
|
+
f"TTL index '{index_name}' in collection '{collection_name}' "
|
|
2218
|
+
f"requires 'keys' field",
|
|
2219
|
+
)
|
|
2220
|
+
options = index_def.get("options", {})
|
|
2221
|
+
if "expireAfterSeconds" not in options:
|
|
2222
|
+
return (
|
|
2223
|
+
False,
|
|
2224
|
+
f"TTL index '{index_name}' in collection '{collection_name}' "
|
|
2225
|
+
f"requires 'expireAfterSeconds' in options",
|
|
2226
|
+
)
|
|
2227
|
+
expire_after = options.get("expireAfterSeconds")
|
|
2228
|
+
if not isinstance(expire_after, int) or expire_after < MIN_TTL_SECONDS:
|
|
2229
|
+
return (
|
|
2230
|
+
False,
|
|
2231
|
+
f"TTL index '{index_name}' in collection '{collection_name}' "
|
|
2232
|
+
f"requires 'expireAfterSeconds' to be >= {MIN_TTL_SECONDS}",
|
|
2233
|
+
)
|
|
2234
|
+
if expire_after > MAX_TTL_SECONDS:
|
|
2235
|
+
return (
|
|
2236
|
+
False,
|
|
2237
|
+
f"TTL index '{index_name}' in collection '{collection_name}' "
|
|
2238
|
+
f"has 'expireAfterSeconds' too large ({expire_after}). "
|
|
2239
|
+
f"Maximum recommended is {MAX_TTL_SECONDS} (1 year). "
|
|
2240
|
+
f"Consider if this is intentional.",
|
|
2241
|
+
)
|
|
2242
|
+
return True, None
|
|
2243
|
+
|
|
2244
|
+
|
|
2245
|
+
def _validate_partial_index(
|
|
2246
|
+
index_def: Dict[str, Any], collection_name: str, index_name: str
|
|
2247
|
+
) -> Tuple[bool, Optional[str]]:
|
|
2248
|
+
"""Validate a partial index definition."""
|
|
2249
|
+
if "keys" not in index_def:
|
|
2250
|
+
return (
|
|
2251
|
+
False,
|
|
2252
|
+
f"Partial index '{index_name}' in collection "
|
|
2253
|
+
f"'{collection_name}' requires 'keys' field",
|
|
2254
|
+
)
|
|
2255
|
+
options = index_def.get("options", {})
|
|
2256
|
+
if "partialFilterExpression" not in options:
|
|
2257
|
+
return (
|
|
2258
|
+
False,
|
|
2259
|
+
f"Partial index '{index_name}' in collection "
|
|
2260
|
+
f"'{collection_name}' requires 'partialFilterExpression' in "
|
|
2261
|
+
f"options",
|
|
2262
|
+
)
|
|
2263
|
+
return True, None
|
|
2264
|
+
|
|
2265
|
+
|
|
2266
|
+
def _validate_text_index(
|
|
2267
|
+
index_def: Dict[str, Any], collection_name: str, index_name: str
|
|
2268
|
+
) -> Tuple[bool, Optional[str]]:
|
|
2269
|
+
"""Validate a text index definition."""
|
|
2270
|
+
if "keys" not in index_def:
|
|
2271
|
+
return (
|
|
2272
|
+
False,
|
|
2273
|
+
f"Text index '{index_name}' in collection '{collection_name}' "
|
|
2274
|
+
f"requires 'keys' field",
|
|
2275
|
+
)
|
|
2276
|
+
keys = index_def.get("keys")
|
|
2277
|
+
# Text indexes should have text type in keys
|
|
2278
|
+
has_text = False
|
|
2279
|
+
if isinstance(keys, dict):
|
|
2280
|
+
has_text = any(v == "text" or v == "TEXT" for v in keys.values())
|
|
2281
|
+
elif isinstance(keys, list):
|
|
2282
|
+
has_text = any(len(k) >= 2 and (k[1] == "text" or k[1] == "TEXT") for k in keys)
|
|
2283
|
+
if not has_text:
|
|
2284
|
+
return (
|
|
2285
|
+
False,
|
|
2286
|
+
f"Text index '{index_name}' in collection '{collection_name}' "
|
|
2287
|
+
f"must have at least one field with 'text' type in keys",
|
|
2288
|
+
)
|
|
2289
|
+
return True, None
|
|
2290
|
+
|
|
2291
|
+
|
|
2292
|
+
def _validate_geospatial_index(
|
|
2293
|
+
index_def: Dict[str, Any], collection_name: str, index_name: str
|
|
2294
|
+
) -> Tuple[bool, Optional[str]]:
|
|
2295
|
+
"""Validate a geospatial index definition."""
|
|
2296
|
+
if "keys" not in index_def:
|
|
2297
|
+
return (
|
|
2298
|
+
False,
|
|
2299
|
+
f"Geospatial index '{index_name}' in collection "
|
|
2300
|
+
f"'{collection_name}' requires 'keys' field",
|
|
2301
|
+
)
|
|
2302
|
+
keys = index_def.get("keys")
|
|
2303
|
+
# Geospatial indexes should have geospatial type in keys
|
|
2304
|
+
has_geo = False
|
|
2305
|
+
if isinstance(keys, dict):
|
|
2306
|
+
has_geo = any(v in ["2dsphere", "2d", "geoHaystack"] for v in keys.values())
|
|
2307
|
+
elif isinstance(keys, list):
|
|
2308
|
+
has_geo = any(
|
|
2309
|
+
len(k) >= 2 and k[1] in ["2dsphere", "2d", "geoHaystack"] for k in keys
|
|
2310
|
+
)
|
|
2311
|
+
if not has_geo:
|
|
2312
|
+
return (
|
|
2313
|
+
False,
|
|
2314
|
+
f"Geospatial index '{index_name}' in collection "
|
|
2315
|
+
f"'{collection_name}' must have at least one field with "
|
|
2316
|
+
f"geospatial type ('2dsphere', '2d', or 'geoHaystack') in keys",
|
|
2317
|
+
)
|
|
2318
|
+
return True, None
|
|
2319
|
+
|
|
2320
|
+
|
|
2321
|
+
def _validate_vector_search_index(
|
|
2322
|
+
index_def: Dict[str, Any], collection_name: str, index_name: str, index_type: str
|
|
2323
|
+
) -> Tuple[bool, Optional[str]]:
|
|
2324
|
+
"""Validate a vectorSearch or search index definition."""
|
|
2325
|
+
if "definition" not in index_def:
|
|
2326
|
+
return (
|
|
2327
|
+
False,
|
|
2328
|
+
f"{index_type} index '{index_name}' in collection "
|
|
2329
|
+
f"'{collection_name}' requires 'definition' field",
|
|
2330
|
+
)
|
|
2331
|
+
definition = index_def.get("definition")
|
|
2332
|
+
if not isinstance(definition, dict):
|
|
2333
|
+
return (
|
|
2334
|
+
False,
|
|
2335
|
+
f"{index_type} index '{index_name}' in collection "
|
|
2336
|
+
f"'{collection_name}' requires 'definition' to be an object",
|
|
2337
|
+
)
|
|
2338
|
+
|
|
2339
|
+
# Additional validation for vectorSearch indexes
|
|
2340
|
+
if index_type == "vectorSearch":
|
|
2341
|
+
fields = definition.get("fields", [])
|
|
2342
|
+
if not isinstance(fields, list) or len(fields) == 0:
|
|
2343
|
+
return (
|
|
2344
|
+
False,
|
|
2345
|
+
f"VectorSearch index '{index_name}' in collection "
|
|
2346
|
+
f"'{collection_name}' requires 'definition.fields' to be "
|
|
2347
|
+
f"a non-empty array",
|
|
2348
|
+
)
|
|
2349
|
+
|
|
2350
|
+
# Validate vector field dimensions
|
|
2351
|
+
for field in fields:
|
|
2352
|
+
if isinstance(field, dict) and field.get("type") == "vector":
|
|
2353
|
+
num_dims = field.get("numDimensions")
|
|
2354
|
+
if (
|
|
2355
|
+
not isinstance(num_dims, int)
|
|
2356
|
+
or num_dims < MIN_VECTOR_DIMENSIONS
|
|
2357
|
+
or num_dims > MAX_VECTOR_DIMENSIONS
|
|
2358
|
+
):
|
|
2359
|
+
return (
|
|
2360
|
+
False,
|
|
2361
|
+
f"VectorSearch index '{index_name}' in collection "
|
|
2362
|
+
f"'{collection_name}' requires 'numDimensions' "
|
|
2363
|
+
f"to be between {MIN_VECTOR_DIMENSIONS} and "
|
|
2364
|
+
f"{MAX_VECTOR_DIMENSIONS}, got: {num_dims}",
|
|
2365
|
+
)
|
|
2366
|
+
return True, None
|
|
2367
|
+
|
|
2368
|
+
|
|
2369
|
+
def _validate_hybrid_index(
|
|
2370
|
+
index_def: Dict[str, Any], collection_name: str, index_name: str
|
|
2371
|
+
) -> Tuple[bool, Optional[str]]:
|
|
2372
|
+
"""Validate a hybrid index definition."""
|
|
2373
|
+
if "hybrid" not in index_def:
|
|
2374
|
+
return (
|
|
2375
|
+
False,
|
|
2376
|
+
f"Hybrid index '{index_name}' in collection '{collection_name}' "
|
|
2377
|
+
f"requires 'hybrid' field",
|
|
2378
|
+
)
|
|
2379
|
+
hybrid = index_def.get("hybrid")
|
|
2380
|
+
if not isinstance(hybrid, dict):
|
|
2381
|
+
return (
|
|
2382
|
+
False,
|
|
2383
|
+
f"Hybrid index '{index_name}' in collection '{collection_name}' "
|
|
2384
|
+
f"requires 'hybrid' to be an object",
|
|
2385
|
+
)
|
|
2386
|
+
|
|
2387
|
+
# Validate vector_index
|
|
2388
|
+
vector_index = hybrid.get("vector_index")
|
|
2389
|
+
if not vector_index or not isinstance(vector_index, dict):
|
|
2390
|
+
return (
|
|
2391
|
+
False,
|
|
2392
|
+
f"Hybrid index '{index_name}' in collection '{collection_name}' "
|
|
2393
|
+
f"requires 'hybrid.vector_index' to be an object",
|
|
2394
|
+
)
|
|
2395
|
+
if "definition" not in vector_index:
|
|
2396
|
+
return (
|
|
2397
|
+
False,
|
|
2398
|
+
f"Hybrid index '{index_name}' in collection '{collection_name}' "
|
|
2399
|
+
f"requires 'hybrid.vector_index.definition' field",
|
|
2400
|
+
)
|
|
2401
|
+
vector_def = vector_index.get("definition")
|
|
2402
|
+
if not isinstance(vector_def, dict):
|
|
2403
|
+
return (
|
|
2404
|
+
False,
|
|
2405
|
+
f"Hybrid index '{index_name}' in collection '{collection_name}' "
|
|
2406
|
+
f"requires 'hybrid.vector_index.definition' to be an object",
|
|
2407
|
+
)
|
|
2408
|
+
|
|
2409
|
+
# Validate text_index
|
|
2410
|
+
text_index = hybrid.get("text_index")
|
|
2411
|
+
if not text_index or not isinstance(text_index, dict):
|
|
2412
|
+
return (
|
|
2413
|
+
False,
|
|
2414
|
+
f"Hybrid index '{index_name}' in collection '{collection_name}' "
|
|
2415
|
+
f"requires 'hybrid.text_index' to be an object",
|
|
2416
|
+
)
|
|
2417
|
+
if "definition" not in text_index:
|
|
2418
|
+
return (
|
|
2419
|
+
False,
|
|
2420
|
+
f"Hybrid index '{index_name}' in collection '{collection_name}' "
|
|
2421
|
+
f"requires 'hybrid.text_index.definition' field",
|
|
2422
|
+
)
|
|
2423
|
+
text_def = text_index.get("definition")
|
|
2424
|
+
if not isinstance(text_def, dict):
|
|
2425
|
+
return (
|
|
2426
|
+
False,
|
|
2427
|
+
f"Hybrid index '{index_name}' in collection '{collection_name}' "
|
|
2428
|
+
f"requires 'hybrid.text_index.definition' to be an object",
|
|
2429
|
+
)
|
|
2430
|
+
return True, None
|
|
2431
|
+
|
|
2432
|
+
|
|
2433
|
+
def validate_index_definition(
    index_def: Dict[str, Any], collection_name: str, index_name: str
) -> Tuple[bool, Optional[str]]:
    """
    Validate one index definition by dispatching on its ``type`` field.

    Args:
        index_def: The index definition to validate
        collection_name: Name of the collection (for error context)
        index_name: Name of the index (for error context)

    Returns:
        Tuple of (is_valid, error_message)
    """
    index_type = index_def.get("type")
    if not index_type:
        return (
            False,
            f"Index '{index_name}' in collection '{collection_name}' "
            f"is missing 'type' field",
        )

    # Types whose validators share the (index_def, collection, name)
    # signature, in the order they are tried.
    three_arg_validators = (
        ("regular", _validate_regular_index),
        ("ttl", _validate_ttl_index),
        ("partial", _validate_partial_index),
        ("text", _validate_text_index),
        ("geospatial", _validate_geospatial_index),
    )
    for type_name, validator in three_arg_validators:
        if index_type == type_name:
            return validator(index_def, collection_name, index_name)

    # vectorSearch and search share a validator that also needs the type
    if index_type in ("vectorSearch", "search"):
        return _validate_vector_search_index(
            index_def, collection_name, index_name, index_type
        )

    if index_type == "hybrid":
        return _validate_hybrid_index(index_def, collection_name, index_name)

    return (
        False,
        f"Unknown index type '{index_type}' for index '{index_name}' "
        f"in collection '{collection_name}'",
    )
|
|
2478
|
+
|
|
2479
|
+
|
|
2480
|
+
def validate_managed_indexes(
    managed_indexes: Dict[str, List[Dict[str, Any]]]
) -> Tuple[bool, Optional[str]]:
    """
    Validate every index of every collection in ``managed_indexes``.

    Validation stops at the first problem found and reports it together
    with the collection and index it belongs to.

    Args:
        managed_indexes: The managed_indexes object from manifest, mapping
            collection name -> list of index definitions

    Returns:
        Tuple of (is_valid, error_message)
    """
    if not isinstance(managed_indexes, dict):
        return (
            False,
            "'managed_indexes' must be an object mapping collection names to index arrays",
        )

    for collection_name, indexes in managed_indexes.items():
        name_ok = isinstance(collection_name, str) and bool(collection_name)
        if not name_ok:
            return (
                False,
                f"Collection name must be a non-empty string, got: {collection_name}",
            )

        if not isinstance(indexes, list):
            return False, f"Indexes for collection '{collection_name}' must be an array"

        if not indexes:
            return False, f"Collection '{collection_name}' has an empty indexes array"

        for position, index_def in enumerate(indexes):
            if not isinstance(index_def, dict):
                return (
                    False,
                    f"Index #{position} in collection '{collection_name}' must be an object",
                )

            # Unnamed indexes get a positional placeholder for error messages
            index_name = index_def.get("name", f"index_{position}")
            ok, problem = validate_index_definition(
                index_def, collection_name, index_name
            )
            if not ok:
                return False, problem

    return True, None
|
|
2526
|
+
|
|
2527
|
+
|
|
2528
|
+
# ============================================================================
|
|
2529
|
+
# CLASS-BASED API (Enterprise-ready)
|
|
2530
|
+
# ============================================================================
|
|
2531
|
+
|
|
2532
|
+
|
|
2533
|
+
class ManifestValidator:
    """
    Class-based facade over the module-level manifest validation functions.

    Every method is a ``staticmethod`` that forwards to the functional API
    defined in this module, so either style can be used interchangeably.

    NOTE: ``use_cache`` is stored on the instance by ``__init__`` but the
    static methods below do not read it; each one takes its own
    ``use_cache`` argument instead.
    """

    def __init__(self, use_cache: bool = True):
        """
        Create a validator.

        Args:
            use_cache: Preferred cache setting, kept on the instance as
                ``self.use_cache`` (default: True)
        """
        self.use_cache = use_cache

    @staticmethod
    def validate(
        manifest: Dict[str, Any], use_cache: bool = True
    ) -> Tuple[bool, Optional[str], Optional[List[str]]]:
        """
        Validate a manifest by delegating to ``validate_manifest``.

        Args:
            manifest: Manifest dictionary to validate
            use_cache: Forwarded to ``validate_manifest`` as ``use_cache``

        Returns:
            Tuple of (is_valid, error_message, error_paths)
        """
        outcome = validate_manifest(manifest, use_cache=use_cache)
        return outcome

    @staticmethod
    async def validate_async(
        manifest: Dict[str, Any], use_cache: bool = True
    ) -> Tuple[bool, Optional[str], Optional[List[str]]]:
        """
        Validate a manifest by awaiting ``_validate_manifest_async``.

        The delegated validation is expected to cover:
        - JSON Schema validation
        - Cross-field dependency validation

        Args:
            manifest: Manifest dictionary to validate
            use_cache: Forwarded to ``_validate_manifest_async``

        Returns:
            Tuple of (is_valid, error_message, error_paths)
        """
        outcome = await _validate_manifest_async(manifest, use_cache=use_cache)
        return outcome

    @staticmethod
    async def validate_with_db(
        manifest: Dict[str, Any],
        db_validator: Callable[[str], Awaitable[bool]],
        use_cache: bool = True,
    ) -> Tuple[bool, Optional[str], Optional[List[str]]]:
        """
        Validate a manifest and check its developer_id against the database.

        Delegates to ``validate_manifest_with_db``.

        Args:
            manifest: Manifest dictionary to validate
            db_validator: Async callable that checks if a developer_id exists
            use_cache: Forwarded to ``validate_manifest_with_db``

        Returns:
            Tuple of (is_valid, error_message, error_paths)
        """
        outcome = await validate_manifest_with_db(
            manifest, db_validator, use_cache=use_cache
        )
        return outcome

    @staticmethod
    def validate_managed_indexes(
        managed_indexes: Dict[str, List[Dict[str, Any]]]
    ) -> Tuple[bool, Optional[str]]:
        """
        Validate a managed-indexes mapping.

        Delegates to the module-level ``validate_managed_indexes`` function.

        Args:
            managed_indexes: Mapping of collection names to index definitions

        Returns:
            Tuple of (is_valid, error_message)
        """
        # Inside a method body this name resolves to the module-level
        # function, not to this static method.
        outcome = validate_managed_indexes(managed_indexes)
        return outcome

    @staticmethod
    def validate_index_definition(
        index_def: Dict[str, Any], collection_name: str, index_name: str
    ) -> Tuple[bool, Optional[str]]:
        """
        Validate one index definition.

        Delegates to the module-level ``validate_index_definition`` function.

        Args:
            index_def: Index definition dictionary
            collection_name: Collection name, used for error context
            index_name: Index name, used for error context

        Returns:
            Tuple of (is_valid, error_message)
        """
        # Resolves to the module-level function of the same name.
        outcome = validate_index_definition(index_def, collection_name, index_name)
        return outcome

    @staticmethod
    def get_schema_version(manifest: Dict[str, Any]) -> str:
        """
        Return the schema version of a manifest.

        Delegates to the module-level ``get_schema_version`` function.

        Args:
            manifest: Manifest dictionary

        Returns:
            Schema version string (e.g., "1.0", "2.0")
        """
        # Resolves to the module-level function of the same name.
        version = get_schema_version(manifest)
        return version

    @staticmethod
    def migrate(
        manifest: Dict[str, Any], target_version: str = CURRENT_SCHEMA_VERSION
    ) -> Dict[str, Any]:
        """
        Migrate a manifest to the requested schema version.

        Delegates to ``migrate_manifest``.

        Args:
            manifest: Manifest dictionary to migrate
            target_version: Target schema version
                (default: ``CURRENT_SCHEMA_VERSION``)

        Returns:
            Migrated manifest dictionary
        """
        migrated = migrate_manifest(manifest, target_version)
        return migrated

    @staticmethod
    def clear_cache():
        """Clear the validation cache via ``clear_validation_cache``."""
        clear_validation_cache()
|
|
2672
|
+
|
|
2673
|
+
|
|
2674
|
+
class ManifestParser:
    """
    Loader for manifests supplied as files, dictionaries, or JSON strings.

    Each loader can optionally validate the manifest through
    ``ManifestValidator.validate`` and raises ``ValueError`` when validation
    fails. ``load_and_migrate`` forwards to ``ManifestValidator.migrate``.
    """

    def __init__(self, validator: Optional[ManifestValidator] = None):
        """
        Create a parser.

        Args:
            validator: ManifestValidator instance to keep on the parser; a
                default ``ManifestValidator()`` is created when omitted
        """
        self.validator = validator if validator else ManifestValidator()

    @staticmethod
    def _raise_if_invalid(manifest_data: Dict[str, Any]) -> None:
        """Validate ``manifest_data`` and raise ``ValueError`` on failure."""
        is_valid, error, paths = ManifestValidator.validate(manifest_data)
        if is_valid:
            return

        # Only the first three error paths are included in the message.
        location_hint = f" (errors in: {', '.join(paths[:3])})" if paths else ""
        raise ValueError(f"Manifest validation failed: {error}{location_hint}")

    @staticmethod
    async def load_from_file(path: Any, validate: bool = True) -> Dict[str, Any]:
        """
        Read a manifest JSON file and optionally validate it.

        Args:
            path: Path to manifest.json file (Path object or string)
            validate: Whether to validate after loading (default: True)

        Returns:
            Manifest dictionary parsed from the file

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If validation fails
        """
        import json
        from pathlib import Path

        manifest_path = path if isinstance(path, Path) else Path(path)
        if not manifest_path.exists():
            raise FileNotFoundError(f"Manifest file not found: {manifest_path}")

        # The file is read as UTF-8 text and parsed as JSON.
        raw_text = manifest_path.read_text(encoding="utf-8")
        manifest_data = json.loads(raw_text)

        if validate:
            ManifestParser._raise_if_invalid(manifest_data)

        return manifest_data

    @staticmethod
    async def load_from_dict(
        data: Dict[str, Any], validate: bool = True
    ) -> Dict[str, Any]:
        """
        Optionally validate an in-memory manifest and return a copy of it.

        Args:
            data: Manifest dictionary
            validate: Whether to validate (default: True)

        Returns:
            Shallow copy (``data.copy()``) of the manifest dictionary

        Raises:
            ValueError: If validation fails
        """
        if validate:
            ManifestParser._raise_if_invalid(data)

        manifest_copy = data.copy()
        return manifest_copy

    @staticmethod
    async def load_from_string(content: str, validate: bool = True) -> Dict[str, Any]:
        """
        Parse a JSON string and hand the result to ``load_from_dict``.

        Args:
            content: JSON string content
            validate: Whether to validate (default: True)

        Returns:
            Manifest dictionary

        Raises:
            json.JSONDecodeError: If JSON is invalid
            ValueError: If validation fails
        """
        import json

        parsed = json.loads(content)
        manifest_data = await ManifestParser.load_from_dict(parsed, validate=validate)
        return manifest_data

    @staticmethod
    async def load_and_migrate(
        manifest: Dict[str, Any], target_version: str = CURRENT_SCHEMA_VERSION
    ) -> Dict[str, Any]:
        """
        Migrate a manifest to the target schema version.

        Delegates to ``ManifestValidator.migrate``.

        Args:
            manifest: Manifest dictionary
            target_version: Target schema version
                (default: ``CURRENT_SCHEMA_VERSION``)

        Returns:
            Migrated manifest dictionary
        """
        migrated = ManifestValidator.migrate(manifest, target_version)
        return migrated
|