@intentsolutionsio/nosql-data-modeler 1.0.0 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/nosql-agent.md +28 -0
- package/package.json +1 -1
- package/skills/modeling-nosql-data/SKILL.md +11 -6
- package/skills/modeling-nosql-data/references/README.md +0 -1
- package/skills/modeling-nosql-data/scripts/generate_sample_data.py +16 -49
- package/skills/modeling-nosql-data/scripts/migrate_schema.py +36 -81
- package/skills/modeling-nosql-data/scripts/validate_schema.py +92 -98
package/agents/nosql-agent.md
CHANGED
|
@@ -1,6 +1,34 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: nosql-agent
|
|
3
3
|
description: Design NoSQL data models
|
|
4
|
+
tools:
|
|
5
|
+
- Read
|
|
6
|
+
- Write
|
|
7
|
+
- Edit
|
|
8
|
+
- Bash
|
|
9
|
+
- Glob
|
|
10
|
+
- Grep
|
|
11
|
+
- WebFetch
|
|
12
|
+
- WebSearch
|
|
13
|
+
- Task
|
|
14
|
+
- TodoWrite
|
|
15
|
+
model: sonnet
|
|
16
|
+
color: purple
|
|
17
|
+
version: 1.0.0
|
|
18
|
+
author: Jeremy Longshore <jeremy@intentsolutions.io>
|
|
19
|
+
tags:
|
|
20
|
+
- database
|
|
21
|
+
- nosql
|
|
22
|
+
disallowedTools: []
|
|
23
|
+
skills: []
|
|
24
|
+
background: false
|
|
25
|
+
# ── upgrade levers — uncomment + set when tuning this agent ──
|
|
26
|
+
# effort: high # reasoning depth: low/medium/high/xhigh/max (omit = inherit session)
|
|
27
|
+
# maxTurns: 50 # cap the agentic loop (omit = engine default)
|
|
28
|
+
# memory: project # persistent scope: user/project/local (omit = ephemeral)
|
|
29
|
+
# isolation: worktree # run in an isolated git worktree
|
|
30
|
+
# initialPrompt: "…" # seed the agent's first turn
|
|
31
|
+
# hooks / mcpServers / permissionMode → set at the PLUGIN level, not on a plugin agent
|
|
4
32
|
---
|
|
5
33
|
# NoSQL Data Modeler
|
|
6
34
|
|
package/package.json
CHANGED
(hunk not shown in this extract)
package/skills/modeling-nosql-data/SKILL.md
CHANGED
|
@@ -1,17 +1,22 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: modeling-nosql-data
|
|
3
|
-
description:
|
|
4
|
-
|
|
3
|
+
description: 'Build use when you need to work with NoSQL data modeling.
|
|
4
|
+
|
|
5
5
|
This skill provides NoSQL database design with comprehensive guidance and automation.
|
|
6
|
+
|
|
6
7
|
Trigger with phrases like "model NoSQL data", "design document structure",
|
|
8
|
+
|
|
7
9
|
or "optimize NoSQL schema".
|
|
8
10
|
|
|
11
|
+
'
|
|
9
12
|
allowed-tools: Read, Write, Edit, Grep, Glob, Bash(psql:*), Bash(mysql:*), Bash(mongosh:*)
|
|
10
13
|
version: 1.0.0
|
|
11
14
|
author: Jeremy Longshore <jeremy@intentsolutions.io>
|
|
12
15
|
license: MIT
|
|
13
|
-
|
|
14
|
-
|
|
16
|
+
tags:
|
|
17
|
+
- database
|
|
18
|
+
- modeling-nosql
|
|
19
|
+
compatibility: Designed for Claude Code, also compatible with Codex and OpenClaw
|
|
15
20
|
---
|
|
16
21
|
# NoSQL Data Modeler
|
|
17
22
|
|
|
@@ -81,6 +86,6 @@ Design data models for NoSQL databases including MongoDB (document), DynamoDB (k
|
|
|
81
86
|
|
|
82
87
|
- MongoDB data modeling patterns: https://www.mongodb.com/docs/manual/data-modeling/
|
|
83
88
|
- DynamoDB single-table design: https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/bp-modeling-nosql.html
|
|
84
|
-
- Cassandra data modeling guide:
|
|
89
|
+
- Cassandra data modeling guide:
|
|
85
90
|
- Redis data structures: https://redis.io/docs/data-types/
|
|
86
|
-
- NoSQL design patterns catalog:
|
|
91
|
+
- NoSQL design patterns catalog:
|
|
package/skills/modeling-nosql-data/scripts/generate_sample_data.py
CHANGED
|
@@ -11,8 +11,7 @@ import json
|
|
|
11
11
|
import random
|
|
12
12
|
import sys
|
|
13
13
|
from datetime import datetime, timedelta
|
|
14
|
-
from
|
|
15
|
-
from typing import Dict, List, Any, Union
|
|
14
|
+
from typing import Dict, List, Any
|
|
16
15
|
from uuid import uuid4
|
|
17
16
|
|
|
18
17
|
|
|
@@ -79,10 +78,7 @@ class SampleDataGenerator:
|
|
|
79
78
|
count = random.randint(min_items, max_items)
|
|
80
79
|
|
|
81
80
|
if "items" in field_def:
|
|
82
|
-
return [
|
|
83
|
-
SampleDataGenerator.generate_value(field_def["items"])
|
|
84
|
-
for _ in range(count)
|
|
85
|
-
]
|
|
81
|
+
return [SampleDataGenerator.generate_value(field_def["items"]) for _ in range(count)]
|
|
86
82
|
else:
|
|
87
83
|
return [f"item{i}" for i in range(count)]
|
|
88
84
|
|
|
@@ -147,7 +143,7 @@ class SampleDataGenerator:
|
|
|
147
143
|
True if successful, False otherwise
|
|
148
144
|
"""
|
|
149
145
|
try:
|
|
150
|
-
with open(filepath,
|
|
146
|
+
with open(filepath, "w") as f:
|
|
151
147
|
json.dump(self.generated_data, f, indent=2)
|
|
152
148
|
return True
|
|
153
149
|
except Exception as e:
|
|
@@ -165,9 +161,9 @@ class SampleDataGenerator:
|
|
|
165
161
|
True if successful, False otherwise
|
|
166
162
|
"""
|
|
167
163
|
try:
|
|
168
|
-
with open(filepath,
|
|
164
|
+
with open(filepath, "w") as f:
|
|
169
165
|
for doc in self.generated_data:
|
|
170
|
-
f.write(json.dumps(doc) +
|
|
166
|
+
f.write(json.dumps(doc) + "\n")
|
|
171
167
|
return True
|
|
172
168
|
except Exception as e:
|
|
173
169
|
print(f"Error exporting JSONL: {e}", file=sys.stderr)
|
|
@@ -200,7 +196,7 @@ class SampleDataGenerator:
|
|
|
200
196
|
|
|
201
197
|
all_keys = sorted(list(all_keys))
|
|
202
198
|
|
|
203
|
-
with open(filepath,
|
|
199
|
+
with open(filepath, "w", newline="") as f:
|
|
204
200
|
writer = csv.DictWriter(f, fieldnames=all_keys)
|
|
205
201
|
writer.writeheader()
|
|
206
202
|
writer.writerows(flattened)
|
|
@@ -210,7 +206,7 @@ class SampleDataGenerator:
|
|
|
210
206
|
print(f"Error exporting CSV: {e}", file=sys.stderr)
|
|
211
207
|
return False
|
|
212
208
|
|
|
213
|
-
def _flatten_dict(self, d: Dict, parent_key: str =
|
|
209
|
+
def _flatten_dict(self, d: Dict, parent_key: str = "", sep: str = ".") -> Dict:
|
|
214
210
|
"""
|
|
215
211
|
Flatten nested dictionary.
|
|
216
212
|
|
|
@@ -252,7 +248,7 @@ def load_schema(filepath: str) -> Dict[str, Any]:
|
|
|
252
248
|
json.JSONDecodeError: If file is not valid JSON
|
|
253
249
|
"""
|
|
254
250
|
try:
|
|
255
|
-
with open(filepath,
|
|
251
|
+
with open(filepath, "r") as f:
|
|
256
252
|
return json.load(f)
|
|
257
253
|
except FileNotFoundError:
|
|
258
254
|
print(f"Error: Schema file not found: {filepath}", file=sys.stderr)
|
|
@@ -280,45 +276,16 @@ Examples:
|
|
|
280
276
|
|
|
281
277
|
# Print to stdout
|
|
282
278
|
%(prog)s --schema schema.json --count 5 --print
|
|
283
|
-
"""
|
|
279
|
+
""",
|
|
284
280
|
)
|
|
285
281
|
|
|
286
|
-
parser.add_argument(
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
)
|
|
291
|
-
parser.add_argument(
|
|
292
|
-
|
|
293
|
-
type=int,
|
|
294
|
-
default=10,
|
|
295
|
-
help="Number of sample documents to generate (default: 10)"
|
|
296
|
-
)
|
|
297
|
-
parser.add_argument(
|
|
298
|
-
"--format",
|
|
299
|
-
default="json",
|
|
300
|
-
choices=["json", "jsonl", "csv"],
|
|
301
|
-
help="Output format"
|
|
302
|
-
)
|
|
303
|
-
parser.add_argument(
|
|
304
|
-
"--output",
|
|
305
|
-
help="Output file path"
|
|
306
|
-
)
|
|
307
|
-
parser.add_argument(
|
|
308
|
-
"--print",
|
|
309
|
-
action="store_true",
|
|
310
|
-
help="Print generated data to stdout"
|
|
311
|
-
)
|
|
312
|
-
parser.add_argument(
|
|
313
|
-
"--seed",
|
|
314
|
-
type=int,
|
|
315
|
-
help="Random seed for reproducible data"
|
|
316
|
-
)
|
|
317
|
-
parser.add_argument(
|
|
318
|
-
"--verbose",
|
|
319
|
-
action="store_true",
|
|
320
|
-
help="Print detailed output"
|
|
321
|
-
)
|
|
282
|
+
parser.add_argument("--schema", required=True, help="Path to JSON schema file")
|
|
283
|
+
parser.add_argument("--count", type=int, default=10, help="Number of sample documents to generate (default: 10)")
|
|
284
|
+
parser.add_argument("--format", default="json", choices=["json", "jsonl", "csv"], help="Output format")
|
|
285
|
+
parser.add_argument("--output", help="Output file path")
|
|
286
|
+
parser.add_argument("--print", action="store_true", help="Print generated data to stdout")
|
|
287
|
+
parser.add_argument("--seed", type=int, help="Random seed for reproducible data")
|
|
288
|
+
parser.add_argument("--verbose", action="store_true", help="Print detailed output")
|
|
322
289
|
|
|
323
290
|
args = parser.parse_args()
|
|
324
291
|
|
|
package/skills/modeling-nosql-data/scripts/migrate_schema.py
CHANGED
|
@@ -9,9 +9,7 @@ This script converts schema definitions between different NoSQL database formats
|
|
|
9
9
|
import argparse
|
|
10
10
|
import json
|
|
11
11
|
import sys
|
|
12
|
-
from
|
|
13
|
-
from pathlib import Path
|
|
14
|
-
from typing import Dict, List, Any, Optional, Tuple
|
|
12
|
+
from typing import Dict, List, Any
|
|
15
13
|
|
|
16
14
|
|
|
17
15
|
class SchemaTransformer:
|
|
@@ -30,8 +28,8 @@ class SchemaTransformer:
|
|
|
30
28
|
"date": "date",
|
|
31
29
|
"object": "object",
|
|
32
30
|
"array": "array",
|
|
33
|
-
"null": "null"
|
|
34
|
-
}
|
|
31
|
+
"null": "null",
|
|
32
|
+
},
|
|
35
33
|
},
|
|
36
34
|
"dynamodb": {
|
|
37
35
|
"name": "DynamoDB",
|
|
@@ -44,8 +42,8 @@ class SchemaTransformer:
|
|
|
44
42
|
"date": "S",
|
|
45
43
|
"object": "M",
|
|
46
44
|
"array": "L",
|
|
47
|
-
"binary": "B"
|
|
48
|
-
}
|
|
45
|
+
"binary": "B",
|
|
46
|
+
},
|
|
49
47
|
},
|
|
50
48
|
"firestore": {
|
|
51
49
|
"name": "Firestore",
|
|
@@ -58,8 +56,8 @@ class SchemaTransformer:
|
|
|
58
56
|
"date": "timestampValue",
|
|
59
57
|
"object": "mapValue",
|
|
60
58
|
"array": "arrayValue",
|
|
61
|
-
"null": "nullValue"
|
|
62
|
-
}
|
|
59
|
+
"null": "nullValue",
|
|
60
|
+
},
|
|
63
61
|
},
|
|
64
62
|
"cosmosdb": {
|
|
65
63
|
"name": "Azure Cosmos DB",
|
|
@@ -71,9 +69,9 @@ class SchemaTransformer:
|
|
|
71
69
|
"boolean": "boolean",
|
|
72
70
|
"date": "date",
|
|
73
71
|
"object": "object",
|
|
74
|
-
"array": "array"
|
|
75
|
-
}
|
|
76
|
-
}
|
|
72
|
+
"array": "array",
|
|
73
|
+
},
|
|
74
|
+
},
|
|
77
75
|
}
|
|
78
76
|
|
|
79
77
|
def __init__(self, source_type: str, target_type: str):
|
|
@@ -129,7 +127,7 @@ class SchemaTransformer:
|
|
|
129
127
|
parsed = {
|
|
130
128
|
"name": schema.get("$id", schema.get("title", "Schema")),
|
|
131
129
|
"description": schema.get("description", ""),
|
|
132
|
-
"fields": {}
|
|
130
|
+
"fields": {},
|
|
133
131
|
}
|
|
134
132
|
|
|
135
133
|
# Extract fields based on source type
|
|
@@ -158,7 +156,7 @@ class SchemaTransformer:
|
|
|
158
156
|
"type": field_def.get("type", "string"),
|
|
159
157
|
"description": field_def.get("description", ""),
|
|
160
158
|
"required": "required" in str(field_def),
|
|
161
|
-
"indexed": field_def.get("indexed", False)
|
|
159
|
+
"indexed": field_def.get("indexed", False),
|
|
162
160
|
}
|
|
163
161
|
|
|
164
162
|
return fields
|
|
@@ -172,7 +170,7 @@ class SchemaTransformer:
|
|
|
172
170
|
for attr in schema["AttributeDefinitions"]:
|
|
173
171
|
fields[attr["AttributeName"]] = {
|
|
174
172
|
"type": self._map_dynamodb_type(attr["AttributeType"]),
|
|
175
|
-
"required": attr["AttributeName"] in schema.get("KeySchema", [])
|
|
173
|
+
"required": attr["AttributeName"] in schema.get("KeySchema", []),
|
|
176
174
|
}
|
|
177
175
|
|
|
178
176
|
return fields
|
|
@@ -184,10 +182,7 @@ class SchemaTransformer:
|
|
|
184
182
|
if "fields" in schema:
|
|
185
183
|
for field_name, field_def in schema["fields"].items():
|
|
186
184
|
field_type = self._extract_firestore_type(field_def)
|
|
187
|
-
fields[field_name] = {
|
|
188
|
-
"type": field_type,
|
|
189
|
-
"indexed": field_def.get("indexed", False)
|
|
190
|
-
}
|
|
185
|
+
fields[field_name] = {"type": field_type, "indexed": field_def.get("indexed", False)}
|
|
191
186
|
|
|
192
187
|
return fields
|
|
193
188
|
|
|
@@ -211,7 +206,7 @@ class SchemaTransformer:
|
|
|
211
206
|
for field_name, field_info in parsed["fields"].items():
|
|
212
207
|
properties[field_name] = {
|
|
213
208
|
"type": field_info.get("type", "string"),
|
|
214
|
-
"description": field_info.get("description", "")
|
|
209
|
+
"description": field_info.get("description", ""),
|
|
215
210
|
}
|
|
216
211
|
|
|
217
212
|
if field_info.get("indexed"):
|
|
@@ -222,7 +217,7 @@ class SchemaTransformer:
|
|
|
222
217
|
"$id": parsed["name"],
|
|
223
218
|
"title": parsed["name"],
|
|
224
219
|
"type": "object",
|
|
225
|
-
"properties": properties
|
|
220
|
+
"properties": properties,
|
|
226
221
|
}
|
|
227
222
|
|
|
228
223
|
return schema
|
|
@@ -235,18 +230,13 @@ class SchemaTransformer:
|
|
|
235
230
|
field_type = field_info.get("type", "string")
|
|
236
231
|
dynamo_type = self._map_type_to_dynamodb(field_type)
|
|
237
232
|
|
|
238
|
-
attributes.append({
|
|
239
|
-
"AttributeName": field_name,
|
|
240
|
-
"AttributeType": dynamo_type
|
|
241
|
-
})
|
|
233
|
+
attributes.append({"AttributeName": field_name, "AttributeType": dynamo_type})
|
|
242
234
|
|
|
243
235
|
schema = {
|
|
244
236
|
"TableName": parsed["name"],
|
|
245
237
|
"AttributeDefinitions": attributes,
|
|
246
|
-
"KeySchema": [
|
|
247
|
-
|
|
248
|
-
],
|
|
249
|
-
"BillingMode": "PAY_PER_REQUEST"
|
|
238
|
+
"KeySchema": [{"AttributeName": "id", "KeyType": "HASH"}],
|
|
239
|
+
"BillingMode": "PAY_PER_REQUEST",
|
|
250
240
|
}
|
|
251
241
|
|
|
252
242
|
return schema
|
|
@@ -258,17 +248,12 @@ class SchemaTransformer:
|
|
|
258
248
|
for field_name, field_info in parsed["fields"].items():
|
|
259
249
|
field_type = field_info.get("type", "string")
|
|
260
250
|
|
|
261
|
-
fields[field_name] = {
|
|
262
|
-
field_type + "Value": self._get_firestore_default(field_type)
|
|
263
|
-
}
|
|
251
|
+
fields[field_name] = {field_type + "Value": self._get_firestore_default(field_type)}
|
|
264
252
|
|
|
265
253
|
if field_info.get("indexed"):
|
|
266
254
|
fields[field_name]["indexed"] = True
|
|
267
255
|
|
|
268
|
-
schema = {
|
|
269
|
-
"name": f"projects/PROJECT_ID/databases/(default)/documents/{parsed['name']}",
|
|
270
|
-
"fields": fields
|
|
271
|
-
}
|
|
256
|
+
schema = {"name": f"projects/PROJECT_ID/databases/(default)/documents/{parsed['name']}", "fields": fields}
|
|
272
257
|
|
|
273
258
|
return schema
|
|
274
259
|
|
|
@@ -285,7 +270,7 @@ class SchemaTransformer:
|
|
|
285
270
|
"boolean": "BOOL",
|
|
286
271
|
"date": "S",
|
|
287
272
|
"object": "M",
|
|
288
|
-
"array": "L"
|
|
273
|
+
"array": "L",
|
|
289
274
|
}
|
|
290
275
|
return type_map.get(source_type, "S")
|
|
291
276
|
|
|
@@ -300,7 +285,7 @@ class SchemaTransformer:
|
|
|
300
285
|
"BS": "binary",
|
|
301
286
|
"M": "object",
|
|
302
287
|
"L": "array",
|
|
303
|
-
"BOOL": "boolean"
|
|
288
|
+
"BOOL": "boolean",
|
|
304
289
|
}
|
|
305
290
|
return type_map.get(dynamo_type, "string")
|
|
306
291
|
|
|
@@ -316,7 +301,7 @@ class SchemaTransformer:
|
|
|
316
301
|
"boolean": "boolean",
|
|
317
302
|
"timestamp": "date",
|
|
318
303
|
"map": "object",
|
|
319
|
-
"array": "array"
|
|
304
|
+
"array": "array",
|
|
320
305
|
}
|
|
321
306
|
return type_map.get(type_name, "string")
|
|
322
307
|
|
|
@@ -324,22 +309,14 @@ class SchemaTransformer:
|
|
|
324
309
|
|
|
325
310
|
def _get_firestore_default(self, field_type: str) -> Any:
|
|
326
311
|
"""Get default value for Firestore field type."""
|
|
327
|
-
defaults = {
|
|
328
|
-
"string": "",
|
|
329
|
-
"number": 0,
|
|
330
|
-
"integer": 0,
|
|
331
|
-
"boolean": False,
|
|
332
|
-
"date": "",
|
|
333
|
-
"object": {},
|
|
334
|
-
"array": []
|
|
335
|
-
}
|
|
312
|
+
defaults = {"string": "", "number": 0, "integer": 0, "boolean": False, "date": "", "object": {}, "array": []}
|
|
336
313
|
return defaults.get(field_type)
|
|
337
314
|
|
|
338
315
|
|
|
339
316
|
def load_schema(filepath: str) -> Dict[str, Any]:
|
|
340
317
|
"""Load schema from JSON file."""
|
|
341
318
|
try:
|
|
342
|
-
with open(filepath,
|
|
319
|
+
with open(filepath, "r") as f:
|
|
343
320
|
return json.load(f)
|
|
344
321
|
except FileNotFoundError:
|
|
345
322
|
print(f"Error: Schema file not found: {filepath}", file=sys.stderr)
|
|
@@ -360,7 +337,7 @@ def main():
|
|
|
360
337
|
description="Migrate NoSQL schema between database types",
|
|
361
338
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
362
339
|
epilog=f"""
|
|
363
|
-
Supported database types: {
|
|
340
|
+
Supported database types: {", ".join(get_supported_databases())}
|
|
364
341
|
|
|
365
342
|
Examples:
|
|
366
343
|
# MongoDB to DynamoDB
|
|
@@ -374,37 +351,15 @@ Examples:
|
|
|
374
351
|
|
|
375
352
|
# List supported types
|
|
376
353
|
%(prog)s --list-types
|
|
377
|
-
"""
|
|
354
|
+
""",
|
|
378
355
|
)
|
|
379
356
|
|
|
380
|
-
parser.add_argument(
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
)
|
|
385
|
-
parser.add_argument(
|
|
386
|
-
"--to",
|
|
387
|
-
dest="target_type",
|
|
388
|
-
help="Target database type"
|
|
389
|
-
)
|
|
390
|
-
parser.add_argument(
|
|
391
|
-
"--schema",
|
|
392
|
-
help="Path to source schema file"
|
|
393
|
-
)
|
|
394
|
-
parser.add_argument(
|
|
395
|
-
"--output",
|
|
396
|
-
help="Output file for migrated schema"
|
|
397
|
-
)
|
|
398
|
-
parser.add_argument(
|
|
399
|
-
"--list-types",
|
|
400
|
-
action="store_true",
|
|
401
|
-
help="List supported database types"
|
|
402
|
-
)
|
|
403
|
-
parser.add_argument(
|
|
404
|
-
"--verbose",
|
|
405
|
-
action="store_true",
|
|
406
|
-
help="Print detailed output"
|
|
407
|
-
)
|
|
357
|
+
parser.add_argument("--from", dest="source_type", help="Source database type")
|
|
358
|
+
parser.add_argument("--to", dest="target_type", help="Target database type")
|
|
359
|
+
parser.add_argument("--schema", help="Path to source schema file")
|
|
360
|
+
parser.add_argument("--output", help="Output file for migrated schema")
|
|
361
|
+
parser.add_argument("--list-types", action="store_true", help="List supported database types")
|
|
362
|
+
parser.add_argument("--verbose", action="store_true", help="Print detailed output")
|
|
408
363
|
|
|
409
364
|
args = parser.parse_args()
|
|
410
365
|
|
|
@@ -437,11 +392,11 @@ Examples:
|
|
|
437
392
|
|
|
438
393
|
# Save to file if requested
|
|
439
394
|
if args.output:
|
|
440
|
-
with open(args.output,
|
|
395
|
+
with open(args.output, "w") as f:
|
|
441
396
|
f.write(output_json)
|
|
442
397
|
|
|
443
398
|
if args.verbose:
|
|
444
|
-
print(
|
|
399
|
+
print("✓ Schema migrated successfully", file=sys.stderr)
|
|
445
400
|
print(f"✓ Saved to {args.output}", file=sys.stderr)
|
|
446
401
|
|
|
447
402
|
sys.exit(0)
|
|
package/skills/modeling-nosql-data/scripts/validate_schema.py
CHANGED
|
@@ -10,8 +10,7 @@ import argparse
|
|
|
10
10
|
import json
|
|
11
11
|
import sys
|
|
12
12
|
from datetime import datetime
|
|
13
|
-
from
|
|
14
|
-
from typing import Dict, List, Any, Tuple
|
|
13
|
+
from typing import Dict, List, Any
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
class NoSQLSchemaValidator:
|
|
@@ -24,41 +23,41 @@ class NoSQLSchemaValidator:
|
|
|
24
23
|
"checks": [
|
|
25
24
|
"Use camelCase or snake_case consistently",
|
|
26
25
|
"Avoid single-letter field names",
|
|
27
|
-
"Use descriptive names"
|
|
28
|
-
]
|
|
26
|
+
"Use descriptive names",
|
|
27
|
+
],
|
|
29
28
|
},
|
|
30
29
|
"indexing": {
|
|
31
30
|
"rule": "Index strategy is defined",
|
|
32
31
|
"checks": [
|
|
33
32
|
"Frequently queried fields are indexed",
|
|
34
33
|
"Composite indexes are defined for common queries",
|
|
35
|
-
"Index overhead is considered"
|
|
36
|
-
]
|
|
34
|
+
"Index overhead is considered",
|
|
35
|
+
],
|
|
37
36
|
},
|
|
38
37
|
"denormalization": {
|
|
39
38
|
"rule": "Denormalization is used appropriately",
|
|
40
39
|
"checks": [
|
|
41
40
|
"Denormalization reduces query complexity",
|
|
42
41
|
"Duplicate data is managed intentionally",
|
|
43
|
-
"Update patterns are considered"
|
|
44
|
-
]
|
|
42
|
+
"Update patterns are considered",
|
|
43
|
+
],
|
|
45
44
|
},
|
|
46
45
|
"data_types": {
|
|
47
46
|
"rule": "Data types are appropriate",
|
|
48
47
|
"checks": [
|
|
49
48
|
"Numeric fields use appropriate numeric types",
|
|
50
49
|
"Dates use datetime types",
|
|
51
|
-
"IDs use consistent types"
|
|
52
|
-
]
|
|
50
|
+
"IDs use consistent types",
|
|
51
|
+
],
|
|
53
52
|
},
|
|
54
53
|
"document_size": {
|
|
55
54
|
"rule": "Document size is reasonable",
|
|
56
55
|
"checks": [
|
|
57
56
|
"Documents don't exceed size limits (16MB for MongoDB)",
|
|
58
57
|
"Array fields don't grow unbounded",
|
|
59
|
-
"Large nested objects are avoided"
|
|
60
|
-
]
|
|
61
|
-
}
|
|
58
|
+
"Large nested objects are avoided",
|
|
59
|
+
],
|
|
60
|
+
},
|
|
62
61
|
}
|
|
63
62
|
|
|
64
63
|
# Common anti-patterns
|
|
@@ -67,38 +66,38 @@ class NoSQLSchemaValidator:
|
|
|
67
66
|
"name": "unbounded_arrays",
|
|
68
67
|
"description": "Arrays that can grow without limits",
|
|
69
68
|
"severity": "high",
|
|
70
|
-
"recommendation": "Cap array size or use separate collections"
|
|
69
|
+
"recommendation": "Cap array size or use separate collections",
|
|
71
70
|
},
|
|
72
71
|
{
|
|
73
72
|
"name": "deeply_nested",
|
|
74
73
|
"description": "Deeply nested document structures (>3 levels)",
|
|
75
74
|
"severity": "medium",
|
|
76
|
-
"recommendation": "Flatten structure or normalize data"
|
|
75
|
+
"recommendation": "Flatten structure or normalize data",
|
|
77
76
|
},
|
|
78
77
|
{
|
|
79
78
|
"name": "no_indexes",
|
|
80
79
|
"description": "Frequently queried fields without indexes",
|
|
81
80
|
"severity": "high",
|
|
82
|
-
"recommendation": "Add indexes for query performance"
|
|
81
|
+
"recommendation": "Add indexes for query performance",
|
|
83
82
|
},
|
|
84
83
|
{
|
|
85
84
|
"name": "inconsistent_types",
|
|
86
85
|
"description": "Field with inconsistent data types across documents",
|
|
87
86
|
"severity": "medium",
|
|
88
|
-
"recommendation": "Enforce schema validation or add type hints"
|
|
87
|
+
"recommendation": "Enforce schema validation or add type hints",
|
|
89
88
|
},
|
|
90
89
|
{
|
|
91
90
|
"name": "circular_references",
|
|
92
91
|
"description": "Circular document references",
|
|
93
92
|
"severity": "high",
|
|
94
|
-
"recommendation": "Use one-way references or denormalization"
|
|
93
|
+
"recommendation": "Use one-way references or denormalization",
|
|
95
94
|
},
|
|
96
95
|
{
|
|
97
96
|
"name": "missing_ids",
|
|
98
97
|
"description": "Documents or arrays without ID fields",
|
|
99
98
|
"severity": "high",
|
|
100
|
-
"recommendation": "Add unique IDs for referencing"
|
|
101
|
-
}
|
|
99
|
+
"recommendation": "Add unique IDs for referencing",
|
|
100
|
+
},
|
|
102
101
|
]
|
|
103
102
|
|
|
104
103
|
def __init__(self):
|
|
@@ -117,7 +116,7 @@ class NoSQLSchemaValidator:
|
|
|
117
116
|
True if successful, False otherwise
|
|
118
117
|
"""
|
|
119
118
|
try:
|
|
120
|
-
with open(filepath,
|
|
119
|
+
with open(filepath, "r") as f:
|
|
121
120
|
self.schema = json.load(f)
|
|
122
121
|
return True
|
|
123
122
|
except (FileNotFoundError, json.JSONDecodeError) as e:
|
|
@@ -147,12 +146,14 @@ class NoSQLSchemaValidator:
|
|
|
147
146
|
# Check for inconsistency
|
|
148
147
|
non_zero_styles = [count for count in naming_styles.values() if count > 0]
|
|
149
148
|
if len(non_zero_styles) > 1:
|
|
150
|
-
issues.append(
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
149
|
+
issues.append(
|
|
150
|
+
{
|
|
151
|
+
"severity": "medium",
|
|
152
|
+
"type": "naming_inconsistency",
|
|
153
|
+
"message": "Inconsistent naming convention across fields",
|
|
154
|
+
"details": naming_styles,
|
|
155
|
+
}
|
|
156
|
+
)
|
|
156
157
|
|
|
157
158
|
return issues
|
|
158
159
|
|
|
@@ -166,21 +167,25 @@ class NoSQLSchemaValidator:
|
|
|
166
167
|
issues = []
|
|
167
168
|
|
|
168
169
|
if "indexes" not in self.schema:
|
|
169
|
-
issues.append(
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
170
|
+
issues.append(
|
|
171
|
+
{
|
|
172
|
+
"severity": "medium",
|
|
173
|
+
"type": "missing_indexes",
|
|
174
|
+
"message": "No indexes defined in schema",
|
|
175
|
+
"recommendation": "Define indexes for frequently queried fields",
|
|
176
|
+
}
|
|
177
|
+
)
|
|
175
178
|
else:
|
|
176
179
|
indexes = self.schema.get("indexes", [])
|
|
177
180
|
if not indexes:
|
|
178
|
-
issues.append(
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
181
|
+
issues.append(
|
|
182
|
+
{
|
|
183
|
+
"severity": "medium",
|
|
184
|
+
"type": "empty_indexes",
|
|
185
|
+
"message": "Indexes array is empty",
|
|
186
|
+
"recommendation": "Add indexes for query optimization",
|
|
187
|
+
}
|
|
188
|
+
)
|
|
184
189
|
|
|
185
190
|
return issues
|
|
186
191
|
|
|
@@ -197,23 +202,27 @@ class NoSQLSchemaValidator:
|
|
|
197
202
|
for field, field_def in self._extract_fields(self.schema).items():
|
|
198
203
|
if field_def.get("type") == "array":
|
|
199
204
|
if "max_items" not in field_def:
|
|
200
|
-
issues.append(
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
205
|
+
issues.append(
|
|
206
|
+
{
|
|
207
|
+
"severity": "high",
|
|
208
|
+
"type": "unbounded_array",
|
|
209
|
+
"field": field,
|
|
210
|
+
"message": f"Array field '{field}' has no maximum size limit",
|
|
211
|
+
"recommendation": "Set max_items or use separate collection",
|
|
212
|
+
}
|
|
213
|
+
)
|
|
207
214
|
|
|
208
215
|
# Check for deeply nested structures
|
|
209
216
|
depth = self._calculate_nesting_depth(self.schema)
|
|
210
217
|
if depth > 3:
|
|
211
|
-
issues.append(
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
218
|
+
issues.append(
|
|
219
|
+
{
|
|
220
|
+
"severity": "medium",
|
|
221
|
+
"type": "deeply_nested",
|
|
222
|
+
"message": f"Document nesting depth is {depth} levels (recommended: ≤3)",
|
|
223
|
+
"recommendation": "Flatten structure or normalize data",
|
|
224
|
+
}
|
|
225
|
+
)
|
|
217
226
|
|
|
218
227
|
return issues
|
|
219
228
|
|
|
@@ -233,21 +242,25 @@ class NoSQLSchemaValidator:
|
|
|
233
242
|
|
|
234
243
|
# Check for type mismatches
|
|
235
244
|
if field_type not in ["string", "number", "boolean", "object", "array", "date", "null"]:
|
|
236
|
-
issues.append(
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
245
|
+
issues.append(
|
|
246
|
+
{
|
|
247
|
+
"severity": "medium",
|
|
248
|
+
"type": "unknown_type",
|
|
249
|
+
"field": field,
|
|
250
|
+
"message": f"Unknown type '{field_type}' for field '{field}'",
|
|
251
|
+
}
|
|
252
|
+
)
|
|
242
253
|
|
|
243
254
|
# Check for ID fields without proper type
|
|
244
255
|
if "id" in field.lower() and field_type not in ["string", "number"]:
|
|
245
|
-
issues.append(
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
256
|
+
issues.append(
|
|
257
|
+
{
|
|
258
|
+
"severity": "high",
|
|
259
|
+
"type": "invalid_id_type",
|
|
260
|
+
"field": field,
|
|
261
|
+
"message": f"ID field '{field}' should be string or number, not {field_type}",
|
|
262
|
+
}
|
|
263
|
+
)
|
|
251
264
|
|
|
252
265
|
return issues
|
|
253
266
|
|
|
@@ -327,12 +340,7 @@ class NoSQLSchemaValidator:
|
|
|
327
340
|
"timestamp": datetime.now().isoformat(),
|
|
328
341
|
"schema_name": self.schema.get("$id", "unknown"),
|
|
329
342
|
"validations": [],
|
|
330
|
-
"summary": {
|
|
331
|
-
"critical": 0,
|
|
332
|
-
"high": 0,
|
|
333
|
-
"medium": 0,
|
|
334
|
-
"low": 0
|
|
335
|
-
}
|
|
343
|
+
"summary": {"critical": 0, "high": 0, "medium": 0, "low": 0},
|
|
336
344
|
}
|
|
337
345
|
|
|
338
346
|
# Run all validation methods
|
|
@@ -341,7 +349,7 @@ class NoSQLSchemaValidator:
|
|
|
341
349
|
self.validate_indexes,
|
|
342
350
|
self.validate_document_structure,
|
|
343
351
|
self.validate_data_types,
|
|
344
|
-
self.validate_references
|
|
352
|
+
self.validate_references,
|
|
345
353
|
]
|
|
346
354
|
|
|
347
355
|
for method in validation_methods:
|
|
@@ -367,10 +375,10 @@ def format_validation_report(results: Dict[str, Any]) -> str:
|
|
|
367
375
|
Formatted report string
|
|
368
376
|
"""
|
|
369
377
|
report = []
|
|
370
|
-
report.append(f"\n{'='*70}")
|
|
371
|
-
report.append(
|
|
378
|
+
report.append(f"\n{'=' * 70}")
|
|
379
|
+
report.append("NoSQL Schema Validation Report")
|
|
372
380
|
report.append(f"Schema: {results['schema_name']}")
|
|
373
|
-
report.append(f"{'='*70}\n")
|
|
381
|
+
report.append(f"{'=' * 70}\n")
|
|
374
382
|
|
|
375
383
|
summary = results.get("summary", {})
|
|
376
384
|
report.append("Summary:")
|
|
@@ -387,7 +395,9 @@ def format_validation_report(results: Dict[str, Any]) -> str:
|
|
|
387
395
|
else:
|
|
388
396
|
report.append("Issues Found:\n")
|
|
389
397
|
|
|
390
|
-
for issue in sorted(
|
|
398
|
+
for issue in sorted(
|
|
399
|
+
validations, key=lambda x: ["critical", "high", "medium", "low"].index(x.get("severity", "low"))
|
|
400
|
+
):
|
|
391
401
|
severity = issue.get("severity", "low").upper()
|
|
392
402
|
type_name = issue.get("type", "unknown")
|
|
393
403
|
message = issue.get("message", "")
|
|
@@ -403,7 +413,7 @@ def format_validation_report(results: Dict[str, Any]) -> str:
|
|
|
403
413
|
|
|
404
414
|
report.append("")
|
|
405
415
|
|
|
406
|
-
report.append(f"{'='*70}\n")
|
|
416
|
+
report.append(f"{'=' * 70}\n")
|
|
407
417
|
|
|
408
418
|
return "\n".join(report)
|
|
409
419
|
|
|
@@ -418,29 +428,13 @@ Examples:
|
|
|
418
428
|
%(prog)s --schema schema.json
|
|
419
429
|
%(prog)s --schema user-schema.json --output report.json
|
|
420
430
|
%(prog)s --schema product-schema.json --format json
|
|
421
|
-
"""
|
|
431
|
+
""",
|
|
422
432
|
)
|
|
423
433
|
|
|
424
|
-
parser.add_argument(
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
)
|
|
429
|
-
parser.add_argument(
|
|
430
|
-
"--output",
|
|
431
|
-
help="Output file for validation report (JSON)"
|
|
432
|
-
)
|
|
433
|
-
parser.add_argument(
|
|
434
|
-
"--format",
|
|
435
|
-
default="text",
|
|
436
|
-
choices=["text", "json"],
|
|
437
|
-
help="Output format"
|
|
438
|
-
)
|
|
439
|
-
parser.add_argument(
|
|
440
|
-
"--verbose",
|
|
441
|
-
action="store_true",
|
|
442
|
-
help="Print detailed output"
|
|
443
|
-
)
|
|
434
|
+
parser.add_argument("--schema", required=True, help="Path to JSON schema file")
|
|
435
|
+
parser.add_argument("--output", help="Output file for validation report (JSON)")
|
|
436
|
+
parser.add_argument("--format", default="text", choices=["text", "json"], help="Output format")
|
|
437
|
+
parser.add_argument("--verbose", action="store_true", help="Print detailed output")
|
|
444
438
|
|
|
445
439
|
args = parser.parse_args()
|
|
446
440
|
|
|
@@ -468,7 +462,7 @@ Examples:
|
|
|
468
462
|
|
|
469
463
|
# Save to file if requested
|
|
470
464
|
if args.output:
|
|
471
|
-
with open(args.output,
|
|
465
|
+
with open(args.output, "w") as f:
|
|
472
466
|
if args.format == "json":
|
|
473
467
|
json.dump(results, f, indent=2)
|
|
474
468
|
else:
|