json-explorer-mcp 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -5
- package/dist/index.js +10 -3
- package/dist/tools/query.d.ts +44 -3
- package/dist/tools/query.js +247 -9
- package/dist/tools/structure.d.ts +8 -1
- package/dist/tools/structure.js +189 -6
- package/dist/utils/json-parser.js +8 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -8,8 +8,8 @@ An MCP (Model Context Protocol) server for efficiently exploring large JSON file
|
|
|
8
8
|
- **Smart truncation** - Large values are automatically summarized
|
|
9
9
|
- **Caching** - Parsed JSON is cached with file modification checks
|
|
10
10
|
- **Schema inference** - Understand structure without reading all data
|
|
11
|
-
- **Schema validation** - Validate data against JSON Schema
|
|
12
|
-
- **Aggregate statistics** - Get counts, distributions, and numeric stats
|
|
11
|
+
- **Schema validation** - Validate data against JSON Schema with `$ref` resolution
|
|
12
|
+
- **Aggregate statistics** - Get counts, distributions, and numeric stats for arrays
|
|
13
13
|
|
|
14
14
|
## Installation
|
|
15
15
|
|
|
@@ -141,6 +141,12 @@ Returns:
|
|
|
141
141
|
|
|
142
142
|
Validate JSON data against a JSON Schema. Schema can be provided inline or as a path to a schema file.
|
|
143
143
|
|
|
144
|
+
**Features:**
|
|
145
|
+
- Automatic resolution of local file `$ref` references
|
|
146
|
+
- Optional network `$ref` resolution (disabled by default)
|
|
147
|
+
- Validates that referenced files are actual JSON Schemas
|
|
148
|
+
- Error limiting (default 10) to avoid huge error lists
|
|
149
|
+
|
|
144
150
|
```typescript
|
|
145
151
|
json_validate(
|
|
146
152
|
file: "/path/to/data.json",
|
|
@@ -149,7 +155,7 @@ json_validate(
|
|
|
149
155
|
)
|
|
150
156
|
```
|
|
151
157
|
|
|
152
|
-
Or with a schema file:
|
|
158
|
+
Or with a schema file (local `$ref`s are automatically resolved):
|
|
153
159
|
|
|
154
160
|
```typescript
|
|
155
161
|
json_validate(
|
|
@@ -158,16 +164,31 @@ json_validate(
|
|
|
158
164
|
)
|
|
159
165
|
```
|
|
160
166
|
|
|
167
|
+
With options:
|
|
168
|
+
|
|
169
|
+
```typescript
|
|
170
|
+
json_validate(
|
|
171
|
+
file: "/path/to/data.json",
|
|
172
|
+
schema: "/path/to/schema.json",
|
|
173
|
+
path: "$.users",
|
|
174
|
+
errorLimit: 5, // Max errors to return (default: 10)
|
|
175
|
+
resolveLocalRefs: true, // Resolve local file $refs (default: true)
|
|
176
|
+
resolveNetworkRefs: false // Resolve HTTP $refs (default: false)
|
|
177
|
+
)
|
|
178
|
+
```
|
|
179
|
+
|
|
161
180
|
Returns:
|
|
162
181
|
|
|
163
182
|
```json
|
|
164
183
|
{
|
|
165
184
|
"valid": false,
|
|
166
|
-
"errorCount":
|
|
185
|
+
"errorCount": 15,
|
|
186
|
+
"truncatedErrorCount": 5,
|
|
167
187
|
"errors": [
|
|
168
188
|
{ "path": "/id", "message": "must be integer", "keyword": "type", "params": {} },
|
|
169
189
|
{ "path": "", "message": "must have required property 'name'", "keyword": "required", "params": {} }
|
|
170
|
-
]
|
|
190
|
+
],
|
|
191
|
+
"resolvedRefs": ["/path/to/definitions.json"]
|
|
171
192
|
}
|
|
172
193
|
```
|
|
173
194
|
|
|
@@ -230,6 +251,48 @@ All path parameters support JSONPath syntax:
|
|
|
230
251
|
- `$.users[*]` - All array items (in search results)
|
|
231
252
|
- `$["special-key"]` - Bracket notation for special characters
|
|
232
253
|
|
|
254
|
+
## Security
|
|
255
|
+
|
|
256
|
+
### Schema Validation
|
|
257
|
+
|
|
258
|
+
All referenced schemas (both local files and network URLs) are validated before use. The tool checks that fetched content is a valid JSON Schema object containing appropriate keywords (`type`, `properties`, `$ref`, etc.), rejecting arrays, primitives, or unrelated JSON files.
|
|
259
|
+
|
|
260
|
+
### Network Schema Resolution
|
|
261
|
+
|
|
262
|
+
By default, `json_validate` only resolves local file `$ref` references. Network (HTTP/HTTPS) resolution is disabled by default for security.
|
|
263
|
+
|
|
264
|
+
To enable network refs for a specific validation:
|
|
265
|
+
|
|
266
|
+
```typescript
|
|
267
|
+
json_validate(file: "data.json", schema: "schema.json", resolveNetworkRefs: true)
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Disabling Network Resolution Completely
|
|
271
|
+
|
|
272
|
+
For security-conscious environments, you can completely disable network schema resolution using an environment variable:
|
|
273
|
+
|
|
274
|
+
```bash
|
|
275
|
+
JSON_EXPLORER_NO_NETWORK=1
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
When set to `1`, this overrides any `resolveNetworkRefs: true` option, ensuring schemas are never fetched from the network.
|
|
279
|
+
|
|
280
|
+
**Claude Desktop config with network disabled:**
|
|
281
|
+
|
|
282
|
+
```json
|
|
283
|
+
{
|
|
284
|
+
"mcpServers": {
|
|
285
|
+
"json-explorer": {
|
|
286
|
+
"command": "npx",
|
|
287
|
+
"args": ["-y", "json-explorer-mcp"],
|
|
288
|
+
"env": {
|
|
289
|
+
"JSON_EXPLORER_NO_NETWORK": "1"
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
```
|
|
295
|
+
|
|
233
296
|
## Development
|
|
234
297
|
|
|
235
298
|
```bash
|
package/dist/index.js
CHANGED
|
@@ -123,13 +123,20 @@ server.tool("json_schema", "Infer the JSON schema/structure at a path. For array
|
|
|
123
123
|
}
|
|
124
124
|
});
|
|
125
125
|
// Tool: json_validate - Validate against JSON Schema
|
|
126
|
-
server.tool("json_validate", "Validate JSON data against a JSON Schema. Schema can be provided inline or as a path to a schema file.", {
|
|
126
|
+
server.tool("json_validate", "Validate JSON data against a JSON Schema. Schema can be provided inline or as a path to a schema file. Automatically resolves local file $refs. Use resolveNetworkRefs to also fetch remote schemas.", {
|
|
127
127
|
file: z.string().describe("Absolute path to the JSON file to validate"),
|
|
128
128
|
schema: z.union([z.string(), z.object({}).passthrough()]).describe("JSON Schema object or path to schema file"),
|
|
129
129
|
path: z.string().optional().describe("JSONPath to validate. Defaults to root."),
|
|
130
|
-
|
|
130
|
+
errorLimit: z.number().optional().describe("Maximum number of errors to return. Defaults to 10."),
|
|
131
|
+
resolveLocalRefs: z.boolean().optional().describe("Resolve $ref to local files. Defaults to true."),
|
|
132
|
+
resolveNetworkRefs: z.boolean().optional().describe("Resolve $ref to HTTP URLs. Defaults to false for security."),
|
|
133
|
+
}, { readOnlyHint: true }, async ({ file, schema, path, errorLimit, resolveLocalRefs, resolveNetworkRefs }) => {
|
|
131
134
|
try {
|
|
132
|
-
const result = await jsonValidate(file, schema, path);
|
|
135
|
+
const result = await jsonValidate(file, schema, path, {
|
|
136
|
+
errorLimit,
|
|
137
|
+
resolveLocalRefs,
|
|
138
|
+
resolveNetworkRefs,
|
|
139
|
+
});
|
|
133
140
|
return {
|
|
134
141
|
content: [
|
|
135
142
|
{
|
package/dist/tools/query.d.ts
CHANGED
|
@@ -39,16 +39,57 @@ export interface FieldStats {
|
|
|
39
39
|
export interface ArrayInfo {
|
|
40
40
|
path: string;
|
|
41
41
|
length: number;
|
|
42
|
-
itemType: string;
|
|
42
|
+
itemType: "object" | "string" | "number" | "mixed";
|
|
43
43
|
fields?: string[];
|
|
44
44
|
fieldCount?: number;
|
|
45
45
|
}
|
|
46
|
+
export interface PrimitiveArrayStats {
|
|
47
|
+
path: string;
|
|
48
|
+
arrayLength: number;
|
|
49
|
+
itemType: "string" | "number" | "mixed";
|
|
50
|
+
typeBreakdown?: Record<string, number>;
|
|
51
|
+
nullCount: number;
|
|
52
|
+
uniqueCount?: number;
|
|
53
|
+
stringStats?: {
|
|
54
|
+
count: number;
|
|
55
|
+
uniqueCount: number;
|
|
56
|
+
lengthStats: {
|
|
57
|
+
min: number;
|
|
58
|
+
max: number;
|
|
59
|
+
avg: number;
|
|
60
|
+
median: number;
|
|
61
|
+
};
|
|
62
|
+
distribution?: Record<string, number>;
|
|
63
|
+
};
|
|
64
|
+
lengthStats?: {
|
|
65
|
+
min: number;
|
|
66
|
+
max: number;
|
|
67
|
+
avg: number;
|
|
68
|
+
median: number;
|
|
69
|
+
};
|
|
70
|
+
distribution?: Record<string, number>;
|
|
71
|
+
numericStats?: {
|
|
72
|
+
count?: number;
|
|
73
|
+
min: number;
|
|
74
|
+
max: number;
|
|
75
|
+
avg: number;
|
|
76
|
+
median: number;
|
|
77
|
+
stdDev: number;
|
|
78
|
+
percentiles: {
|
|
79
|
+
p25: number;
|
|
80
|
+
p50: number;
|
|
81
|
+
p75: number;
|
|
82
|
+
p90: number;
|
|
83
|
+
p99: number;
|
|
84
|
+
};
|
|
85
|
+
};
|
|
86
|
+
}
|
|
46
87
|
export interface StatsResult {
|
|
47
88
|
path: string;
|
|
48
89
|
arrayLength: number;
|
|
49
90
|
fields: FieldStats[];
|
|
50
91
|
}
|
|
51
92
|
export interface MultiStatsResult {
|
|
52
|
-
stats: StatsResult[];
|
|
93
|
+
stats: (StatsResult | PrimitiveArrayStats)[];
|
|
53
94
|
}
|
|
54
|
-
export declare function jsonStats(filePath: string, path?: string, fields?: string[]): Promise<StatsResult | MultiStatsResult>;
|
|
95
|
+
export declare function jsonStats(filePath: string, path?: string, fields?: string[]): Promise<StatsResult | PrimitiveArrayStats | MultiStatsResult>;
|
package/dist/tools/query.js
CHANGED
|
@@ -118,13 +118,13 @@ export async function jsonSample(filePath, path, count = 5, mode = "first", rang
|
|
|
118
118
|
hasMore: arrayLength > count,
|
|
119
119
|
};
|
|
120
120
|
}
|
|
121
|
-
function findArraysOfObjects(data, currentPath = "$", maxDepth = 5, depth = 0) {
|
|
121
|
+
function findArrays(data, currentPath = "$", maxDepth = 5, depth = 0) {
|
|
122
122
|
const results = [];
|
|
123
123
|
if (depth > maxDepth)
|
|
124
124
|
return results;
|
|
125
125
|
if (Array.isArray(data) && data.length > 0) {
|
|
126
126
|
const firstItem = data[0];
|
|
127
|
-
//
|
|
127
|
+
// Arrays of objects
|
|
128
128
|
if (typeof firstItem === "object" && firstItem !== null && !Array.isArray(firstItem)) {
|
|
129
129
|
const fields = Object.keys(firstItem);
|
|
130
130
|
results.push({
|
|
@@ -152,29 +152,69 @@ function findArraysOfObjects(data, currentPath = "$", maxDepth = 5, depth = 0) {
|
|
|
152
152
|
}
|
|
153
153
|
}
|
|
154
154
|
}
|
|
155
|
+
// Arrays of primitives (strings, numbers, or mixed)
|
|
156
|
+
else if (typeof firstItem === "string" || typeof firstItem === "number") {
|
|
157
|
+
// Check if array has mixed types
|
|
158
|
+
const types = new Set(data.map((item) => typeof item).filter((t) => t === "string" || t === "number"));
|
|
159
|
+
if (types.size > 1) {
|
|
160
|
+
results.push({
|
|
161
|
+
path: currentPath,
|
|
162
|
+
length: data.length,
|
|
163
|
+
itemType: "mixed",
|
|
164
|
+
});
|
|
165
|
+
}
|
|
166
|
+
else if (typeof firstItem === "string") {
|
|
167
|
+
results.push({
|
|
168
|
+
path: currentPath,
|
|
169
|
+
length: data.length,
|
|
170
|
+
itemType: "string",
|
|
171
|
+
});
|
|
172
|
+
}
|
|
173
|
+
else {
|
|
174
|
+
results.push({
|
|
175
|
+
path: currentPath,
|
|
176
|
+
length: data.length,
|
|
177
|
+
itemType: "number",
|
|
178
|
+
});
|
|
179
|
+
}
|
|
180
|
+
}
|
|
155
181
|
}
|
|
156
182
|
else if (typeof data === "object" && data !== null) {
|
|
157
183
|
for (const key of Object.keys(data)) {
|
|
158
184
|
const childPath = /^[a-zA-Z_][a-zA-Z0-9_]*$/.test(key)
|
|
159
185
|
? `${currentPath}.${key}`
|
|
160
186
|
: `${currentPath}["${key}"]`;
|
|
161
|
-
results.push(...findArraysOfObjects(data[key], childPath, maxDepth, depth + 1));
|
|
187
|
+
results.push(...findArrays(data[key], childPath, maxDepth, depth + 1));
|
|
162
188
|
}
|
|
163
189
|
}
|
|
164
190
|
return results;
|
|
165
191
|
}
|
|
166
192
|
export async function jsonStats(filePath, path, fields) {
|
|
167
193
|
const data = await loadJson(filePath);
|
|
168
|
-
// If no path provided, compute stats for all arrays
|
|
194
|
+
// If no path provided, compute stats for all arrays
|
|
169
195
|
if (!path) {
|
|
170
|
-
const arrays = findArraysOfObjects(data);
|
|
196
|
+
const arrays = findArrays(data);
|
|
171
197
|
const allStats = [];
|
|
172
198
|
for (const arr of arrays) {
|
|
173
199
|
try {
|
|
174
200
|
const value = getValueAtPath(data, arr.path);
|
|
175
201
|
if (Array.isArray(value) && value.length > 0) {
|
|
176
|
-
|
|
177
|
-
|
|
202
|
+
if (arr.itemType === "object") {
|
|
203
|
+
const stats = computeObjectArrayStats(value, arr.path, fields);
|
|
204
|
+
allStats.push(stats);
|
|
205
|
+
}
|
|
206
|
+
else if (arr.itemType === "string") {
|
|
207
|
+
const stats = computeStringArrayStats(value, arr.path);
|
|
208
|
+
allStats.push(stats);
|
|
209
|
+
}
|
|
210
|
+
else if (arr.itemType === "number") {
|
|
211
|
+
const stats = computeNumberArrayStats(value, arr.path);
|
|
212
|
+
allStats.push(stats);
|
|
213
|
+
}
|
|
214
|
+
else if (arr.itemType === "mixed") {
|
|
215
|
+
const stats = computeMixedArrayStats(value, arr.path);
|
|
216
|
+
allStats.push(stats);
|
|
217
|
+
}
|
|
178
218
|
}
|
|
179
219
|
}
|
|
180
220
|
catch {
|
|
@@ -187,9 +227,27 @@ export async function jsonStats(filePath, path, fields) {
|
|
|
187
227
|
if (!Array.isArray(value)) {
|
|
188
228
|
throw new Error(`Path "${path}" is not an array. Got: ${getValueType(value)}`);
|
|
189
229
|
}
|
|
190
|
-
|
|
230
|
+
// Determine array type
|
|
231
|
+
if (value.length === 0) {
|
|
232
|
+
return { path, arrayLength: 0, fields: [] };
|
|
233
|
+
}
|
|
234
|
+
const firstItem = value[0];
|
|
235
|
+
// Check for mixed primitive types
|
|
236
|
+
if (typeof firstItem === "string" || typeof firstItem === "number") {
|
|
237
|
+
const types = new Set(value.map((item) => typeof item).filter((t) => t === "string" || t === "number"));
|
|
238
|
+
if (types.size > 1) {
|
|
239
|
+
return computeMixedArrayStats(value, path);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
if (typeof firstItem === "string") {
|
|
243
|
+
return computeStringArrayStats(value, path);
|
|
244
|
+
}
|
|
245
|
+
else if (typeof firstItem === "number") {
|
|
246
|
+
return computeNumberArrayStats(value, path);
|
|
247
|
+
}
|
|
248
|
+
return computeObjectArrayStats(value, path, fields);
|
|
191
249
|
}
|
|
192
|
-
function computeArrayStats(value, path, fields) {
|
|
250
|
+
function computeObjectArrayStats(value, path, fields) {
|
|
193
251
|
if (value.length === 0) {
|
|
194
252
|
return { path, arrayLength: 0, fields: [] };
|
|
195
253
|
}
|
|
@@ -247,3 +305,183 @@ function computeArrayStats(value, path, fields) {
|
|
|
247
305
|
fields: fieldStats,
|
|
248
306
|
};
|
|
249
307
|
}
|
|
308
|
+
/**
 * Summarize an array whose items are expected to be strings.
 *
 * @param {unknown[]} value - The array to summarize.
 * @param {string} path - Path of the array; echoed back unchanged in the result.
 * @returns {object} `{ path, arrayLength, itemType: "string", nullCount,
 *   uniqueCount, lengthStats, distribution? }`. `nullCount` is the number of
 *   items that are NOT strings (not only nulls). `lengthStats` is `undefined`
 *   when the array holds no strings. `distribution` is present only when there
 *   are at most 50 distinct strings and then holds the 20 most frequent ones.
 */
function computeStringArrayStats(value, path) {
    const strings = value.filter((v) => typeof v === "string");
    const nullCount = value.length - strings.length;
    // Sort once and read min/max from the ends. Spreading a huge array into
    // Math.min/Math.max can exceed the engine's argument limit.
    const sortedLengths = strings.map((s) => s.length).sort((a, b) => a - b);
    const lengthStats = sortedLengths.length > 0 ? {
        min: sortedLengths[0],
        max: sortedLengths[sortedLengths.length - 1],
        avg: Math.round(sortedLengths.reduce((a, b) => a + b, 0) / sortedLengths.length * 100) / 100,
        // Upper middle element for even-length input (no interpolation).
        median: sortedLengths[Math.floor(sortedLengths.length / 2)],
    } : undefined;
    // Count occurrences in a Map: a plain object would pick up inherited
    // members for strings such as "toString", "constructor" or "__proto__".
    const counts = new Map();
    for (const s of strings) {
        counts.set(s, (counts.get(s) ?? 0) + 1);
    }
    const uniqueCount = counts.size;
    const result = {
        path,
        arrayLength: value.length,
        itemType: "string",
        nullCount,
        uniqueCount,
        lengthStats,
    };
    // Only include the distribution if there are <= 50 unique values.
    if (uniqueCount <= 50) {
        // Most frequent first, capped at the top 20. Object.fromEntries defines
        // own data properties, so a "__proto__" key is stored like any other.
        result.distribution = Object.fromEntries([...counts]
            .sort(([, a], [, b]) => b - a)
            .slice(0, 20));
    }
    return result;
}
|
|
346
|
+
/**
 * Summarize an array whose items are expected to be numbers.
 *
 * @param {unknown[]} value - The array to summarize.
 * @param {string} path - Path of the array; echoed back unchanged in the result.
 * @returns {object} `{ path, arrayLength, itemType: "number", nullCount,
 *   uniqueCount, numericStats?, distribution? }`. `nullCount` counts every item
 *   that is not a non-NaN number. `numericStats` is omitted when no numbers are
 *   present. `distribution` is added only when there are at most 20 distinct
 *   numbers.
 */
function computeNumberArrayStats(value, path) {
    const nums = value.filter((item) => typeof item === "number" && !Number.isNaN(item));
    const summary = {
        path,
        arrayLength: value.length,
        itemType: "number",
        nullCount: value.length - nums.length,
    };
    if (nums.length === 0) {
        return { ...summary, uniqueCount: 0 };
    }
    const ascending = nums.slice().sort((x, y) => x - y);
    const total = nums.reduce((acc, x) => acc + x, 0);
    const mean = total / nums.length;
    // Population standard deviation (divides by N, not N - 1).
    const variance = nums
        .map((x) => Math.pow(x - mean, 2))
        .reduce((acc, x) => acc + x, 0) / nums.length;
    const spread = Math.sqrt(variance);
    // Linear interpolation between the two closest ranks.
    const quantile = (p) => {
        const pos = (p / 100) * (ascending.length - 1);
        const below = Math.floor(pos);
        const above = Math.ceil(pos);
        return below === above
            ? ascending[below]
            : ascending[below] + (pos - below) * (ascending[above] - ascending[below]);
    };
    const round3 = (x) => Math.round(x * 1000) / 1000;
    const uniqueCount = new Set(nums).size;
    const result = {
        ...summary,
        uniqueCount,
        numericStats: {
            min: ascending[0],
            max: ascending[ascending.length - 1],
            avg: round3(mean),
            median: quantile(50), // deliberately left unrounded, unlike p50
            stdDev: round3(spread),
            percentiles: {
                p25: round3(quantile(25)),
                p50: round3(quantile(50)),
                p75: round3(quantile(75)),
                p90: round3(quantile(90)),
                p99: round3(quantile(99)),
            },
        },
    };
    // With few distinct values an exact value -> count table is still compact.
    if (uniqueCount <= 20) {
        const tally = {};
        nums.forEach((x) => {
            const label = String(x);
            tally[label] = (tally[label] || 0) + 1;
        });
        result.distribution = tally;
    }
    return result;
}
|
|
408
|
+
/**
 * Summarize an array that mixes strings and numbers (and possibly other types).
 *
 * @param {unknown[]} value - The array to summarize.
 * @param {string} path - Path of the array; echoed back unchanged in the result.
 * @returns {object} `{ path, arrayLength, itemType: "mixed", typeBreakdown,
 *   nullCount, stringStats?, numericStats? }`. `typeBreakdown` maps each
 *   `typeof` name (with `null` reported as "null") to its count. `nullCount` is
 *   the number of `null` plus `undefined` items. `stringStats` / `numericStats`
 *   are present only when the array holds at least one string / non-NaN number.
 */
function computeMixedArrayStats(value, path) {
    // Count items per type. Keys are typeof names plus "null".
    const typeBreakdown = {};
    for (const item of value) {
        const t = item === null ? "null" : typeof item;
        typeBreakdown[t] = (typeBreakdown[t] ?? 0) + 1;
    }
    // Parenthesize both operands: without the parentheses the expression parses
    // as `nulls || (0 + undefineds)` and drops the undefined count.
    const nullCount = (typeBreakdown["null"] ?? 0) + (typeBreakdown["undefined"] ?? 0);
    const result = {
        path,
        arrayLength: value.length,
        itemType: "mixed",
        typeBreakdown,
        nullCount,
    };
    // String portion.
    const strings = value.filter((v) => typeof v === "string");
    if (strings.length > 0) {
        // Sorted once; min/max are read from the ends instead of spreading the
        // array into Math.min/Math.max, which can overflow the argument limit.
        const sortedLengths = strings.map((s) => s.length).sort((a, b) => a - b);
        // A Map avoids collisions with inherited object members ("toString",
        // "constructor", "__proto__", ...) that a plain-object counter has.
        const counts = new Map();
        for (const s of strings) {
            counts.set(s, (counts.get(s) ?? 0) + 1);
        }
        result.stringStats = {
            count: strings.length,
            uniqueCount: counts.size,
            lengthStats: {
                min: sortedLengths[0],
                max: sortedLengths[sortedLengths.length - 1],
                avg: Math.round(sortedLengths.reduce((a, b) => a + b, 0) / sortedLengths.length * 100) / 100,
                // Upper middle element for even-length input (no interpolation).
                median: sortedLengths[Math.floor(sortedLengths.length / 2)],
            },
        };
        // Include the distribution only for a reasonable number of unique
        // values (<= 30), most frequent first, capped at 15 entries.
        if (counts.size <= 30) {
            result.stringStats.distribution = Object.fromEntries([...counts]
                .sort(([, a], [, b]) => b - a)
                .slice(0, 15));
        }
    }
    // Numeric portion.
    const numbers = value.filter((v) => typeof v === "number" && !Number.isNaN(v));
    if (numbers.length > 0) {
        const sorted = [...numbers].sort((a, b) => a - b);
        const sum = numbers.reduce((a, b) => a + b, 0);
        const avg = sum / numbers.length;
        // Population standard deviation (divides by N).
        const squaredDiffs = numbers.map((n) => Math.pow(n - avg, 2));
        const stdDev = Math.sqrt(squaredDiffs.reduce((a, b) => a + b, 0) / numbers.length);
        // Percentile with linear interpolation between neighbouring ranks.
        const percentile = (p) => {
            const index = (p / 100) * (sorted.length - 1);
            const lower = Math.floor(index);
            const upper = Math.ceil(index);
            if (lower === upper) {
                return sorted[lower];
            }
            return sorted[lower] + (index - lower) * (sorted[upper] - sorted[lower]);
        };
        const round3 = (x) => Math.round(x * 1000) / 1000;
        result.numericStats = {
            count: numbers.length,
            min: sorted[0],
            max: sorted[sorted.length - 1],
            avg: round3(avg),
            median: percentile(50),
            stdDev: round3(stdDev),
            percentiles: {
                p25: round3(percentile(25)),
                p50: round3(percentile(50)),
                p75: round3(percentile(75)),
                p90: round3(percentile(90)),
                p99: round3(percentile(99)),
            },
        };
    }
    return result;
}
|
|
package/dist/tools/structure.d.ts
CHANGED
|
@@ -28,6 +28,13 @@ export interface ValidateResult {
|
|
|
28
28
|
valid: boolean;
|
|
29
29
|
errors: ValidationError[];
|
|
30
30
|
errorCount: number;
|
|
31
|
+
truncatedErrorCount?: number;
|
|
32
|
+
resolvedRefs?: string[];
|
|
31
33
|
}
|
|
32
|
-
export
|
|
34
|
+
export interface ValidateOptions {
|
|
35
|
+
errorLimit?: number;
|
|
36
|
+
resolveLocalRefs?: boolean;
|
|
37
|
+
resolveNetworkRefs?: boolean;
|
|
38
|
+
}
|
|
39
|
+
export declare function jsonValidate(filePath: string, schema: object | string, path?: string, options?: ValidateOptions | number): Promise<ValidateResult>;
|
|
33
40
|
export {};
|
package/dist/tools/structure.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { loadJson, getFileInfo, formatBytes, getValueType } from "../utils/json-parser.js";
|
|
2
2
|
import { getDepthPreview, getValueAtPath } from "../utils/path-helpers.js";
|
|
3
|
+
import { dirname, resolve, isAbsolute } from "path";
|
|
3
4
|
// Dynamic import for ajv (ESM/CJS compat)
|
|
4
5
|
async function getAjv() {
|
|
5
6
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
@@ -115,7 +116,130 @@ export async function jsonSchema(filePath, path) {
|
|
|
115
116
|
schema: inferSchema(targetData),
|
|
116
117
|
};
|
|
117
118
|
}
|
|
118
|
-
|
|
119
|
+
// Find all $ref values in a schema object
|
|
120
|
+
/**
 * Collect every string-valued `$ref` found anywhere inside a schema-like value.
 *
 * @param {unknown} obj - Value to walk; non-objects contribute nothing.
 * @param {Set<string>} [refs] - Accumulator; the same Set is returned.
 * @returns {Set<string>} All `$ref` strings, in depth-first discovery order.
 */
function findRefs(obj, refs = new Set()) {
    if (typeof obj !== "object" || obj === null) {
        return refs;
    }
    const isList = Array.isArray(obj);
    // Only plain (non-array) objects can carry a `$ref` of their own.
    if (!isList && typeof obj.$ref === "string") {
        refs.add(obj.$ref);
    }
    const children = isList ? obj : Object.values(obj);
    children.forEach((child) => {
        findRefs(child, refs);
    });
    return refs;
}
|
|
140
|
+
// Check if a ref is a local file path (not a URL or JSON pointer)
|
|
141
|
+
/**
 * Decide whether a `$ref` string should be treated as a path to a local file.
 *
 * @param {string} ref - Raw `$ref` value.
 * @returns {boolean} `false` for same-document JSON pointers (`#...`) and for
 *   anything written as `scheme://...`; `true` otherwise.
 */
function isLocalFileRef(ref) {
    // Same-document JSON pointers never name a file.
    if (ref.startsWith("#")) {
        return false;
    }
    // URI schemes are case-insensitive, so "HTTPS://..." must be recognised as
    // a URL too. Any "scheme://" form (http, https, ftp, file, ...) is not a
    // plain filesystem path. Windows drive paths ("C:\...") lack "//" and
    // therefore still count as local.
    return !/^[a-z][a-z0-9+.-]*:\/\//i.test(ref);
}
|
|
151
|
+
// Check if a ref is an HTTP URL
|
|
152
|
+
/**
 * Check whether a `$ref` string is an HTTP or HTTPS URL.
 *
 * @param {string} ref - Raw `$ref` value (or a URI handed over by the validator).
 * @returns {boolean} `true` when `ref` starts with `http://` or `https://`.
 */
function isNetworkRef(ref) {
    // Scheme comparison is case-insensitive: "HTTP://host" is the same URL
    // scheme as "http://host".
    return /^https?:\/\//i.test(ref);
}
|
|
155
|
+
// Resolve a local file ref relative to the schema's directory
|
|
156
|
+
/**
 * Turn a local-file `$ref` into a filesystem path.
 *
 * @param {string} ref - `$ref` value, optionally followed by a `#fragment`.
 * @param {string} schemaDir - Directory that relative refs are resolved against.
 * @returns {string} The fragment-free path: unchanged when already absolute,
 *   otherwise resolved against `schemaDir`. If nothing precedes the `#`, the
 *   original `ref` is returned as-is.
 */
function resolveLocalRef(ref, schemaDir) {
    // Keep only the part in front of the first "#".
    const hashAt = ref.indexOf("#");
    const target = hashAt === -1 ? ref : ref.slice(0, hashAt);
    if (target === "") {
        return ref;
    }
    return isAbsolute(target) ? target : resolve(schemaDir, target);
}
|
|
166
|
+
// Recursively load all local schema refs
|
|
167
|
+
/**
 * Load every schema file reachable through local `$ref`s, following refs found
 * inside the loaded files as well.
 *
 * @param {unknown} schemaObj - Schema whose refs should be followed.
 * @param {string} schemaDir - Directory that relative refs are resolved against.
 * @param {Map<string, unknown>} [loaded] - Accumulator keyed by resolved file
 *   path; also serves as the "already visited" guard across recursion.
 * @returns {Promise<Map<string, unknown>>} The `loaded` map.
 * @throws {Error} When a referenced file parses but is rejected by
 *   `isValidJsonSchema`. Other failures are skipped silently.
 */
async function loadLocalRefs(schemaObj, schemaDir, loaded = new Map()) {
    const localRefs = [...findRefs(schemaObj)].filter((ref) => isLocalFileRef(ref));
    for (const ref of localRefs) {
        const target = resolveLocalRef(ref, schemaDir);
        if (loaded.has(target)) {
            continue;
        }
        try {
            const refSchema = await loadJson(target);
            // Refuse files that do not look like a JSON Schema.
            if (!isValidJsonSchema(refSchema)) {
                throw new Error(`File ${target} is not a valid JSON Schema`);
            }
            // Record before recursing so that reference cycles terminate.
            loaded.set(target, refSchema);
            await loadLocalRefs(refSchema, dirname(target), loaded);
        }
        catch (err) {
            // Schema rejections are recognised by their message text and
            // propagated. Anything else (e.g. an unreadable file) is ignored
            // here and left for the validator to report as an unresolved ref.
            const isSchemaRejection = err instanceof Error
                && err.message.includes("not a valid JSON Schema");
            if (isSchemaRejection) {
                throw err;
            }
        }
    }
    return loaded;
}
|
|
196
|
+
// Validate that an object looks like a JSON Schema
|
|
197
|
+
/**
 * Heuristic check that a parsed JSON value looks like a JSON Schema object.
 *
 * @param {unknown} obj - Parsed JSON value.
 * @returns {boolean} `true` for an empty object (a schema that matches
 *   anything) or an object with at least one recognised schema keyword;
 *   `false` for `null`, arrays, primitives and objects with none.
 */
function isValidJsonSchema(obj) {
    if (obj === null || typeof obj !== "object" || Array.isArray(obj)) {
        return false;
    }
    // Structural, applicator and validation keywords. Purely descriptive
    // keywords (title, description, default, ...) are deliberately left out:
    // they are common in ordinary JSON documents and would defeat the check.
    const schemaKeywords = new Set([
        "type", "properties", "items", "required", "enum", "const",
        "allOf", "anyOf", "oneOf", "not", "$ref", "$id", "$schema",
        "definitions", "$defs", "additionalProperties", "patternProperties",
        "minimum", "maximum", "minLength", "maxLength", "pattern", "format",
        // Standard keywords the original list missed; a schema using only
        // these (e.g. {"minItems": 1}) is valid and must not be rejected.
        "if", "then", "else", "contains", "prefixItems", "additionalItems",
        "propertyNames", "dependencies", "dependentRequired", "dependentSchemas",
        "minItems", "maxItems", "uniqueItems", "minProperties", "maxProperties",
        "multipleOf", "exclusiveMinimum", "exclusiveMaximum",
        "unevaluatedProperties", "unevaluatedItems", "$anchor", "$dynamicRef",
    ]);
    const keys = Object.keys(obj);
    // Empty objects {} are valid schemas (match anything), so allow those too.
    return keys.length === 0 || keys.some((key) => schemaKeywords.has(key));
}
|
|
216
|
+
// Fetch a schema from a URL
|
|
217
|
+
/**
 * Download a JSON Schema over HTTP(S) and sanity-check it.
 *
 * @param {string} url - Absolute URL of the schema document.
 * @returns {Promise<object>} The parsed schema.
 * @throws {Error} When the request times out, the response status is not OK,
 *   the body is not JSON, or the JSON is rejected by `isValidJsonSchema`.
 *   Other network failures propagate unchanged from `fetch`.
 */
async function fetchNetworkSchema(url) {
    // Bound the whole request (headers and body) so an unresponsive host
    // cannot stall validation forever.
    const timeoutMs = 30000;
    const signal = AbortSignal.timeout(timeoutMs);
    const timeoutError = (cause) => new Error(`Timed out after ${timeoutMs}ms fetching schema from ${url}`, { cause });
    let response;
    try {
        response = await fetch(url, { signal });
    }
    catch (err) {
        if (err?.name === "TimeoutError") {
            throw timeoutError(err);
        }
        throw err;
    }
    if (!response.ok) {
        throw new Error(`Failed to fetch schema from ${url}: ${response.status} ${response.statusText}`);
    }
    let data;
    try {
        data = await response.json();
    }
    catch (err) {
        // The timeout can also fire while the body is still streaming; report
        // that as a timeout rather than as malformed JSON.
        if (err?.name === "TimeoutError") {
            throw timeoutError(err);
        }
        throw new Error(`Invalid JSON received from ${url}`);
    }
    // Reject payloads that parse as JSON but are not schema-shaped.
    if (!isValidJsonSchema(data)) {
        throw new Error(`URL ${url} did not return a valid JSON Schema`);
    }
    return data;
}
|
|
234
|
+
export async function jsonValidate(filePath, schema, path, options = {}) {
|
|
235
|
+
// Handle legacy signature where 4th param was errorLimit number
|
|
236
|
+
const opts = typeof options === "number"
|
|
237
|
+
? { errorLimit: options }
|
|
238
|
+
: options;
|
|
239
|
+
// Environment variable can completely disable network refs for security
|
|
240
|
+
const networkRefsDisabled = process.env.JSON_EXPLORER_NO_NETWORK === "1";
|
|
241
|
+
const { errorLimit = 10, resolveLocalRefs = true, resolveNetworkRefs = false, } = opts;
|
|
242
|
+
const effectiveResolveNetworkRefs = resolveNetworkRefs && !networkRefsDisabled;
|
|
119
243
|
const data = await loadJson(filePath);
|
|
120
244
|
const targetData = path ? getValueAtPath(data, path) : data;
|
|
121
245
|
if (targetData === undefined) {
|
|
@@ -123,26 +247,85 @@ export async function jsonValidate(filePath, schema, path) {
|
|
|
123
247
|
}
|
|
124
248
|
// Load schema from file if it's a string path
|
|
125
249
|
let schemaObj;
|
|
250
|
+
let schemaDir;
|
|
126
251
|
if (typeof schema === "string") {
|
|
127
252
|
schemaObj = (await loadJson(schema));
|
|
253
|
+
schemaDir = dirname(resolve(schema));
|
|
128
254
|
}
|
|
129
255
|
else {
|
|
130
256
|
schemaObj = schema;
|
|
257
|
+
// For inline schemas, use current working directory
|
|
258
|
+
schemaDir = process.cwd();
|
|
131
259
|
}
|
|
132
260
|
const { Ajv, addFormats } = await getAjv();
|
|
133
|
-
|
|
261
|
+
// Configure ajv options
|
|
262
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
263
|
+
const ajvOptions = { allErrors: true };
|
|
264
|
+
// Set up network schema loading if enabled
|
|
265
|
+
if (effectiveResolveNetworkRefs) {
|
|
266
|
+
ajvOptions.loadSchema = async (uri) => {
|
|
267
|
+
if (isNetworkRef(uri)) {
|
|
268
|
+
return fetchNetworkSchema(uri);
|
|
269
|
+
}
|
|
270
|
+
throw new Error(`Cannot load non-network ref: ${uri}`);
|
|
271
|
+
};
|
|
272
|
+
}
|
|
273
|
+
const ajv = new Ajv(ajvOptions);
|
|
134
274
|
addFormats(ajv);
|
|
135
|
-
|
|
275
|
+
// Pre-load local refs if enabled
|
|
276
|
+
const resolvedRefs = [];
|
|
277
|
+
if (resolveLocalRefs) {
|
|
278
|
+
const localRefs = await loadLocalRefs(schemaObj, schemaDir);
|
|
279
|
+
for (const [refPath, refSchema] of localRefs) {
|
|
280
|
+
// If schema has $id, ajv will use that for resolution
|
|
281
|
+
// Otherwise we add it by its relative filename
|
|
282
|
+
const refRecord = refSchema;
|
|
283
|
+
if (refRecord.$id) {
|
|
284
|
+
// Schema has its own $id - ajv will register it by that
|
|
285
|
+
ajv.addSchema(refSchema);
|
|
286
|
+
}
|
|
287
|
+
else {
|
|
288
|
+
// No $id - register by filename (last component of path)
|
|
289
|
+
const filename = refPath.split("/").pop() || refPath;
|
|
290
|
+
ajv.addSchema(refSchema, filename);
|
|
291
|
+
}
|
|
292
|
+
resolvedRefs.push(refPath);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
// Compile and validate
|
|
296
|
+
let validate;
|
|
297
|
+
try {
|
|
298
|
+
if (effectiveResolveNetworkRefs) {
|
|
299
|
+
// Use compileAsync for network refs
|
|
300
|
+
validate = await ajv.compileAsync(schemaObj);
|
|
301
|
+
}
|
|
302
|
+
else {
|
|
303
|
+
validate = ajv.compile(schemaObj);
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
catch (err) {
|
|
307
|
+
throw new Error(`Schema compilation failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
308
|
+
}
|
|
136
309
|
const valid = validate(targetData);
|
|
137
|
-
const errors = (validate.errors || []).map((err) => ({
|
|
310
|
+
const allErrors = (validate.errors || []).map((err) => ({
|
|
138
311
|
path: err.instancePath || "$",
|
|
139
312
|
message: err.message || "Unknown error",
|
|
140
313
|
keyword: err.keyword,
|
|
141
314
|
params: err.params,
|
|
142
315
|
}));
|
|
143
|
-
|
|
316
|
+
const totalErrorCount = allErrors.length;
|
|
317
|
+
const truncated = totalErrorCount > errorLimit;
|
|
318
|
+
const errors = truncated ? allErrors.slice(0, errorLimit) : allErrors;
|
|
319
|
+
const result = {
|
|
144
320
|
valid: valid === true,
|
|
145
321
|
errors,
|
|
146
|
-
errorCount: errors.length,
|
|
322
|
+
errorCount: totalErrorCount,
|
|
147
323
|
};
|
|
324
|
+
if (truncated) {
|
|
325
|
+
result.truncatedErrorCount = totalErrorCount - errorLimit;
|
|
326
|
+
}
|
|
327
|
+
if (resolvedRefs.length > 0) {
|
|
328
|
+
result.resolvedRefs = resolvedRefs;
|
|
329
|
+
}
|
|
330
|
+
return result;
|
|
148
331
|
}
|
|
package/dist/utils/json-parser.js
CHANGED
|
@@ -27,7 +27,14 @@ export async function loadJson(filePath) {
|
|
|
27
27
|
}
|
|
28
28
|
// Read and parse file
|
|
29
29
|
const content = await readFile(filePath, "utf-8");
|
|
30
|
-
|
|
30
|
+
let data;
|
|
31
|
+
try {
|
|
32
|
+
data = JSON.parse(content);
|
|
33
|
+
}
|
|
34
|
+
catch (err) {
|
|
35
|
+
const message = err instanceof SyntaxError ? err.message : "Unknown parse error";
|
|
36
|
+
throw new Error(`Invalid JSON in ${filePath}: ${message}`);
|
|
37
|
+
}
|
|
31
38
|
// Update cache (with size management)
|
|
32
39
|
if (info.size < MAX_CACHE_SIZE / 2) {
|
|
33
40
|
// Only cache files smaller than half the max cache size
|