json-explorer-mcp 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,13 +8,19 @@ An MCP (Model Context Protocol) server for efficiently exploring large JSON file
8
8
  - **Smart truncation** - Large values are automatically summarized
9
9
  - **Caching** - Parsed JSON is cached with file modification checks
10
10
  - **Schema inference** - Understand structure without reading all data
11
- - **Aggregate statistics** - Get counts, distributions, and numeric stats
11
+ - **Schema validation** - Validate data against JSON Schema with `$ref` resolution
12
+ - **Aggregate statistics** - Get counts, distributions, and numeric stats for arrays
12
13
 
13
14
  ## Installation
14
15
 
15
16
  ```bash
16
- npm install
17
- npm run build
17
+ npm install -g json-explorer-mcp
18
+ ```
19
+
20
+ Or use directly with npx:
21
+
22
+ ```bash
23
+ npx json-explorer-mcp
18
24
  ```
19
25
 
20
26
  ## Usage
@@ -27,8 +33,8 @@ Add to your Claude Desktop configuration (`~/Library/Application Support/Claude/
27
33
  {
28
34
  "mcpServers": {
29
35
  "json-explorer": {
30
- "command": "node",
31
- "args": ["/path/to/json-explorer-mcp/dist/index.js"]
36
+ "command": "npx",
37
+ "args": ["-y", "json-explorer-mcp"]
32
38
  }
33
39
  }
34
40
  }
@@ -42,8 +48,8 @@ Add to `.mcp.json` in your project:
42
48
  {
43
49
  "mcpServers": {
44
50
  "json-explorer": {
45
- "command": "node",
46
- "args": ["dist/index.js"]
51
+ "command": "npx",
52
+ "args": ["-y", "json-explorer-mcp"]
47
53
  }
48
54
  }
49
55
  }
@@ -55,11 +61,12 @@ Add to `.mcp.json` in your project:
55
61
 
56
62
  Get an overview of a JSON file including size, structure type, and a depth-limited preview.
57
63
 
58
- ```
64
+ ```typescript
59
65
  json_inspect(file: "/path/to/data.json")
60
66
  ```
61
67
 
62
68
  Returns:
69
+
63
70
  ```json
64
71
  {
65
72
  "file": "/path/to/data.json",
@@ -77,18 +84,19 @@ Returns:
77
84
 
78
85
  List all keys (for objects) or indices (for arrays) at a given path with type info and previews.
79
86
 
80
- ```
87
+ ```typescript
81
88
  json_keys(file: "/path/to/data.json", path: "$.users")
82
89
  ```
83
90
 
84
91
  Returns:
92
+
85
93
  ```json
86
94
  {
87
95
  "path": "$.users",
88
96
  "type": "array",
89
97
  "keys": [
90
- { "key": "[0]", "type": "object", "preview": "{\"id\": 1, \"name\": \"Alice\", ...}", "path": "$.users[0]" },
91
- { "key": "[1]", "type": "object", "preview": "{\"id\": 2, \"name\": \"Bob\", ...}", "path": "$.users[1]" }
98
+ { "key": "[0]", "type": "object", "preview": "{\"id\": 1, ...}", "path": "$.users[0]" },
99
+ { "key": "[1]", "type": "object", "preview": "{\"id\": 2, ...}", "path": "$.users[1]" }
92
100
  ],
93
101
  "totalCount": 1000
94
102
  }
@@ -98,7 +106,7 @@ Returns:
98
106
 
99
107
  Retrieve the value at a specific path. Large values are automatically truncated.
100
108
 
101
- ```
109
+ ```typescript
102
110
  json_get(file: "/path/to/data.json", path: "$.users[0]")
103
111
  ```
104
112
 
@@ -106,11 +114,12 @@ json_get(file: "/path/to/data.json", path: "$.users[0]")
106
114
 
107
115
  Infer the JSON schema/structure at a path. For arrays, samples items to determine the item schema.
108
116
 
109
- ```
117
+ ```typescript
110
118
  json_schema(file: "/path/to/data.json", path: "$.users")
111
119
  ```
112
120
 
113
121
  Returns:
122
+
114
123
  ```json
115
124
  {
116
125
  "path": "$.users",
@@ -128,11 +137,66 @@ Returns:
128
137
  }
129
138
  ```
130
139
 
140
+ ### json_validate
141
+
142
+ Validate JSON data against a JSON Schema. Schema can be provided inline or as a path to a schema file.
143
+
144
+ **Features:**
145
+ - Automatic resolution of local file `$ref` references
146
+ - Optional network `$ref` resolution (disabled by default)
147
+ - Validates that referenced files are actual JSON Schemas
148
+ - Error limiting (default 10) to avoid huge error lists
149
+
150
+ ```typescript
151
+ json_validate(
152
+ file: "/path/to/data.json",
153
+ schema: { "type": "object", "properties": { "id": { "type": "integer" } }, "required": ["id"] },
154
+ path: "$.users[0]"
155
+ )
156
+ ```
157
+
158
+ Or with a schema file (local `$ref`s are automatically resolved):
159
+
160
+ ```typescript
161
+ json_validate(
162
+ file: "/path/to/data.json",
163
+ schema: "/path/to/schema.json"
164
+ )
165
+ ```
166
+
167
+ With options:
168
+
169
+ ```typescript
170
+ json_validate(
171
+ file: "/path/to/data.json",
172
+ schema: "/path/to/schema.json",
173
+ path: "$.users",
174
+ errorLimit: 5, // Max errors to return (default: 10)
175
+ resolveLocalRefs: true, // Resolve local file $refs (default: true)
176
+ resolveNetworkRefs: false // Resolve HTTP $refs (default: false)
177
+ )
178
+ ```
179
+
180
+ Returns:
181
+
182
+ ```json
183
+ {
184
+ "valid": false,
185
+ "errorCount": 15,
186
+ "truncatedErrorCount": 5,
187
+ "errors": [
188
+ { "path": "/id", "message": "must be integer", "keyword": "type", "params": {} },
189
+ { "path": "$", "message": "must have required property 'name'", "keyword": "required", "params": {} }
190
+ ],
191
+ "resolvedRefs": ["/path/to/definitions.json"]
192
+ }
193
+ ```
194
+
131
195
  ### json_search
132
196
 
133
197
  Search for keys or values matching a pattern (regex supported).
134
198
 
135
- ```
199
+ ```typescript
136
200
  json_search(file: "/path/to/data.json", query: "email", searchType: "key")
137
201
  json_search(file: "/path/to/data.json", query: "@example.com", searchType: "value")
138
202
  ```
@@ -141,7 +205,7 @@ json_search(file: "/path/to/data.json", query: "@example.com", searchType: "valu
141
205
 
142
206
  Get sample items from an array. Supports first, last, random, or range-based sampling.
143
207
 
144
- ```
208
+ ```typescript
145
209
  json_sample(file: "/path/to/data.json", path: "$.users", count: 5, mode: "random")
146
210
  ```
147
211
 
@@ -149,16 +213,14 @@ json_sample(file: "/path/to/data.json", path: "$.users", count: 5, mode: "random
149
213
 
150
214
  Get aggregate statistics for array fields. If no path provided, discovers and analyzes all arrays of objects in the file.
151
215
 
152
- ```
216
+ ```typescript
153
217
  json_stats(file: "/path/to/data.json")
154
218
  ```
155
219
 
156
220
  Returns:
221
+
157
222
  ```json
158
223
  {
159
- "arrays": [
160
- { "path": "$.users", "length": 1000, "itemType": "object", "fields": ["id", "name", "status"], "fieldCount": 10 }
161
- ],
162
224
  "stats": [
163
225
  {
164
226
  "path": "$.users",
@@ -173,7 +235,8 @@ Returns:
173
235
  ```
174
236
 
175
237
  With a specific path:
176
- ```
238
+
239
+ ```typescript
177
240
  json_stats(file: "/path/to/data.json", path: "$.users", fields: ["status", "created_at"])
178
241
  ```
179
242
 
@@ -188,6 +251,48 @@ All path parameters support JSONPath syntax:
188
251
  - `$.users[*]` - All array items (in search results)
189
252
  - `$["special-key"]` - Bracket notation for special characters
190
253
 
254
+ ## Security
255
+
256
+ ### Schema Validation
257
+
258
+ All referenced schemas (both local files and network URLs) are validated before use. The tool checks that fetched content is a valid JSON Schema object containing appropriate keywords (`type`, `properties`, `$ref`, etc.), rejecting arrays, primitives, or unrelated JSON files.
259
+
260
+ ### Network Schema Resolution
261
+
262
+ By default, `json_validate` only resolves local file `$ref` references. Network (HTTP/HTTPS) resolution is disabled by default for security.
263
+
264
+ To enable network refs for a specific validation:
265
+
266
+ ```typescript
267
+ json_validate(file: "data.json", schema: "schema.json", resolveNetworkRefs: true)
268
+ ```
269
+
270
+ ### Disabling Network Resolution Completely
271
+
272
+ For security-conscious environments, you can completely disable network schema resolution using an environment variable:
273
+
274
+ ```bash
275
+ JSON_EXPLORER_NO_NETWORK=1
276
+ ```
277
+
278
+ When set to `1`, this overrides any `resolveNetworkRefs: true` option, ensuring schemas are never fetched from the network.
279
+
280
+ **Claude Desktop config with network disabled:**
281
+
282
+ ```json
283
+ {
284
+ "mcpServers": {
285
+ "json-explorer": {
286
+ "command": "npx",
287
+ "args": ["-y", "json-explorer-mcp"],
288
+ "env": {
289
+ "JSON_EXPLORER_NO_NETWORK": "1"
290
+ }
291
+ }
292
+ }
293
+ }
294
+ ```
295
+
191
296
  ## Development
192
297
 
193
298
  ```bash
@@ -197,6 +302,9 @@ npm install
197
302
  # Build
198
303
  npm run build
199
304
 
305
+ # Run tests
306
+ npm test
307
+
200
308
  # Run in development mode
201
309
  npm run dev
202
310
  ```
package/dist/index.js CHANGED
@@ -123,13 +123,20 @@ server.tool("json_schema", "Infer the JSON schema/structure at a path. For array
123
123
  }
124
124
  });
125
125
  // Tool: json_validate - Validate against JSON Schema
126
- server.tool("json_validate", "Validate JSON data against a JSON Schema. Schema can be provided inline or as a path to a schema file.", {
126
+ server.tool("json_validate", "Validate JSON data against a JSON Schema. Schema can be provided inline or as a path to a schema file. Automatically resolves local file $refs. Use resolveNetworkRefs to also fetch remote schemas.", {
127
127
  file: z.string().describe("Absolute path to the JSON file to validate"),
128
128
  schema: z.union([z.string(), z.object({}).passthrough()]).describe("JSON Schema object or path to schema file"),
129
129
  path: z.string().optional().describe("JSONPath to validate. Defaults to root."),
130
- }, { readOnlyHint: true }, async ({ file, schema, path }) => {
130
+ errorLimit: z.number().optional().describe("Maximum number of errors to return. Defaults to 10."),
131
+ resolveLocalRefs: z.boolean().optional().describe("Resolve $ref to local files. Defaults to true."),
132
+ resolveNetworkRefs: z.boolean().optional().describe("Resolve $ref to HTTP URLs. Defaults to false for security."),
133
+ }, { readOnlyHint: true }, async ({ file, schema, path, errorLimit, resolveLocalRefs, resolveNetworkRefs }) => {
131
134
  try {
132
- const result = await jsonValidate(file, schema, path);
135
+ const result = await jsonValidate(file, schema, path, {
136
+ errorLimit,
137
+ resolveLocalRefs,
138
+ resolveNetworkRefs,
139
+ });
133
140
  return {
134
141
  content: [
135
142
  {
@@ -39,16 +39,57 @@ export interface FieldStats {
39
39
  export interface ArrayInfo {
40
40
  path: string;
41
41
  length: number;
42
- itemType: string;
42
+ itemType: "object" | "string" | "number" | "mixed";
43
43
  fields?: string[];
44
44
  fieldCount?: number;
45
45
  }
46
+ export interface PrimitiveArrayStats {
47
+ path: string;
48
+ arrayLength: number;
49
+ itemType: "string" | "number" | "mixed";
50
+ typeBreakdown?: Record<string, number>;
51
+ nullCount: number;
52
+ uniqueCount?: number;
53
+ stringStats?: {
54
+ count: number;
55
+ uniqueCount: number;
56
+ lengthStats: {
57
+ min: number;
58
+ max: number;
59
+ avg: number;
60
+ median: number;
61
+ };
62
+ distribution?: Record<string, number>;
63
+ };
64
+ lengthStats?: {
65
+ min: number;
66
+ max: number;
67
+ avg: number;
68
+ median: number;
69
+ };
70
+ distribution?: Record<string, number>;
71
+ numericStats?: {
72
+ count?: number;
73
+ min: number;
74
+ max: number;
75
+ avg: number;
76
+ median: number;
77
+ stdDev: number;
78
+ percentiles: {
79
+ p25: number;
80
+ p50: number;
81
+ p75: number;
82
+ p90: number;
83
+ p99: number;
84
+ };
85
+ };
86
+ }
46
87
  export interface StatsResult {
47
88
  path: string;
48
89
  arrayLength: number;
49
90
  fields: FieldStats[];
50
91
  }
51
92
  export interface MultiStatsResult {
52
- stats: StatsResult[];
93
+ stats: (StatsResult | PrimitiveArrayStats)[];
53
94
  }
54
- export declare function jsonStats(filePath: string, path?: string, fields?: string[]): Promise<StatsResult | MultiStatsResult>;
95
+ export declare function jsonStats(filePath: string, path?: string, fields?: string[]): Promise<StatsResult | PrimitiveArrayStats | MultiStatsResult>;
@@ -118,13 +118,13 @@ export async function jsonSample(filePath, path, count = 5, mode = "first", rang
118
118
  hasMore: arrayLength > count,
119
119
  };
120
120
  }
121
- function findArraysOfObjects(data, currentPath = "$", maxDepth = 5, depth = 0) {
121
+ function findArrays(data, currentPath = "$", maxDepth = 5, depth = 0) {
122
122
  const results = [];
123
123
  if (depth > maxDepth)
124
124
  return results;
125
125
  if (Array.isArray(data) && data.length > 0) {
126
126
  const firstItem = data[0];
127
- // Only include arrays of objects (not primitives or nested arrays)
127
+ // Arrays of objects
128
128
  if (typeof firstItem === "object" && firstItem !== null && !Array.isArray(firstItem)) {
129
129
  const fields = Object.keys(firstItem);
130
130
  results.push({
@@ -152,29 +152,69 @@ function findArraysOfObjects(data, currentPath = "$", maxDepth = 5, depth = 0) {
152
152
  }
153
153
  }
154
154
  }
155
+ // Arrays of primitives (strings, numbers, or mixed)
156
+ else if (typeof firstItem === "string" || typeof firstItem === "number") {
157
+ // Check if array has mixed types
158
+ const types = new Set(data.map((item) => typeof item).filter((t) => t === "string" || t === "number"));
159
+ if (types.size > 1) {
160
+ results.push({
161
+ path: currentPath,
162
+ length: data.length,
163
+ itemType: "mixed",
164
+ });
165
+ }
166
+ else if (typeof firstItem === "string") {
167
+ results.push({
168
+ path: currentPath,
169
+ length: data.length,
170
+ itemType: "string",
171
+ });
172
+ }
173
+ else {
174
+ results.push({
175
+ path: currentPath,
176
+ length: data.length,
177
+ itemType: "number",
178
+ });
179
+ }
180
+ }
155
181
  }
156
182
  else if (typeof data === "object" && data !== null) {
157
183
  for (const key of Object.keys(data)) {
158
184
  const childPath = /^[a-zA-Z_][a-zA-Z0-9_]*$/.test(key)
159
185
  ? `${currentPath}.${key}`
160
186
  : `${currentPath}["${key}"]`;
161
- results.push(...findArraysOfObjects(data[key], childPath, maxDepth, depth + 1));
187
+ results.push(...findArrays(data[key], childPath, maxDepth, depth + 1));
162
188
  }
163
189
  }
164
190
  return results;
165
191
  }
166
192
  export async function jsonStats(filePath, path, fields) {
167
193
  const data = await loadJson(filePath);
168
- // If no path provided, compute stats for all arrays of objects
194
+ // If no path provided, compute stats for all arrays
169
195
  if (!path) {
170
- const arrays = findArraysOfObjects(data);
196
+ const arrays = findArrays(data);
171
197
  const allStats = [];
172
198
  for (const arr of arrays) {
173
199
  try {
174
200
  const value = getValueAtPath(data, arr.path);
175
201
  if (Array.isArray(value) && value.length > 0) {
176
- const stats = computeArrayStats(value, arr.path, fields);
177
- allStats.push(stats);
202
+ if (arr.itemType === "object") {
203
+ const stats = computeObjectArrayStats(value, arr.path, fields);
204
+ allStats.push(stats);
205
+ }
206
+ else if (arr.itemType === "string") {
207
+ const stats = computeStringArrayStats(value, arr.path);
208
+ allStats.push(stats);
209
+ }
210
+ else if (arr.itemType === "number") {
211
+ const stats = computeNumberArrayStats(value, arr.path);
212
+ allStats.push(stats);
213
+ }
214
+ else if (arr.itemType === "mixed") {
215
+ const stats = computeMixedArrayStats(value, arr.path);
216
+ allStats.push(stats);
217
+ }
178
218
  }
179
219
  }
180
220
  catch {
@@ -187,9 +227,27 @@ export async function jsonStats(filePath, path, fields) {
187
227
  if (!Array.isArray(value)) {
188
228
  throw new Error(`Path "${path}" is not an array. Got: ${getValueType(value)}`);
189
229
  }
190
- return computeArrayStats(value, path, fields);
230
+ // Determine array type
231
+ if (value.length === 0) {
232
+ return { path, arrayLength: 0, fields: [] };
233
+ }
234
+ const firstItem = value[0];
235
+ // Check for mixed primitive types
236
+ if (typeof firstItem === "string" || typeof firstItem === "number") {
237
+ const types = new Set(value.map((item) => typeof item).filter((t) => t === "string" || t === "number"));
238
+ if (types.size > 1) {
239
+ return computeMixedArrayStats(value, path);
240
+ }
241
+ }
242
+ if (typeof firstItem === "string") {
243
+ return computeStringArrayStats(value, path);
244
+ }
245
+ else if (typeof firstItem === "number") {
246
+ return computeNumberArrayStats(value, path);
247
+ }
248
+ return computeObjectArrayStats(value, path, fields);
191
249
  }
192
- function computeArrayStats(value, path, fields) {
250
+ function computeObjectArrayStats(value, path, fields) {
193
251
  if (value.length === 0) {
194
252
  return { path, arrayLength: 0, fields: [] };
195
253
  }
@@ -247,3 +305,183 @@ function computeArrayStats(value, path, fields) {
247
305
  fields: fieldStats,
248
306
  };
249
307
  }
308
/**
 * Summarize an array whose items are expected to be strings.
 *
 * @param {unknown[]} value - The array to analyze; non-string items are tolerated.
 * @param {string} path - JSONPath of the array, echoed back in the result.
 * @returns {object} `{ path, arrayLength, itemType: "string", nullCount,
 *   uniqueCount, lengthStats, distribution? }`. `lengthStats` is `undefined`
 *   when the array holds no strings. `distribution` (top 20 values by count,
 *   descending) is only present when there are at most 50 distinct strings.
 */
function computeStringArrayStats(value, path) {
    const strings = value.filter((item) => typeof item === "string");
    // Despite the name, this counts every non-string item, not only nulls.
    const nullCount = value.length - strings.length;

    // Sort once and read min/max from the ends. Spreading a very large array
    // into Math.min/Math.max can exceed the engine's argument limit.
    const sortedLengths = strings.map((s) => s.length).sort((a, b) => a - b);
    let lengthStats;
    if (sortedLengths.length > 0) {
        const totalLength = sortedLengths.reduce((sum, len) => sum + len, 0);
        lengthStats = {
            min: sortedLengths[0],
            max: sortedLengths[sortedLengths.length - 1],
            avg: Math.round(totalLength / sortedLengths.length * 100) / 100,
            // Upper-middle element for even-sized inputs (no interpolation).
            median: sortedLengths[Math.floor(sortedLengths.length / 2)],
        };
    }

    // A prototype-less object keeps data such as "constructor", "toString" or
    // "__proto__" from colliding with inherited Object.prototype members.
    const counts = Object.create(null);
    for (const s of strings) {
        counts[s] = (counts[s] ?? 0) + 1;
    }
    const uniqueCount = Object.keys(counts).length;

    const result = {
        path,
        arrayLength: value.length,
        itemType: "string",
        nullCount,
        uniqueCount,
        lengthStats,
    };

    // A distribution is only useful for low-cardinality data.
    if (uniqueCount <= 50) {
        // Object.fromEntries defines own properties, so a "__proto__" key survives.
        result.distribution = Object.fromEntries(
            Object.entries(counts)
                .sort(([, a], [, b]) => b - a)
                .slice(0, 20),
        );
    }
    return result;
}
346
/**
 * Summarize an array whose items are expected to be numbers.
 *
 * @param {unknown[]} value - The array to analyze; non-numeric items and NaN are ignored.
 * @param {string} path - JSONPath of the array, echoed back in the result.
 * @returns {object} `{ path, arrayLength, itemType: "number", nullCount,
 *   uniqueCount, numericStats?, distribution? }`. `numericStats` is omitted
 *   when no usable numbers exist. `distribution` is only present when there
 *   are at most 20 distinct values.
 */
function computeNumberArrayStats(value, path) {
    const numbers = value.filter((item) => typeof item === "number" && !Number.isNaN(item));
    const base = {
        path,
        arrayLength: value.length,
        itemType: "number",
        // Counts everything that was filtered out, not only nulls.
        nullCount: value.length - numbers.length,
    };
    if (numbers.length === 0) {
        return { ...base, uniqueCount: 0 };
    }

    const sorted = [...numbers].sort((a, b) => a - b);
    const round3 = (x) => Math.round(x * 1000) / 1000;

    const avg = numbers.reduce((acc, n) => acc + n, 0) / numbers.length;
    // Population standard deviation (divides by N, not N - 1).
    const variance = numbers.reduce((acc, n) => acc + Math.pow(n - avg, 2), 0) / numbers.length;
    const stdDev = Math.sqrt(variance);

    // Linear interpolation between the two closest ranks.
    const percentile = (p) => {
        const index = (p / 100) * (sorted.length - 1);
        const lower = Math.floor(index);
        const upper = Math.ceil(index);
        return lower === upper
            ? sorted[lower]
            : sorted[lower] + (index - lower) * (sorted[upper] - sorted[lower]);
    };

    const uniqueCount = new Set(numbers).size;
    const result = {
        ...base,
        uniqueCount,
        numericStats: {
            min: sorted[0],
            max: sorted[sorted.length - 1],
            avg: round3(avg),
            median: percentile(50),
            stdDev: round3(stdDev),
            percentiles: {
                p25: round3(percentile(25)),
                p50: round3(percentile(50)),
                p75: round3(percentile(75)),
                p90: round3(percentile(90)),
                p99: round3(percentile(99)),
            },
        },
    };

    // Low-cardinality data: also report how often each value occurs.
    if (uniqueCount <= 20) {
        const distribution = {};
        numbers.forEach((n) => {
            const key = String(n);
            distribution[key] = (distribution[key] || 0) + 1;
        });
        result.distribution = distribution;
    }
    return result;
}
408
/**
 * Summarize an array that mixes strings and numbers (and possibly other values).
 *
 * @param {unknown[]} value - The array to analyze.
 * @param {string} path - JSONPath of the array, echoed back in the result.
 * @returns {object} `{ path, arrayLength, itemType: "mixed", typeBreakdown,
 *   nullCount, stringStats?, numericStats? }`. `typeBreakdown` maps each
 *   `typeof` result (with `null` reported as "null") to its count.
 *   `stringStats` / `numericStats` are only present when the array holds at
 *   least one string / one non-NaN number.
 */
function computeMixedArrayStats(value, path) {
    // Tally items by typeof, reporting null separately from "object".
    const typeBreakdown = {};
    for (const item of value) {
        const kind = item === null ? "null" : typeof item;
        typeBreakdown[kind] = (typeBreakdown[kind] || 0) + 1;
    }
    // Parenthesized on purpose: `a || 0 + b` parses as `a || (0 + b)`, which
    // would ignore the undefined count whenever any null is present.
    const nullCount = (typeBreakdown["null"] || 0) + (typeBreakdown["undefined"] || 0);

    const result = {
        path,
        arrayLength: value.length,
        itemType: "mixed",
        typeBreakdown,
        nullCount,
    };

    // String statistics, if there are strings.
    const strings = value.filter((v) => typeof v === "string");
    if (strings.length > 0) {
        // Sort once and read min/max from the ends. Spreading a very large
        // array into Math.min/Math.max can exceed the engine's argument limit.
        const sortedLengths = strings.map((s) => s.length).sort((a, b) => a - b);
        const totalLength = sortedLengths.reduce((sum, len) => sum + len, 0);

        // A prototype-less object keeps values like "constructor" or
        // "__proto__" from colliding with inherited Object.prototype members.
        const counts = Object.create(null);
        for (const s of strings) {
            counts[s] = (counts[s] ?? 0) + 1;
        }
        const entries = Object.entries(counts);

        result.stringStats = {
            count: strings.length,
            uniqueCount: entries.length,
            lengthStats: {
                min: sortedLengths[0],
                max: sortedLengths[sortedLengths.length - 1],
                avg: Math.round(totalLength / sortedLengths.length * 100) / 100,
                // Upper-middle element for even-sized inputs (no interpolation).
                median: sortedLengths[Math.floor(sortedLengths.length / 2)],
            },
        };
        // Only low-cardinality data gets a distribution (top 15 by count).
        if (entries.length <= 30) {
            result.stringStats.distribution = Object.fromEntries(
                entries.sort(([, a], [, b]) => b - a).slice(0, 15),
            );
        }
    }

    // Numeric statistics, if there are usable numbers.
    const numbers = value.filter((v) => typeof v === "number" && !Number.isNaN(v));
    if (numbers.length > 0) {
        const sorted = [...numbers].sort((a, b) => a - b);
        const avg = numbers.reduce((a, b) => a + b, 0) / numbers.length;
        // Population standard deviation (divides by N, not N - 1).
        const variance = numbers.reduce((acc, n) => acc + Math.pow(n - avg, 2), 0) / numbers.length;
        const stdDev = Math.sqrt(variance);
        const round3 = (x) => Math.round(x * 1000) / 1000;
        // Linear interpolation between the two closest ranks.
        const percentile = (p) => {
            const index = (p / 100) * (sorted.length - 1);
            const lower = Math.floor(index);
            const upper = Math.ceil(index);
            if (lower === upper) {
                return sorted[lower];
            }
            return sorted[lower] + (index - lower) * (sorted[upper] - sorted[lower]);
        };
        result.numericStats = {
            count: numbers.length,
            min: sorted[0],
            max: sorted[sorted.length - 1],
            avg: round3(avg),
            median: percentile(50),
            stdDev: round3(stdDev),
            percentiles: {
                p25: round3(percentile(25)),
                p50: round3(percentile(50)),
                p75: round3(percentile(75)),
                p90: round3(percentile(90)),
                p99: round3(percentile(99)),
            },
        };
    }
    return result;
}
@@ -28,6 +28,13 @@ export interface ValidateResult {
28
28
  valid: boolean;
29
29
  errors: ValidationError[];
30
30
  errorCount: number;
31
+ truncatedErrorCount?: number;
32
+ resolvedRefs?: string[];
31
33
  }
32
- export declare function jsonValidate(filePath: string, schema: object | string, path?: string): Promise<ValidateResult>;
34
+ export interface ValidateOptions {
35
+ errorLimit?: number;
36
+ resolveLocalRefs?: boolean;
37
+ resolveNetworkRefs?: boolean;
38
+ }
39
+ export declare function jsonValidate(filePath: string, schema: object | string, path?: string, options?: ValidateOptions | number): Promise<ValidateResult>;
33
40
  export {};
@@ -1,5 +1,6 @@
1
1
  import { loadJson, getFileInfo, formatBytes, getValueType } from "../utils/json-parser.js";
2
2
  import { getDepthPreview, getValueAtPath } from "../utils/path-helpers.js";
3
+ import { dirname, resolve, isAbsolute } from "path";
3
4
  // Dynamic import for ajv (ESM/CJS compat)
4
5
  async function getAjv() {
5
6
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -115,7 +116,130 @@ export async function jsonSchema(filePath, path) {
115
116
  schema: inferSchema(targetData),
116
117
  };
117
118
  }
118
- export async function jsonValidate(filePath, schema, path) {
119
+ // Find all $ref values in a schema object
120
/**
 * Collect every string-valued `$ref` found anywhere inside a schema-like value.
 *
 * @param {unknown} obj - Value to walk; non-objects contribute nothing.
 * @param {Set<string>} [refs] - Accumulator shared across the recursion.
 * @returns {Set<string>} The accumulator, in discovery (pre-order) order.
 */
function findRefs(obj, refs = new Set()) {
    const isContainer = typeof obj === "object" && obj !== null;
    if (!isContainer) {
        return refs;
    }
    let children;
    if (Array.isArray(obj)) {
        children = obj;
    }
    else {
        // Record this node's own $ref before descending into its values.
        if (typeof obj.$ref === "string") {
            refs.add(obj.$ref);
        }
        children = Object.values(obj);
    }
    for (const child of children) {
        findRefs(child, refs);
    }
    return refs;
}
140
+ // Check if a ref is a local file path (not a URL or JSON pointer)
141
/**
 * Decide whether a `$ref` should be treated as a path to a local file.
 *
 * Anything that is neither an in-document JSON pointer (`#...`) nor an
 * http(s) URL counts as a local file reference.
 *
 * @param {string} ref - Raw `$ref` value.
 * @returns {boolean} True when the ref is to be resolved on disk.
 */
function isLocalFileRef(ref) {
    const nonFilePrefixes = ["#", "http://", "https://"];
    return !nonFilePrefixes.some((prefix) => ref.startsWith(prefix));
}
151
+ // Check if a ref is an HTTP URL
152
/**
 * Tell whether a `$ref` points at an HTTP or HTTPS URL.
 *
 * @param {string} ref - Raw `$ref` value or schema URI.
 * @returns {boolean} True for refs beginning with `http://` or `https://`.
 */
function isNetworkRef(ref) {
    return ["http://", "https://"].some((scheme) => ref.startsWith(scheme));
}
155
+ // Resolve a local file ref relative to the schema's directory
156
/**
 * Turn a local-file `$ref` into an absolute file path.
 *
 * @param {string} ref - `$ref` value, optionally carrying a `#fragment`.
 * @param {string} schemaDir - Directory that relative refs are resolved against.
 * @returns {string} The absolute path of the referenced file, or `ref`
 *   unchanged when it has no file part (a pure fragment or empty string).
 */
function resolveLocalRef(ref, schemaDir) {
    // Only the part before the first "#" names a file.
    const filePart = ref.split("#")[0];
    if (!filePart) {
        return ref;
    }
    return isAbsolute(filePart) ? filePart : resolve(schemaDir, filePart);
}
166
+ // Recursively load all local schema refs
167
/**
 * Load every schema file reachable through local-file `$ref`s, recursively.
 *
 * @param {unknown} schemaObj - Schema whose refs are followed.
 * @param {string} schemaDir - Directory used to resolve relative refs.
 * @param {Map<string, unknown>} [loaded] - Accumulator keyed by resolved file
 *   path; also prevents loading the same file twice.
 * @returns {Promise<Map<string, unknown>>} The accumulator.
 * @throws {Error} When a referenced file loads but is not a JSON Schema.
 *   Other load failures are skipped on purpose.
 */
async function loadLocalRefs(schemaObj, schemaDir, loaded = new Map()) {
    for (const ref of findRefs(schemaObj)) {
        if (!isLocalFileRef(ref)) {
            continue;
        }
        const target = resolveLocalRef(ref, schemaDir);
        if (loaded.has(target)) {
            continue;
        }
        try {
            const candidate = await loadJson(target);
            // Refuse files that parse as JSON but are not schemas.
            if (!isValidJsonSchema(candidate)) {
                throw new Error(`File ${target} is not a valid JSON Schema`);
            }
            loaded.set(target, candidate);
            // Refs inside the loaded file are relative to that file's directory.
            await loadLocalRefs(candidate, dirname(target), loaded);
        }
        catch (err) {
            const isSchemaRejection = err instanceof Error && err.message.includes("not a valid JSON Schema");
            if (isSchemaRejection) {
                throw err;
            }
            // Anything else (missing file, unreadable JSON, ...) is skipped here;
            // the unresolved ref is left for schema compilation to report.
        }
    }
    return loaded;
}
196
+ // Validate that an object looks like a JSON Schema
197
/**
 * Heuristically check that a parsed JSON value looks like a JSON Schema.
 *
 * @param {unknown} obj - Parsed JSON value.
 * @returns {boolean} True for an empty object (the match-anything schema) or
 *   a non-array object exposing at least one well-known schema keyword.
 *   False for null, primitives and arrays.
 */
function isValidJsonSchema(obj) {
    const isPlainObject = typeof obj === "object" && obj !== null && !Array.isArray(obj);
    if (!isPlainObject) {
        return false;
    }
    // {} is a valid schema that accepts anything.
    if (Object.keys(obj).length === 0) {
        return true;
    }
    // Common keywords; one hit is enough to treat the object as a schema.
    const knownKeywords = [
        "type", "properties", "items", "required", "enum", "const",
        "allOf", "anyOf", "oneOf", "not", "$ref", "$id", "$schema",
        "definitions", "$defs", "additionalProperties", "patternProperties",
        "minimum", "maximum", "minLength", "maxLength", "pattern", "format"
    ];
    for (const keyword of knownKeywords) {
        if (keyword in obj) {
            return true;
        }
    }
    return false;
}
216
+ // Fetch a schema from a URL
217
/**
 * Download a JSON Schema over HTTP(S) and sanity-check it.
 *
 * @param {string} url - Location of the schema.
 * @returns {Promise<object>} The parsed schema.
 * @throws {Error} On a non-2xx response, a body that is not JSON, or JSON
 *   that does not look like a JSON Schema.
 */
async function fetchNetworkSchema(url) {
    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`Failed to fetch schema from ${url}: ${response.status} ${response.statusText}`);
    }
    // Parse inside a helper so a parse failure becomes one clear message.
    const parseBody = async () => {
        try {
            return await response.json();
        }
        catch {
            throw new Error(`Invalid JSON received from ${url}`);
        }
    };
    const data = await parseBody();
    if (isValidJsonSchema(data)) {
        return data;
    }
    throw new Error(`URL ${url} did not return a valid JSON Schema`);
}
234
+ export async function jsonValidate(filePath, schema, path, options = {}) {
235
+ // Handle legacy signature where 4th param was errorLimit number
236
+ const opts = typeof options === "number"
237
+ ? { errorLimit: options }
238
+ : options;
239
+ // Environment variable can completely disable network refs for security
240
+ const networkRefsDisabled = process.env.JSON_EXPLORER_NO_NETWORK === "1";
241
+ const { errorLimit = 10, resolveLocalRefs = true, resolveNetworkRefs = false, } = opts;
242
+ const effectiveResolveNetworkRefs = resolveNetworkRefs && !networkRefsDisabled;
119
243
  const data = await loadJson(filePath);
120
244
  const targetData = path ? getValueAtPath(data, path) : data;
121
245
  if (targetData === undefined) {
@@ -123,26 +247,85 @@ export async function jsonValidate(filePath, schema, path) {
123
247
  }
124
248
  // Load schema from file if it's a string path
125
249
  let schemaObj;
250
+ let schemaDir;
126
251
  if (typeof schema === "string") {
127
252
  schemaObj = (await loadJson(schema));
253
+ schemaDir = dirname(resolve(schema));
128
254
  }
129
255
  else {
130
256
  schemaObj = schema;
257
+ // For inline schemas, use current working directory
258
+ schemaDir = process.cwd();
131
259
  }
132
260
  const { Ajv, addFormats } = await getAjv();
133
- const ajv = new Ajv({ allErrors: true });
261
+ // Configure ajv options
262
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
263
+ const ajvOptions = { allErrors: true };
264
+ // Set up network schema loading if enabled
265
+ if (effectiveResolveNetworkRefs) {
266
+ ajvOptions.loadSchema = async (uri) => {
267
+ if (isNetworkRef(uri)) {
268
+ return fetchNetworkSchema(uri);
269
+ }
270
+ throw new Error(`Cannot load non-network ref: ${uri}`);
271
+ };
272
+ }
273
+ const ajv = new Ajv(ajvOptions);
134
274
  addFormats(ajv);
135
- const validate = ajv.compile(schemaObj);
275
+ // Pre-load local refs if enabled
276
+ const resolvedRefs = [];
277
+ if (resolveLocalRefs) {
278
+ const localRefs = await loadLocalRefs(schemaObj, schemaDir);
279
+ for (const [refPath, refSchema] of localRefs) {
280
+ // If schema has $id, ajv will use that for resolution
281
+ // Otherwise we add it by its relative filename
282
+ const refRecord = refSchema;
283
+ if (refRecord.$id) {
284
+ // Schema has its own $id - ajv will register it by that
285
+ ajv.addSchema(refSchema);
286
+ }
287
+ else {
288
+ // No $id - register by filename (last component of path)
289
+ const filename = refPath.split("/").pop() || refPath;
290
+ ajv.addSchema(refSchema, filename);
291
+ }
292
+ resolvedRefs.push(refPath);
293
+ }
294
+ }
295
+ // Compile and validate
296
+ let validate;
297
+ try {
298
+ if (effectiveResolveNetworkRefs) {
299
+ // Use compileAsync for network refs
300
+ validate = await ajv.compileAsync(schemaObj);
301
+ }
302
+ else {
303
+ validate = ajv.compile(schemaObj);
304
+ }
305
+ }
306
+ catch (err) {
307
+ throw new Error(`Schema compilation failed: ${err instanceof Error ? err.message : String(err)}`);
308
+ }
136
309
  const valid = validate(targetData);
137
- const errors = (validate.errors || []).map((err) => ({
310
+ const allErrors = (validate.errors || []).map((err) => ({
138
311
  path: err.instancePath || "$",
139
312
  message: err.message || "Unknown error",
140
313
  keyword: err.keyword,
141
314
  params: err.params,
142
315
  }));
143
- return {
316
+ const totalErrorCount = allErrors.length;
317
+ const truncated = totalErrorCount > errorLimit;
318
+ const errors = truncated ? allErrors.slice(0, errorLimit) : allErrors;
319
+ const result = {
144
320
  valid: valid === true,
145
321
  errors,
146
- errorCount: errors.length,
322
+ errorCount: totalErrorCount,
147
323
  };
324
+ if (truncated) {
325
+ result.truncatedErrorCount = totalErrorCount - errorLimit;
326
+ }
327
+ if (resolvedRefs.length > 0) {
328
+ result.resolvedRefs = resolvedRefs;
329
+ }
330
+ return result;
148
331
  }
@@ -27,7 +27,14 @@ export async function loadJson(filePath) {
27
27
  }
28
28
  // Read and parse file
29
29
  const content = await readFile(filePath, "utf-8");
30
- const data = JSON.parse(content);
30
+ let data;
31
+ try {
32
+ data = JSON.parse(content);
33
+ }
34
+ catch (err) {
35
+ const message = err instanceof SyntaxError ? err.message : "Unknown parse error";
36
+ throw new Error(`Invalid JSON in ${filePath}: ${message}`);
37
+ }
31
38
  // Update cache (with size management)
32
39
  if (info.size < MAX_CACHE_SIZE / 2) {
33
40
  // Only cache files smaller than half the max cache size
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "json-explorer-mcp",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "description": "MCP server for efficiently exploring large JSON files",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",