@anyshift/mcp-proxy 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -31
- package/dist/__tests__/helpers/testUtils.d.ts +127 -0
- package/dist/__tests__/helpers/testUtils.js +122 -0
- package/dist/__tests__/unit/queryAssistSchema.test.d.ts +1 -0
- package/dist/__tests__/unit/queryAssistSchema.test.js +267 -0
- package/dist/__tests__/unit/truncation.test.d.ts +5 -0
- package/dist/__tests__/unit/truncation.test.js +204 -0
- package/dist/fileWriter/index.d.ts +1 -1
- package/dist/fileWriter/index.js +1 -1
- package/dist/fileWriter/schema.d.ts +38 -11
- package/dist/fileWriter/schema.js +248 -98
- package/dist/fileWriter/writer.js +66 -110
- package/dist/index.js +64 -28
- package/dist/jq/tool.js +15 -0
- package/dist/types/index.d.ts +6 -0
- package/package.json +11 -2
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for truncation module
|
|
3
|
+
* Tests token estimation and truncation logic
|
|
4
|
+
*/
|
|
5
|
+
import { describe, test, expect } from '@jest/globals';
|
|
6
|
+
import { estimateTokens, wouldBeTruncated, truncateResponseIfNeeded } from '../../truncation/truncate.js';
|
|
7
|
+
describe('Truncation - Token Estimation', () => {
    // Produces a filler string with exactly `length` characters.
    const filler = (length) => 'x'.repeat(length);

    test('estimateTokens with default ratio (4 chars/token)', () => {
        // Each pair is [input length, expected tokens] for the default ratio.
        const expectations = [
            [100, 25], // 100/4 = 25
            [1000, 250], // 1000/4 = 250
            [40000, 10000] // 40000/4 = 10000
        ];
        for (const [length, tokens] of expectations) {
            expect(estimateTokens(filler(length))).toBe(tokens);
        }
    });

    test('estimateTokens with custom ratio', () => {
        const hundredChars = filler(100);
        // 100/3 = 33.33, expected to round up to 34
        expect(estimateTokens(hundredChars, 3)).toBe(34);
        // 100/5 divides evenly
        expect(estimateTokens(hundredChars, 5)).toBe(20);
    });

    test('estimateTokens handles empty string', () => {
        const nothing = '';
        expect(estimateTokens(nothing)).toBe(0);
    });

    test('estimateTokens rounds up (ceiling)', () => {
        // 3/4 = 0.75 -> 1
        expect(estimateTokens(filler(3), 4)).toBe(1);
        // 5/4 = 1.25 -> 2
        expect(estimateTokens(filler(5), 4)).toBe(2);
    });
});
|
|
25
|
+
describe('Truncation - Would Be Truncated Check', () => {
    // All cases share the same limit (10000 tokens) and ratio (4 chars/token),
    // so the boundary sits at 40000 characters.
    const TOKEN_LIMIT = 10000;
    const CHARS_PER_TOKEN = 4;
    const exceedsLimit = (charCount) => wouldBeTruncated('x'.repeat(charCount), TOKEN_LIMIT, CHARS_PER_TOKEN);

    test('returns false when content is under limit', () => {
        // 30000 chars -> 7500 tokens
        expect(exceedsLimit(30000)).toBe(false);
    });

    test('returns true when content exceeds limit', () => {
        // 50000 chars -> 12500 tokens
        expect(exceedsLimit(50000)).toBe(true);
    });

    test('returns false when exactly at limit', () => {
        // 40000 chars -> exactly 10000 tokens
        expect(exceedsLimit(40000)).toBe(false);
    });

    test('returns true when 1 char over limit', () => {
        // 40001 chars -> 10000.25 -> 10001 tokens
        expect(exceedsLimit(40001)).toBe(true);
    });
});
|
|
43
|
+
describe('Truncation - Truncate Response If Needed', () => {
    const NOTICE_START = '=== RESPONSE TRUNCATED ===';
    const NOTICE_END = '=== END TRUNCATION NOTICE ===';

    /**
     * Builds a fresh truncation config for one test.
     *
     * A factory is used instead of a shared `let config` + `beforeEach` hook:
     * this file imports only `describe`, `test` and `expect` from
     * '@jest/globals', so the hook relied on Jest injecting globals. A new
     * object per call also means no test can leak a mutation into another.
     *
     * @param overrides - Fields to replace on the default config
     * @returns Config with maxTokens 10000 and 4 chars/token unless overridden
     */
    const makeConfig = (overrides = {}) => ({
        maxTokens: 10000,
        charsPerToken: 4,
        enableLogging: false,
        messagePrefix: 'RESPONSE TRUNCATED',
        ...overrides
    });

    test('Does not truncate when under limit', () => {
        const content = 'x'.repeat(30000); // 7500 tokens
        const result = truncateResponseIfNeeded(makeConfig(), content);
        expect(result).toBe(content);
        expect(result).not.toContain(NOTICE_START);
        expect(result.length).toBe(30000);
    });

    test('Truncates when over limit', () => {
        const content = 'x'.repeat(100000); // 25000 tokens
        const result = truncateResponseIfNeeded(makeConfig(), content);
        expect(result).toContain(NOTICE_START);
        expect(result).toContain('Estimated tokens: 25000 (limit: 10000)');
        expect(result).toContain('Please refine your query');
        expect(result).toContain(NOTICE_END);
    });

    test('Truncated content is under maxChars', () => {
        const config = makeConfig();
        const content = 'x'.repeat(100000); // 25000 tokens
        const result = truncateResponseIfNeeded(config, content);
        // Result should be truncation notice + 40000 chars of content
        const maxChars = config.maxTokens * (config.charsPerToken || 4);
        expect(result.length).toBeLessThan(maxChars + 500); // +500 for notice
    });

    test('Exactly at limit: no truncation', () => {
        const content = 'x'.repeat(40000); // exactly 10000 tokens
        const result = truncateResponseIfNeeded(makeConfig(), content);
        expect(result).toBe(content);
        expect(result).not.toContain(NOTICE_START);
    });

    test('One char over limit: truncates', () => {
        const content = 'x'.repeat(40001); // 10000.25 tokens
        const result = truncateResponseIfNeeded(makeConfig(), content);
        expect(result).toContain(NOTICE_START);
    });

    test('Custom message prefix', () => {
        const config = makeConfig({ messagePrefix: 'CUSTOM TRUNCATION MESSAGE' });
        const content = 'x'.repeat(100000);
        const result = truncateResponseIfNeeded(config, content);
        expect(result).toContain('=== CUSTOM TRUNCATION MESSAGE ===');
    });

    test('Custom charsPerToken ratio', () => {
        const config = makeConfig({ charsPerToken: 3 }); // Tighter ratio
        // 35000/3 = 11666.67, which is above the 10000 limit either way
        // (11667 if the estimate rounds up).
        const content = 'x'.repeat(35000);
        const result = truncateResponseIfNeeded(config, content);
        expect(result).toContain(NOTICE_START);
        expect(result).toContain('limit: 10000');
        // Truncated to 30000 chars (10000 * 3)
        const truncatedContent = result.split(NOTICE_END)[1];
        expect(truncatedContent.trim().length).toBeLessThanOrEqual(30000);
    });

    test('Truncation preserves original content up to limit', () => {
        const content = 'ABCD'.repeat(25000); // 100000 chars, 25000 tokens
        const result = truncateResponseIfNeeded(makeConfig(), content);
        // After truncation notice, should have first 40000 chars
        const truncatedContent = result.split(`${NOTICE_END}\n\n`)[1];
        expect(truncatedContent).toBe('ABCD'.repeat(10000)); // 40000 chars
    });

    test('Small content is not affected', () => {
        const content = 'Hello, world!';
        const result = truncateResponseIfNeeded(makeConfig(), content);
        expect(result).toBe(content);
    });

    test('Empty content is not affected', () => {
        const content = '';
        const result = truncateResponseIfNeeded(makeConfig(), content);
        expect(result).toBe('');
    });
});
|
|
121
|
+
describe('Truncation - Edge Cases', () => {
    const NOTICE_START = '=== RESPONSE TRUNCATED ===';

    /**
     * Returns a fresh default config (10000 tokens, 4 chars/token).
     *
     * Used instead of a `beforeEach` hook: this file imports only `describe`,
     * `test` and `expect` from '@jest/globals', so the hook depended on Jest
     * injecting globals.
     */
    const makeConfig = () => ({
        maxTokens: 10000,
        charsPerToken: 4,
        enableLogging: false,
        messagePrefix: 'RESPONSE TRUNCATED'
    });

    test('Handles very large content', () => {
        const content = 'x'.repeat(1000000); // 1 million chars
        const result = truncateResponseIfNeeded(makeConfig(), content);
        expect(result).toContain(NOTICE_START);
        expect(result).toContain('Estimated tokens: 250000');
        expect(result.length).toBeLessThan(50000); // Truncated + notice
    });

    test('Handles special characters', () => {
        // The rocket emoji is a surrogate pair, so each one counts as 2 in
        // String.prototype.length: 25000 emoji -> length 50000.
        const content = '🚀'.repeat(25000);
        const result = truncateResponseIfNeeded(makeConfig(), content);
        // Should still apply truncation logic based on string length
        expect(result).toContain(NOTICE_START);
    });

    test('Handles newlines and whitespace', () => {
        const content = 'line\n'.repeat(20000); // 100000 chars with newlines
        const result = truncateResponseIfNeeded(makeConfig(), content);
        expect(result).toContain(NOTICE_START);
    });

    test('Truncation notice format is consistent', () => {
        const content = 'x'.repeat(100000);
        const result = truncateResponseIfNeeded(makeConfig(), content);
        // Verify notice structure
        expect(result).toMatch(/=== RESPONSE TRUNCATED ===/);
        expect(result).toMatch(/Estimated tokens: \d+ \(limit: \d+\)/);
        expect(result).toMatch(/Response truncated to prevent context overflow\./);
        expect(result).toMatch(/=== END TRUNCATION NOTICE ===/);
        // Notice should be at the beginning
        expect(result.indexOf(NOTICE_START)).toBe(0);
    });
});
|
|
161
|
+
describe('Truncation - Different Token Limits', () => {
    // Separator between the truncation notice and the retained content.
    const NOTICE_END = '=== END TRUNCATION NOTICE ===\n\n';

    // Runs truncation on `inputLength` filler characters with the given token
    // limit. No messagePrefix is supplied, so the notice prefix comes from the
    // implementation's default.
    const runWithLimit = (maxTokens, inputLength) => {
        const settings = {
            maxTokens,
            charsPerToken: 4,
            enableLogging: false
        };
        return truncateResponseIfNeeded(settings, 'x'.repeat(inputLength));
    };

    // Length of the content that follows the truncation notice.
    const keptLength = (output) => output.split(NOTICE_END)[1].length;

    test('5000 token limit', () => {
        const output = runWithLimit(5000, 30000); // 7500 tokens
        expect(output).toContain('=== RESPONSE TRUNCATED ===');
        expect(output).toContain('limit: 5000');
        // Should truncate to 20000 chars (5000 * 4)
        expect(keptLength(output)).toBe(20000);
    });

    test('20000 token limit', () => {
        const output = runWithLimit(20000, 100000); // 25000 tokens
        expect(output).toContain('=== RESPONSE TRUNCATED ===');
        expect(output).toContain('limit: 20000');
        // Should truncate to 80000 chars (20000 * 4)
        expect(keptLength(output)).toBe(80000);
    });

    test('Very low token limit (100)', () => {
        const output = runWithLimit(100, 1000); // 250 tokens
        expect(output).toContain('=== RESPONSE TRUNCATED ===');
        expect(output).toContain('limit: 100');
        // Should truncate to 400 chars (100 * 4)
        expect(keptLength(output)).toBe(400);
    });
});
|
|
@@ -15,4 +15,4 @@ export declare function createFileWriter(config: FileWriterConfig): {
|
|
|
15
15
|
handleResponse: (toolName: string, args: Record<string, unknown>, responseData: unknown) => Promise<FileWriterResult | unknown>;
|
|
16
16
|
};
|
|
17
17
|
export type { FileWriterConfig, FileWriterResult } from './types.js';
|
|
18
|
-
export {
|
|
18
|
+
export { generateQueryAssistSchema } from './schema.js';
|
package/dist/fileWriter/index.js
CHANGED
|
@@ -1,15 +1,42 @@
|
|
|
1
|
-
import { JsonSchema, NullableFields } from '../types/index.js';
|
|
2
1
|
/**
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
2
|
+
* Query-Assist Schema Generator
|
|
3
|
+
*
|
|
4
|
+
* Generates compact, LLM-friendly schemas optimized for crafting JQ queries.
|
|
5
|
+
* Uses JQ-style path notation (.items[].price) instead of JSON Schema.
|
|
6
|
+
* Includes exploration prompts when limits are reached.
|
|
7
7
|
*/
|
|
8
|
-
export
|
|
8
|
+
/**
 * One entry in the generated structure guide: a single location in the
 * analyzed JSON value together with what was found there.
 */
export interface PathInfo {
    /** JQ-style path to the value (e.g. `.items[].price`); empty string for the root. */
    path: string;
    /**
     * Kind of value found: 'null', 'array', 'object', 'object (numeric keys)',
     * 'mixed' (heterogeneous array items), or the JavaScript `typeof` of a primitive.
     */
    type: string;
    /** Nesting level of the path; the root value is depth 0. */
    depth: number;
    /** Set to true when the value at this path was `null`. */
    nullable?: boolean;
    /** Number of elements, present for array entries. */
    arrayLength?: number;
    /** Number of keys, present for object entries (including 0). */
    keyCount?: number;
    /** Set to true on the `[]` entry of an array whose sampled items had more than one type. */
    mixed?: boolean;
}
|
|
17
|
+
/**
 * Records which analysis limits were reached while collecting paths, plus
 * the limit values themselves, so the formatter can emit exploration hints.
 */
export interface LimitMetadata {
    /** True when traversal stopped because a value was deeper than `maxDepth`. */
    depthLimitHit: boolean;
    /** True when an object had more keys than `maxKeys` and some were skipped. */
    keyLimitHit: boolean;
    /** True when more paths were collected than `maxPaths` allows to be shown. */
    pathLimitHit: boolean;
    /** True when sampled array items differed in type or in object keys. */
    mixedSchemasDetected: boolean;
    /** Maximum traversal depth that was applied. */
    maxDepth: number;
    /** Maximum number of keys analyzed per object. */
    maxKeys: number;
    /** Maximum number of paths included in the output. */
    maxPaths: number;
    /** Total number of paths collected before the `maxPaths` selection. */
    totalPathsFound: number;
    /** First path that was cut off by the depth limit, if any. */
    deepestPathTruncated?: string;
    /** Number of keys skipped on an object that exceeded `maxKeys`. */
    truncatedKeyCount?: number;
}
|
|
29
|
+
/**
 * Optional tuning knobs for `generateQueryAssistSchema`.
 */
export interface QueryAssistOptions {
    /** Maximum depth to traverse (default: 2). */
    maxDepth?: number;
    /** Maximum number of paths to include in the output (default: 20). */
    maxPaths?: number;
    /** Maximum keys to analyze per object (default: 50). */
    maxKeys?: number;
    /**
     * Size of the original data in characters. When provided it is reported
     * as-is, which avoids re-stringifying the data just to measure it.
     */
    dataSize?: number;
}
|
|
9
35
|
/**
 * Generate query-assist schema for JSON data.
 * Main entry point for schema generation.
 *
 * @param data - JSON data to analyze
 * @param options - Configuration options (depth, path and key limits; optional
 *   precomputed data size)
 * @returns Compact text schema optimized for JQ queries
 */
export declare function generateQueryAssistSchema(data: unknown, options?: QueryAssistOptions): string;
|
|
@@ -1,120 +1,270 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
2
|
+
* Query-Assist Schema Generator
|
|
3
|
+
*
|
|
4
|
+
* Generates compact, LLM-friendly schemas optimized for crafting JQ queries.
|
|
5
|
+
* Uses JQ-style path notation (.items[].price) instead of JSON Schema.
|
|
6
|
+
* Includes exploration prompts when limits are reached.
|
|
6
7
|
*/
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
8
|
+
/**
 * Collect all paths from JSON data with limits applied.
 *
 * Walks the value depth-first and records one entry per visited location.
 * Arrays contribute a single `[]` child taken from the first sampled non-null
 * item, and objects whose keys are mostly numeric contribute a single
 * `.[<idx>]` child instead of one entry per key.
 *
 * Object keys that are not plain identifiers (for example `foo-bar`, `a b`
 * or `0`) are written as `["key"]` so that the emitted path is valid jq;
 * `.foo-bar` would be parsed by jq as a subtraction.
 *
 * @param data - The JSON data to analyze
 * @param maxDepth - Maximum depth to traverse (default: 2)
 * @param maxKeys - Maximum keys to analyze per object (default: 50)
 * @returns Object with `paths` (array of path information) and `limits`
 *   (limit metadata; `maxPaths`/`pathLimitHit` are left for the caller to set)
 */
function collectPaths(data, maxDepth = 2, maxKeys = 50) {
    const paths = [];
    const limits = {
        depthLimitHit: false,
        keyLimitHit: false,
        pathLimitHit: false,
        mixedSchemasDetected: false,
        maxDepth,
        maxKeys,
        maxPaths: 0, // Will be set later
        totalPathsFound: 0
    };
    // Keys matching this pattern can be addressed as `.key` in jq.
    const plainKey = /^[A-Za-z_][A-Za-z0-9_]*$/;
    // Builds the jq path of `key` under `parentPath` ('' means the root).
    function childPathFor(parentPath, key) {
        if (plainKey.test(key)) {
            return `${parentPath}.${key}`;
        }
        return `${parentPath || '.'}[${JSON.stringify(key)}]`;
    }
    function traverse(val, path, depth) {
        // Hard stop at max depth
        if (depth > maxDepth) {
            limits.depthLimitHit = true;
            // Only the first cut-off path is remembered.
            if (!limits.deepestPathTruncated) {
                limits.deepestPathTruncated = path;
            }
            return;
        }
        if (val === null) {
            paths.push({ path, type: 'null', depth, nullable: true });
        }
        else if (Array.isArray(val)) {
            paths.push({ path, type: 'array', depth, arrayLength: val.length });
            if (val.length === 0) {
                return; // Empty array, nothing to explore
            }
            // Sample first 5 items to detect schema variance
            const sample = val.slice(0, Math.min(5, val.length));
            const types = new Set(sample.map(item => item === null
                ? 'null'
                : Array.isArray(item)
                    ? 'array'
                    : typeof item));
            // Detect mixed schemas (heterogeneous arrays)
            // Mixed if we have more than 1 distinct type
            const mixed = types.size > 1;
            if (mixed) {
                limits.mixedSchemasDetected = true;
            }
            // Traverse first non-null item
            const first = sample.find(v => v !== null);
            if (first !== undefined) {
                const arrayPath = `${path}[]`;
                if (mixed) {
                    // For mixed arrays, only show the mixed marker (don't traverse to avoid duplicate paths)
                    paths.push({ path: arrayPath, type: 'mixed', depth: depth + 1, mixed: true });
                }
                else {
                    // For uniform arrays, traverse to show the structure
                    traverse(first, arrayPath, depth + 1);
                }
            }
            // For objects in arrays, also check if they have different keys
            if (types.has('object')) {
                const objects = sample.filter(v => v && typeof v === 'object' && !Array.isArray(v));
                if (objects.length >= 2) {
                    const keySets = objects.map(o => new Set(Object.keys(o)));
                    // Compare each sampled object with its neighbour
                    for (let i = 0; i < keySets.length - 1; i++) {
                        const keys1 = Array.from(keySets[i]);
                        const keys2 = Array.from(keySets[i + 1]);
                        if (keys1.length !== keys2.length || !keys1.every(k => keySets[i + 1].has(k))) {
                            limits.mixedSchemasDetected = true;
                            break;
                        }
                    }
                }
            }
        }
        else if (typeof val === 'object') {
            const keys = Object.keys(val).sort();
            // Check for numeric keys (common pattern - treat as collection like arrays)
            const numericKeys = keys.filter(k => /^\d+$/.test(k));
            const hasNumericKeys = keys.length > 0 && numericKeys.length >= keys.length * 0.8;
            // Always show key count (including 0)
            paths.push({
                path,
                type: hasNumericKeys ? 'object (numeric keys)' : 'object',
                depth,
                keyCount: keys.length
            });
            if (hasNumericKeys) {
                // Treat as collection - show ONE representative item structure
                // This avoids enumerating .0, .1, .2, ... which is repetitive and wastes space
                const representativePath = `${path}.[<idx>]`;
                // Use a numeric key as the representative: up to 20% of the keys may be
                // non-numeric, and one of those can sort first (e.g. "-" precedes digits).
                traverse(val[numericKeys[0]], representativePath, depth + 1);
            }
            else {
                // Normal object - traverse keys individually
                const keysToAnalyze = keys.slice(0, maxKeys);
                if (keys.length > maxKeys) {
                    limits.keyLimitHit = true;
                    limits.truncatedKeyCount = keys.length - maxKeys;
                }
                // Traverse child keys
                for (const key of keysToAnalyze) {
                    traverse(val[key], childPathFor(path, key), depth + 1);
                }
            }
        }
        else {
            // Primitive types
            paths.push({ path, type: typeof val, depth });
        }
    }
    traverse(data, '', 0);
    limits.totalPathsFound = paths.length;
    return { paths, limits };
}
|
|
69
132
|
/**
 * Pick the most relevant paths for display.
 *
 * Every path receives a relevance score; the highest-scoring `maxPaths`
 * entries are returned, each as a copy of the input entry with an added
 * `score` property. The input array is not modified.
 *
 * @param paths - All collected paths
 * @param maxPaths - Maximum paths to return
 * @returns Prioritized subset of paths
 */
function selectTopPaths(paths, maxPaths) {
    // Relevance of a single entry; higher means it is shown first.
    const relevanceOf = (entry) => {
        const nonNullBonus = entry.nullable ? 0 : 10; // Non-null values rank higher
        const shallowBonus = (3 - entry.depth) * 5; // Shallower paths rank higher
        const arrayBonus = entry.type === 'array' ? 5 : 0; // Arrays are interesting
        const isObjectLike = entry.type === 'object' || entry.type === 'object (numeric keys)';
        const objectBonus = isObjectLike ? 3 : 0; // Objects are interesting
        const mixedBonus = entry.mixed ? 2 : 0; // Mixed types are interesting
        return nonNullBonus + shallowBonus + arrayBonus + objectBonus + mixedBonus;
    };
    const ranked = paths.map((entry) => ({ ...entry, score: relevanceOf(entry) }));
    // Highest score first; equal scores fall back to the shorter path string.
    ranked.sort((left, right) => (left.score === right.score
        ? left.path.length - right.path.length
        : right.score - left.score));
    return ranked.slice(0, maxPaths);
}
|
|
157
|
+
/**
 * Format paths as query-assist text with exploration prompts.
 *
 * The output starts with a header and the data size, then lists the paths
 * grouped by depth (ROOT, TOP-LEVEL, NESTED). When numeric-keyed objects are
 * present or any limit was hit, an exploration guide with jq hints follows.
 *
 * The jq hints are written to be valid jq: numeric object keys are accessed
 * as quoted strings (`.["0"]`), because jq refuses to index an object with a
 * number.
 *
 * @param paths - Selected paths to display
 * @param limits - Limit metadata for generating prompts
 * @param dataSize - Size of original data in characters
 * @returns Formatted text schema
 */
function formatQueryAssist(paths, limits, dataSize) {
    let output = '📊 STRUCTURE GUIDE (for JQ queries)\n\n';
    output += `Size: ${dataSize.toLocaleString()} characters\n\n`;
    // Group paths by depth
    const byDepth = {};
    for (const p of paths) {
        if (!byDepth[p.depth])
            byDepth[p.depth] = [];
        byDepth[p.depth].push(p);
    }
    // Format paths by depth levels (object keys are strings, so convert before sorting)
    const depths = Object.keys(byDepth)
        .map(Number)
        .sort((a, b) => a - b);
    for (const depth of depths) {
        const depthPaths = byDepth[depth];
        const label = depth === 0 ? 'ROOT' : depth === 1 ? 'TOP-LEVEL' : `NESTED (depth ${depth})`;
        output += `${label}:\n`;
        for (const p of depthPaths) {
            const pathStr = p.path || '(root)';
            output += ` ${pathStr.padEnd(35)}`;
            output += ` → ${p.type}`;
            if (p.arrayLength !== undefined) {
                output += `[${p.arrayLength}]`;
            }
            if (p.keyCount !== undefined) {
                output += ` (${p.keyCount} keys)`;
            }
            if (p.nullable) {
                output += ' (nullable)';
            }
            if (p.mixed) {
                output += ' ⚠️ MIXED SCHEMAS';
            }
            // Containers sitting exactly at the depth limit were not explored further.
            if (depth === limits.maxDepth && (p.type === 'object' || p.type === 'object (numeric keys)' || p.type === 'array')) {
                output += ' ⚠️ DEPTH LIMIT';
            }
            output += '\n';
        }
        output += '\n';
    }
    // Check if we have numeric-keyed objects in the output
    const hasNumericKeys = paths.some(p => p.path.includes('.[<idx>]'));
    // Build exploration guide if any limits were hit or special patterns detected
    const hasLimits = limits.depthLimitHit || limits.keyLimitHit || limits.pathLimitHit || limits.mixedSchemasDetected;
    if (hasNumericKeys || hasLimits) {
        output += '💡 EXPLORATION GUIDE\n\n';
        // Numeric keys note (data-specific, keep separate)
        if (hasNumericKeys) {
            output += 'NUMERIC KEYS: .[<idx>] represents structure shared by all numeric keys\n';
            // Only the quoted form is advertised: jq errors on `.[0]` for objects.
            output += ' Access: .["0"], .["1"] (quoted string keys; .[0] fails on objects) | List: keys\n\n';
        }
        // Show which limits were hit
        if (hasLimits) {
            const limitWarnings = [];
            if (limits.depthLimitHit) {
                limitWarnings.push(`DEPTH (max: ${limits.maxDepth})`);
            }
            if (limits.keyLimitHit && limits.truncatedKeyCount) {
                limitWarnings.push(`KEYS (${limits.maxKeys} shown, ${limits.truncatedKeyCount} more)`);
            }
            if (limits.pathLimitHit) {
                limitWarnings.push(`PATHS (${limits.maxPaths} of ${limits.totalPathsFound})`);
            }
            if (limits.mixedSchemasDetected) {
                limitWarnings.push('MIXED SCHEMAS');
            }
            output += `⚠️ Limits: ${limitWarnings.join(' | ')}\n\n`;
            // Generic JQ exploration patterns
            output += 'Common JQ patterns:\n';
            output += ' • View keys: <path> | keys\n';
            output += ' • Check type: <path> | type\n';
            output += ' • Count items: <path> | length\n';
            output += ' • Search keys: keys | map(select(contains("term")))\n';
            // Each path emitted by paths(scalars) is an array of segments; stringify
            // the segments and join that array (join must not be applied per segment).
            output += ' • List all paths: paths(scalars) | map(tostring) | join(".")\n';
            output += ' • Filter arrays: .[] | select(type == "object")\n';
            if (limits.mixedSchemasDetected) {
                // A slice needs the leading dot to be a valid jq filter.
                output += ' • Check variance: .[] | type or .[:3] | map(keys)\n';
            }
            output += '\n';
        }
    }
    return output;
}
|
|
248
|
+
/**
 * Generate query-assist schema for JSON data.
 * Main entry point for schema generation.
 *
 * Collects paths under the depth/key limits, keeps the top `maxPaths` by
 * relevance, and formats the result as text.
 *
 * @param data - JSON data to analyze
 * @param options - Configuration options: `maxDepth` (default 2), `maxPaths`
 *   (default 20), `maxKeys` (default 50) and an optional precomputed
 *   `dataSize`; `null`/`undefined` is treated as no options
 * @returns Compact text schema optimized for JQ queries
 */
export function generateQueryAssistSchema(data, options = {}) {
    // The default parameter only covers `undefined`; tolerate an explicit null too.
    const opts = options ?? {};
    const maxDepth = opts.maxDepth ?? 2;
    const maxPaths = opts.maxPaths ?? 20;
    const maxKeys = opts.maxKeys ?? 50;
    // Collect paths with limits
    const { paths, limits } = collectPaths(data, maxDepth, maxKeys);
    // Select top paths
    const selectedPaths = selectTopPaths(paths, maxPaths);
    // Update limit metadata
    limits.maxPaths = maxPaths;
    limits.pathLimitHit = paths.length > maxPaths;
    // Calculate data size (use provided size if available to avoid re-stringifying).
    // JSON.stringify returns undefined for values with no JSON form (e.g. undefined),
    // so fall back to an empty string instead of throwing on `.length`.
    const dataSize = opts.dataSize ?? (JSON.stringify(data) ?? '').length;
    // Format as text
    return formatQueryAssist(selectedPaths, limits, dataSize);
}
|