@anyshift/mcp-proxy 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,204 @@
1
+ /**
2
+ * Unit tests for truncation module
3
+ * Tests token estimation and truncation logic
4
+ */
5
+ import { describe, test, expect } from '@jest/globals';
6
+ import { estimateTokens, wouldBeTruncated, truncateResponseIfNeeded } from '../../truncation/truncate.js';
7
describe('Truncation - Token Estimation', () => {
    // Builds a string made of `length` copies of 'x'.
    const filler = (length) => 'x'.repeat(length);

    test('estimateTokens with default ratio (4 chars/token)', () => {
        // [input length, expected token estimate] when no ratio is passed
        const expectations = [
            [100, 25],
            [1000, 250],
            [40000, 10000]
        ];
        for (const [length, tokens] of expectations) {
            expect(estimateTokens(filler(length))).toBe(tokens);
        }
    });
    test('estimateTokens with custom ratio', () => {
        const hundredChars = filler(100);
        expect(estimateTokens(hundredChars, 3)).toBe(34); // 33.33 rounds up to 34
        expect(estimateTokens(hundredChars, 5)).toBe(20); // divides evenly
    });
    test('estimateTokens handles empty string', () => {
        expect(estimateTokens('')).toBe(0);
    });
    test('estimateTokens rounds up (ceiling)', () => {
        expect(estimateTokens('xxx', 4)).toBe(1); // 0.75 -> 1
        expect(estimateTokens('xxxxx', 4)).toBe(2); // 1.25 -> 2
    });
});
25
describe('Truncation - Would Be Truncated Check', () => {
    const MAX_TOKENS = 10000;
    const CHARS_PER_TOKEN = 4;
    // Runs the check against `charCount` filler characters with the limits above.
    const exceedsLimit = (charCount) => wouldBeTruncated('x'.repeat(charCount), MAX_TOKENS, CHARS_PER_TOKEN);

    test('returns false when content is under limit', () => {
        // 30000 chars -> 7500 tokens
        expect(exceedsLimit(30000)).toBe(false);
    });
    test('returns true when content exceeds limit', () => {
        // 50000 chars -> 12500 tokens
        expect(exceedsLimit(50000)).toBe(true);
    });
    test('returns false when exactly at limit', () => {
        // 40000 chars -> exactly 10000 tokens
        expect(exceedsLimit(40000)).toBe(false);
    });
    test('returns true when 1 char over limit', () => {
        // 40001 chars -> 10000.25, expected to count as 10001 tokens
        expect(exceedsLimit(40001)).toBe(true);
    });
});
43
describe('Truncation - Truncate Response If Needed', () => {
    const NOTICE_START = '=== RESPONSE TRUNCATED ===';
    const NOTICE_END = '=== END TRUNCATION NOTICE ===';
    /**
     * Build a fresh truncation config for one test.
     * A local factory replaces the previous `beforeEach` hook: `beforeEach` is
     * not among the names this file imports from '@jest/globals', so the suite
     * only worked when Jest injected its globals.
     * @param overrides - Fields to change for a specific test
     */
    const makeConfig = (overrides = {}) => ({
        maxTokens: 10000,
        charsPerToken: 4,
        enableLogging: false,
        messagePrefix: 'RESPONSE TRUNCATED',
        ...overrides
    });
    test('Does not truncate when under limit', () => {
        const config = makeConfig();
        const content = 'x'.repeat(30000); // 7500 tokens
        const result = truncateResponseIfNeeded(config, content);
        expect(result).toBe(content);
        expect(result).not.toContain(NOTICE_START);
        expect(result.length).toBe(30000);
    });
    test('Truncates when over limit', () => {
        const config = makeConfig();
        const content = 'x'.repeat(100000); // 25000 tokens
        const result = truncateResponseIfNeeded(config, content);
        expect(result).toContain(NOTICE_START);
        expect(result).toContain('Estimated tokens: 25000 (limit: 10000)');
        expect(result).toContain('Please refine your query');
        expect(result).toContain(NOTICE_END);
    });
    test('Truncated content is under maxChars', () => {
        const config = makeConfig();
        const content = 'x'.repeat(100000); // 25000 tokens
        const result = truncateResponseIfNeeded(config, content);
        // Result should be truncation notice + 40000 chars of content
        const maxChars = config.maxTokens * (config.charsPerToken || 4);
        expect(result.length).toBeLessThan(maxChars + 500); // +500 for notice
    });
    test('Exactly at limit: no truncation', () => {
        const config = makeConfig();
        const content = 'x'.repeat(40000); // exactly 10000 tokens
        const result = truncateResponseIfNeeded(config, content);
        expect(result).toBe(content);
        expect(result).not.toContain(NOTICE_START);
    });
    test('One char over limit: truncates', () => {
        const config = makeConfig();
        const content = 'x'.repeat(40001); // 10000.25 tokens
        const result = truncateResponseIfNeeded(config, content);
        expect(result).toContain(NOTICE_START);
    });
    test('Custom message prefix', () => {
        const config = makeConfig({ messagePrefix: 'CUSTOM TRUNCATION MESSAGE' });
        const content = 'x'.repeat(100000);
        const result = truncateResponseIfNeeded(config, content);
        expect(result).toContain('=== CUSTOM TRUNCATION MESSAGE ===');
    });
    test('Custom charsPerToken ratio', () => {
        const config = makeConfig({ charsPerToken: 3 }); // Tighter ratio
        const content = 'x'.repeat(35000); // 11666 tokens with ratio of 3
        const result = truncateResponseIfNeeded(config, content);
        // Should truncate (11666 > 10000)
        expect(result).toContain(NOTICE_START);
        expect(result).toContain('limit: 10000');
        // Truncated to 30000 chars (10000 * 3)
        const truncatedContent = result.split(NOTICE_END)[1];
        expect(truncatedContent.trim().length).toBeLessThanOrEqual(30000);
    });
    test('Truncation preserves original content up to limit', () => {
        const config = makeConfig();
        const content = 'ABCD'.repeat(25000); // 100000 chars, 25000 tokens
        const result = truncateResponseIfNeeded(config, content);
        // After truncation notice, should have first 40000 chars
        const truncatedContent = result.split(`${NOTICE_END}\n\n`)[1];
        expect(truncatedContent).toBe('ABCD'.repeat(10000)); // 40000 chars
    });
    test('Small content is not affected', () => {
        const config = makeConfig();
        const content = 'Hello, world!';
        const result = truncateResponseIfNeeded(config, content);
        expect(result).toBe(content);
    });
    test('Empty content is not affected', () => {
        const config = makeConfig();
        const content = '';
        const result = truncateResponseIfNeeded(config, content);
        expect(result).toBe('');
    });
});
121
describe('Truncation - Edge Cases', () => {
    const NOTICE_START = '=== RESPONSE TRUNCATED ===';
    /**
     * Build a fresh truncation config for one test.
     * Used instead of a `beforeEach` hook because `beforeEach` is not among
     * the names this file imports from '@jest/globals'.
     */
    const makeConfig = () => ({
        maxTokens: 10000,
        charsPerToken: 4,
        enableLogging: false,
        messagePrefix: 'RESPONSE TRUNCATED'
    });
    test('Handles very large content', () => {
        const content = 'x'.repeat(1000000); // 1 million chars
        const result = truncateResponseIfNeeded(makeConfig(), content);
        expect(result).toContain(NOTICE_START);
        expect(result).toContain('Estimated tokens: 250000');
        expect(result.length).toBeLessThan(50000); // Truncated + notice
    });
    test('Handles special characters', () => {
        // The rocket emoji lies outside the BMP, so each one is two UTF-16
        // code units and content.length is 50000 here.
        const content = '🚀'.repeat(25000);
        const result = truncateResponseIfNeeded(makeConfig(), content);
        // Should still apply truncation logic based on string length
        expect(result).toContain(NOTICE_START);
    });
    test('Handles newlines and whitespace', () => {
        const content = 'line\n'.repeat(20000); // 100000 chars with newlines
        const result = truncateResponseIfNeeded(makeConfig(), content);
        expect(result).toContain(NOTICE_START);
    });
    test('Truncation notice format is consistent', () => {
        const content = 'x'.repeat(100000);
        const result = truncateResponseIfNeeded(makeConfig(), content);
        // Verify notice structure
        expect(result).toMatch(/=== RESPONSE TRUNCATED ===/);
        expect(result).toMatch(/Estimated tokens: \d+ \(limit: \d+\)/);
        expect(result).toMatch(/Response truncated to prevent context overflow\./);
        expect(result).toMatch(/=== END TRUNCATION NOTICE ===/);
        // Notice should be at the beginning
        expect(result.indexOf(NOTICE_START)).toBe(0);
    });
});
161
describe('Truncation - Different Token Limits', () => {
    // Separator between the truncation notice and the retained content.
    const NOTICE_TAIL = '=== END TRUNCATION NOTICE ===\n\n';
    // Each scenario: token limit, input size in chars, and the number of
    // chars expected after the notice (maxTokens * 4 chars per token).
    const scenarios = [
        { title: '5000 token limit', maxTokens: 5000, inputChars: 30000, keptChars: 20000 },
        { title: '20000 token limit', maxTokens: 20000, inputChars: 100000, keptChars: 80000 },
        { title: 'Very low token limit (100)', maxTokens: 100, inputChars: 1000, keptChars: 400 }
    ];
    for (const { title, maxTokens, inputChars, keptChars } of scenarios) {
        test(title, () => {
            const config = {
                maxTokens,
                charsPerToken: 4,
                enableLogging: false
            };
            const result = truncateResponseIfNeeded(config, 'x'.repeat(inputChars));
            expect(result).toContain('=== RESPONSE TRUNCATED ===');
            expect(result).toContain(`limit: ${maxTokens}`);
            const truncatedContent = result.split(NOTICE_TAIL)[1];
            expect(truncatedContent.length).toBe(keptChars);
        });
    }
});
@@ -15,4 +15,4 @@ export declare function createFileWriter(config: FileWriterConfig): {
15
15
  handleResponse: (toolName: string, args: Record<string, unknown>, responseData: unknown) => Promise<FileWriterResult | unknown>;
16
16
  };
17
17
  export type { FileWriterConfig, FileWriterResult } from './types.js';
18
- export { analyzeJsonSchema, extractNullableFields } from './schema.js';
18
+ export { generateQueryAssistSchema } from './schema.js';
@@ -18,4 +18,4 @@ export function createFileWriter(config) {
18
18
  },
19
19
  };
20
20
  }
21
- export { analyzeJsonSchema, extractNullableFields } from './schema.js';
21
+ export { generateQueryAssistSchema } from './schema.js';
@@ -1,15 +1,42 @@
1
- import { JsonSchema, NullableFields } from '../types/index.js';
2
1
  /**
3
- * Analyze JSON structure and generate enhanced schema
4
- * @param obj - The object to analyze
5
- * @param path - Current path in the object (for debugging)
6
- * @returns Schema representation of the object
2
+ * Query-Assist Schema Generator
3
+ *
4
+ * Generates compact, LLM-friendly schemas optimized for crafting JQ queries.
5
+ * Uses JQ-style path notation (.items[].price) instead of JSON Schema.
6
+ * Includes exploration prompts when limits are reached.
7
7
  */
8
- export declare function analyzeJsonSchema(obj: unknown, path?: string): JsonSchema;
8
/**
 * One entry discovered while walking the analyzed JSON value.
 */
export interface PathInfo {
    /** JQ-style path to the value (e.g. `.items[].price`); empty string for the root. */
    path: string;
    /** Type label: 'null', 'array', 'object', 'object (numeric keys)', 'mixed', or a `typeof` result for primitives. */
    type: string;
    /** Nesting level of the entry; the root value is depth 0. */
    depth: number;
    /** Set to true when the value found at this path was null. */
    nullable?: boolean;
    /** Number of elements; present on array entries. */
    arrayLength?: number;
    /** Number of keys; present on object entries (including empty objects). */
    keyCount?: number;
    /** True on the `[]` entry of an array whose sampled items have more than one type. */
    mixed?: boolean;
}
17
/**
 * Records which traversal/selection limits were reached while building a schema.
 */
export interface LimitMetadata {
    /** A value deeper than `maxDepth` was encountered and not explored. */
    depthLimitHit: boolean;
    /** An object had more than `maxKeys` keys, so only some were analyzed. */
    keyLimitHit: boolean;
    /** More paths were collected than `maxPaths`, so the output is a subset. */
    pathLimitHit: boolean;
    /** Sampled array items differed in type or in their sets of object keys. */
    mixedSchemasDetected: boolean;
    /** Depth limit that was in effect. */
    maxDepth: number;
    /** Per-object key limit that was in effect. */
    maxKeys: number;
    /** Path-count limit that was in effect. */
    maxPaths: number;
    /** Number of paths collected before the path limit was applied. */
    totalPathsFound: number;
    /** Path at which the depth limit was first hit, if it was hit. */
    deepestPathTruncated?: string;
    /** Count of keys beyond `maxKeys` that were left unanalyzed on an oversized object. */
    truncatedKeyCount?: number;
}
29
/**
 * Optional tuning knobs for `generateQueryAssistSchema`.
 */
export interface QueryAssistOptions {
    /** Maximum depth to traverse (default: 2). */
    maxDepth?: number;
    /** Maximum number of paths included in the output (default: 20). */
    maxPaths?: number;
    /** Maximum keys analyzed per object (default: 50). */
    maxKeys?: number;
    /** Size of the original data in characters; when omitted it is derived by stringifying the data. */
    dataSize?: number;
}
9
35
/**
 * Generate a query-assist schema for JSON data.
 * This is the main entry point for schema generation.
 * @param data - JSON data to analyze
 * @param options - Optional limits and precomputed data size
 * @returns Compact text schema optimized for JQ queries
 */
export declare function generateQueryAssistSchema(data: unknown, options?: QueryAssistOptions): string;
@@ -1,120 +1,270 @@
1
1
  /**
2
- * Analyze JSON structure and generate enhanced schema
3
- * @param obj - The object to analyze
4
- * @param path - Current path in the object (for debugging)
5
- * @returns Schema representation of the object
2
+ * Query-Assist Schema Generator
3
+ *
4
+ * Generates compact, LLM-friendly schemas optimized for crafting JQ queries.
5
+ * Uses JQ-style path notation (.items[].price) instead of JSON Schema.
6
+ * Includes exploration prompts when limits are reached.
6
7
  */
7
- export function analyzeJsonSchema(obj, path = 'root') {
8
- if (obj === null)
9
- return { type: 'null' };
10
- if (obj === undefined)
11
- return { type: 'undefined' };
12
- const type = Array.isArray(obj) ? 'array' : typeof obj;
13
- if (type === 'object') {
14
- const properties = {};
15
- const objRecord = obj;
16
- const keys = Object.keys(objRecord);
17
- // Detect numeric string keys (common in Cypher results)
18
- const numericKeys = keys.filter((k) => /^\d+$/.test(k));
19
- const hasNumericKeys = keys.length > 0 && numericKeys.length >= keys.length * 0.8;
20
- for (const key in objRecord) {
21
- if (Object.prototype.hasOwnProperty.call(objRecord, key)) {
22
- properties[key] = analyzeJsonSchema(objRecord[key], `${path}.${key}`);
8
/**
 * Collect all paths from JSON data with limits applied.
 *
 * Walks the value depth-first and records one entry per visited location
 * using JQ-style paths. Keys that are not identifier-like (for example
 * `first-name`, `0` or `a b`) are emitted in bracket form (`["first-name"]`),
 * because jq only accepts the `.name` shorthand for identifier-like keys.
 *
 * @param data - The JSON data to analyze
 * @param maxDepth - Maximum depth to traverse (default: 2)
 * @param maxKeys - Maximum keys to analyze per object (default: 50)
 * @returns Array of path information and limit metadata
 */
function collectPaths(data, maxDepth = 2, maxKeys = 50) {
    const paths = [];
    const limits = {
        depthLimitHit: false,
        keyLimitHit: false,
        pathLimitHit: false,
        mixedSchemasDetected: false,
        maxDepth,
        maxKeys,
        maxPaths: 0, // Will be set later
        totalPathsFound: 0
    };
    // Keys matching this pattern can be addressed with jq's `.key` shorthand.
    const SIMPLE_KEY = /^[A-Za-z_][A-Za-z0-9_]*$/;
    // Builds the jq path of `key` under `parentPath` ('' means the root).
    const childPathOf = (parentPath, key) => SIMPLE_KEY.test(key)
        ? `${parentPath}.${key}`
        : `${parentPath || '.'}[${JSON.stringify(key)}]`;
    // Type label used when sampling array items.
    const typeLabel = (item) => item === null
        ? 'null'
        : Array.isArray(item)
            ? 'array'
            : typeof item;
    function traverse(val, path, depth) {
        // Hard stop at max depth
        if (depth > maxDepth) {
            limits.depthLimitHit = true;
            if (!limits.deepestPathTruncated) {
                limits.deepestPathTruncated = path;
            }
            return;
        }
        if (val === null) {
            paths.push({ path, type: 'null', depth, nullable: true });
        }
        else if (Array.isArray(val)) {
            paths.push({ path, type: 'array', depth, arrayLength: val.length });
            if (val.length === 0) {
                return; // Empty array, nothing to explore
            }
            // Sample first 5 items to detect schema variance
            const sample = val.slice(0, 5);
            const types = new Set(sample.map(typeLabel));
            // Mixed (heterogeneous) if the sample has more than 1 distinct type
            const mixed = types.size > 1;
            if (mixed) {
                limits.mixedSchemasDetected = true;
            }
            // Traverse first non-null item
            const first = sample.find(v => v !== null);
            if (first !== undefined) {
                const arrayPath = `${path}[]`;
                if (mixed) {
                    // For mixed arrays, only show the mixed marker (don't traverse to avoid duplicate paths)
                    paths.push({ path: arrayPath, type: 'mixed', depth: depth + 1, mixed: true });
                }
                else {
                    // For uniform arrays, traverse to show the structure
                    traverse(first, arrayPath, depth + 1);
                }
            }
            // For objects in arrays, also check if they have different keys
            if (types.has('object')) {
                const objects = sample.filter(v => v && typeof v === 'object' && !Array.isArray(v));
                if (objects.length >= 2) {
                    const keySets = objects.map(o => new Set(Object.keys(o)));
                    // Compare each sampled object with the next one
                    for (let i = 0; i < keySets.length - 1; i++) {
                        const keys1 = Array.from(keySets[i]);
                        const keys2 = Array.from(keySets[i + 1]);
                        if (keys1.length !== keys2.length || !keys1.every(k => keySets[i + 1].has(k))) {
                            limits.mixedSchemasDetected = true;
                            break;
                        }
                    }
                }
            }
        }
        else if (typeof val === 'object') {
            const keys = Object.keys(val).sort();
            // Check for numeric keys (common pattern - treat as collection like arrays)
            const numericKeys = keys.filter(k => /^\d+$/.test(k));
            const hasNumericKeys = keys.length > 0 && numericKeys.length >= keys.length * 0.8;
            // Always show key count (including 0)
            paths.push({
                path,
                type: hasNumericKeys ? 'object (numeric keys)' : 'object',
                depth,
                keyCount: keys.length
            });
            if (hasNumericKeys) {
                // Treat as collection - show ONE representative item structure
                // This avoids enumerating .0, .1, .2, ... which is repetitive and wastes space
                const representativePath = path ? `${path}.[<idx>]` : `.[<idx>]`;
                // Pick first key to show structure
                if (keys.length > 0) {
                    const firstKey = keys[0];
                    traverse(val[firstKey], representativePath, depth + 1);
                }
            }
            else {
                // Normal object - traverse keys individually
                const keysToAnalyze = keys.slice(0, maxKeys);
                if (keys.length > maxKeys) {
                    limits.keyLimitHit = true;
                    limits.truncatedKeyCount = keys.length - maxKeys;
                }
                // Traverse child keys
                for (const key of keysToAnalyze) {
                    traverse(val[key], childPathOf(path, key), depth + 1);
                }
            }
        }
        else {
            // Primitive types
            paths.push({ path, type: typeof val, depth });
        }
    }
    traverse(data, '', 0);
    limits.totalPathsFound = paths.length;
    return { paths, limits };
}
69
132
/**
 * Pick the N most relevant paths.
 *
 * Every path gets a relevance score: non-null values, shallower depth,
 * arrays, objects and mixed-type entries all add points. Paths are ordered
 * by descending score, ties going to the shorter path string.
 *
 * @param paths - All collected paths
 * @param maxPaths - Maximum paths to return
 * @returns Prioritized subset of paths (copies carrying an extra `score` field)
 */
function selectTopPaths(paths, maxPaths) {
    // Relevance score of a single path entry.
    const relevance = (entry) => {
        let points = entry.nullable ? 0 : 10; // Non-null = higher priority
        points += (3 - entry.depth) * 5; // Shallower = higher priority
        if (entry.type === 'array') {
            points += 5;
        }
        if (entry.type === 'object' || entry.type === 'object (numeric keys)') {
            points += 3;
        }
        if (entry.mixed) {
            points += 2;
        }
        return points;
    };
    const ranked = paths.map((entry) => ({ ...entry, score: relevance(entry) }));
    ranked.sort((left, right) => {
        return right.score !== left.score
            ? right.score - left.score
            : left.path.length - right.path.length;
    });
    return ranked.slice(0, maxPaths);
}
157
/**
 * Format paths as query-assist text with exploration prompts.
 *
 * The output starts with a header and the data size, lists the paths grouped
 * by depth, and ends with an exploration guide when a limit was hit or a
 * numeric-keyed object is present. The jq hints in the guide are written to
 * be valid jq: object keys are shown in quoted form, array slices carry the
 * leading dot, and path listing joins each path array after stringifying its
 * components. Containers known to be empty are not flagged with DEPTH LIMIT,
 * since nothing is hidden beneath them.
 *
 * @param paths - Selected paths to display
 * @param limits - Limit metadata for generating prompts
 * @param dataSize - Size of original data in characters
 * @returns Formatted text schema
 */
function formatQueryAssist(paths, limits, dataSize) {
    let output = '📊 STRUCTURE GUIDE (for JQ queries)\n\n';
    output += `Size: ${dataSize.toLocaleString()} characters\n\n`;
    // Group paths by depth
    const byDepth = {};
    for (const p of paths) {
        if (!byDepth[p.depth])
            byDepth[p.depth] = [];
        byDepth[p.depth].push(p);
    }
    // Format paths by depth levels
    const depths = Object.keys(byDepth)
        .map(Number)
        .sort((a, b) => a - b);
    for (const depth of depths) {
        const depthPaths = byDepth[depth];
        const label = depth === 0 ? 'ROOT' : depth === 1 ? 'TOP-LEVEL' : `NESTED (depth ${depth})`;
        output += `${label}:\n`;
        for (const p of depthPaths) {
            const pathStr = p.path || '(root)';
            output += ` ${pathStr.padEnd(35)}`;
            output += ` → ${p.type}`;
            if (p.arrayLength !== undefined) {
                output += `[${p.arrayLength}]`;
            }
            if (p.keyCount !== undefined) {
                output += ` (${p.keyCount} keys)`;
            }
            if (p.nullable) {
                output += ' (nullable)';
            }
            if (p.mixed) {
                output += ' ⚠️ MIXED SCHEMAS';
            }
            const isContainer = p.type === 'object' || p.type === 'object (numeric keys)' || p.type === 'array';
            // An empty array/object has no children, so the depth limit hides nothing.
            const knownEmpty = p.arrayLength === 0 || p.keyCount === 0;
            if (depth === limits.maxDepth && isContainer && !knownEmpty) {
                output += ' ⚠️ DEPTH LIMIT';
            }
            output += '\n';
        }
        output += '\n';
    }
    // Check if we have numeric-keyed objects in the output
    const hasNumericKeys = paths.some(p => p.path.includes('.[<idx>]'));
    // Build exploration guide if any limits were hit or special patterns detected
    const hasLimits = limits.depthLimitHit || limits.keyLimitHit || limits.pathLimitHit || limits.mixedSchemasDetected;
    if (hasNumericKeys || hasLimits) {
        output += '💡 EXPLORATION GUIDE\n\n';
        // Numeric keys note (data-specific, keep separate)
        if (hasNumericKeys) {
            output += 'NUMERIC KEYS: .[<idx>] represents structure shared by all numeric keys\n';
            // jq cannot index an object with a number, so only the quoted form is suggested.
            output += ' Access: .["0"], .["1"] (quoted string keys; .[0] only works on arrays) | List: keys\n\n';
        }
        // Show which limits were hit
        if (hasLimits) {
            const limitWarnings = [];
            if (limits.depthLimitHit) {
                limitWarnings.push(`DEPTH (max: ${limits.maxDepth})`);
            }
            if (limits.keyLimitHit && limits.truncatedKeyCount) {
                limitWarnings.push(`KEYS (${limits.maxKeys} shown, ${limits.truncatedKeyCount} more)`);
            }
            if (limits.pathLimitHit) {
                limitWarnings.push(`PATHS (${limits.maxPaths} of ${limits.totalPathsFound})`);
            }
            if (limits.mixedSchemasDetected) {
                limitWarnings.push('MIXED SCHEMAS');
            }
            output += `⚠️ Limits: ${limitWarnings.join(' | ')}\n\n`;
            // Generic JQ exploration patterns
            output += 'Common JQ patterns:\n';
            output += ' • View keys: <path> | keys\n';
            output += ' • Check type: <path> | type\n';
            output += ' • Count items: <path> | length\n';
            output += ' • Search keys: keys | map(select(contains("term")))\n';
            // Each output of paths(scalars) is an array of keys/indices; stringify then join it.
            output += ' • List all paths: paths(scalars) | map(tostring) | join(".")\n';
            output += ' • Filter arrays: .[] | select(type == "object")\n';
            if (limits.mixedSchemasDetected) {
                output += ' • Check variance: .[] | type or .[:3] | map(keys)\n';
            }
            output += '\n';
        }
    }
    return output;
}
248
/**
 * Generate query-assist schema for JSON data.
 * Main entry point for schema generation: collects paths, keeps the most
 * relevant ones, and renders them as text.
 * @param data - JSON data to analyze
 * @param options - Configuration options (maxDepth: 2, maxPaths: 20 and
 *   maxKeys: 50 by default; dataSize is computed when not supplied)
 * @returns Compact text schema optimized for JQ queries
 */
export function generateQueryAssistSchema(data, options = {}) {
    const maxDepth = options.maxDepth ?? 2;
    const maxPaths = options.maxPaths ?? 20;
    const maxKeys = options.maxKeys ?? 50;
    // Collect paths with limits
    const { paths, limits } = collectPaths(data, maxDepth, maxKeys);
    // Select top paths
    const selectedPaths = selectTopPaths(paths, maxPaths);
    // Update limit metadata
    limits.maxPaths = maxPaths;
    limits.pathLimitHit = paths.length > maxPaths;
    // Calculate data size (use provided size if available to avoid re-stringifying).
    // JSON.stringify returns undefined for values such as undefined or a
    // function, so fall back to 0 instead of reading `.length` of undefined.
    const dataSize = options.dataSize ?? JSON.stringify(data)?.length ?? 0;
    // Format as text
    return formatQueryAssist(selectedPaths, limits, dataSize);
}