@comprehend/telemetry-node 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -0
- package/.idea/telemetry-node.iml +0 -1
- package/dist/ComprehendDevSpanProcessor.js +2 -1
- package/dist/sql-analyzer.d.ts +2 -2
- package/dist/sql-analyzer.js +118 -2
- package/dist/sql-analyzer.test.js +125 -3
- package/package.json +1 -1
- package/src/ComprehendDevSpanProcessor.ts +2 -1
- package/src/sql-analyzer.test.ts +150 -3
- package/src/sql-analyzer.ts +116 -2
package/.idea/telemetry-node.iml
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
<component name="NewModuleRootManager">
|
|
4
4
|
<content url="file://$MODULE_DIR$">
|
|
5
5
|
<excludeFolder url="file://$MODULE_DIR$/.tmp" />
|
|
6
|
-
<excludeFolder url="file://$MODULE_DIR$/dist" />
|
|
7
6
|
<excludeFolder url="file://$MODULE_DIR$/temp" />
|
|
8
7
|
<excludeFolder url="file://$MODULE_DIR$/tmp" />
|
|
9
8
|
</content>
|
|
package/dist/ComprehendDevSpanProcessor.js
CHANGED
|
@@ -427,7 +427,8 @@ function extractErrorInfo(span) {
|
|
|
427
427
|
const message = attrs['exception.message'] ??
|
|
428
428
|
attrs['http.error_message'] ??
|
|
429
429
|
attrs['db.response.status_code'] ??
|
|
430
|
-
(isError ? attrs['otel.status_description'] : undefined)
|
|
430
|
+
(isError ? attrs['otel.status_description'] : undefined) ??
|
|
431
|
+
(isError ? span.status.message : undefined);
|
|
431
432
|
const type = attrs['exception.type'] ??
|
|
432
433
|
attrs['error.type'] ??
|
|
433
434
|
attrs['http.error_name'];
|
package/dist/sql-analyzer.d.ts
CHANGED
|
@@ -10,8 +10,8 @@ export interface SQLAnalysisResult {
|
|
|
10
10
|
/** Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them, and
|
|
11
11
|
* produces two versions of the query:
|
|
12
12
|
* - A normalized version for hashing purposes that does not account for whitespace, comments, and collapses
|
|
13
|
-
* IN clauses that might cause a cardinality explosion.
|
|
14
|
-
* - A presentable version that only does the IN clause collapsing */
|
|
13
|
+
* IN clauses and VALUES clauses that might cause a cardinality explosion.
|
|
14
|
+
* - A presentable version that only does the IN clause and VALUES clause collapsing */
|
|
15
15
|
export declare function analyzeSQL(sql: string): SQLAnalysisResult;
|
|
16
16
|
export declare function analyzeSQLTokens(tokens: Token[]): {
|
|
17
17
|
tableOperations: {
|
package/dist/sql-analyzer.js
CHANGED
|
@@ -11,14 +11,18 @@ const KEYWORDS = new Set([
|
|
|
11
11
|
/** Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them, and
|
|
12
12
|
* produces two versions of the query:
|
|
13
13
|
* - A normalized version for hashing purposes that does not account for whitespace, comments, and collapses
|
|
14
|
-
* IN clauses that might cause a cardinality explosion.
|
|
15
|
-
* - A presentable version that only does the IN clause collapsing */
|
|
14
|
+
* IN clauses and VALUES clauses that might cause a cardinality explosion.
|
|
15
|
+
* - A presentable version that only does the IN clause and VALUES clause collapsing */
|
|
16
16
|
function analyzeSQL(sql) {
|
|
17
17
|
let semanticTokens = new Array();
|
|
18
18
|
let presentableTokens = new Array();
|
|
19
19
|
let seekingInParen = false;
|
|
20
20
|
let analyzingIn = false;
|
|
21
21
|
let skippingIn = false;
|
|
22
|
+
let seekingValuesParen = false;
|
|
23
|
+
let skippingValues = false;
|
|
24
|
+
let lookingForCommaOrEnd = false;
|
|
25
|
+
let valuesDepth = 0;
|
|
22
26
|
for (let token of tokenizeSQL(sql)) {
|
|
23
27
|
switch (token.type) {
|
|
24
28
|
case "whitespace":
|
|
@@ -81,9 +85,82 @@ function analyzeSQL(sql) {
|
|
|
81
85
|
skippingIn = false;
|
|
82
86
|
}
|
|
83
87
|
}
|
|
88
|
+
else if (seekingValuesParen) {
|
|
89
|
+
// We saw VALUES, and now look for an opening (. Skip whitespace/comments, bail if anything else.
|
|
90
|
+
switch (token.type) {
|
|
91
|
+
case "comment":
|
|
92
|
+
case "whitespace":
|
|
93
|
+
presentableTokens.push(token);
|
|
94
|
+
break;
|
|
95
|
+
case "punct":
|
|
96
|
+
if (token.value === "(") {
|
|
97
|
+
// Just add the opening paren, "..." and closing paren - preserve original spacing
|
|
98
|
+
presentableTokens.push(token);
|
|
99
|
+
presentableTokens.push({ type: "unknown", value: "..." });
|
|
100
|
+
presentableTokens.push({ type: "punct", value: ")" });
|
|
101
|
+
seekingValuesParen = false;
|
|
102
|
+
skippingValues = true;
|
|
103
|
+
valuesDepth = 1;
|
|
104
|
+
}
|
|
105
|
+
else {
|
|
106
|
+
// Not what we expected, go back to normal processing
|
|
107
|
+
presentableTokens.push(token);
|
|
108
|
+
seekingValuesParen = false;
|
|
109
|
+
}
|
|
110
|
+
break;
|
|
111
|
+
default:
|
|
112
|
+
// Not what we expected, go back to normal processing
|
|
113
|
+
presentableTokens.push(token);
|
|
114
|
+
seekingValuesParen = false;
|
|
115
|
+
break;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
else if (skippingValues) {
|
|
119
|
+
// Skip everything until we've consumed all VALUES tuples
|
|
120
|
+
if (token.type === "punct") {
|
|
121
|
+
if (token.value === "(") {
|
|
122
|
+
valuesDepth++;
|
|
123
|
+
}
|
|
124
|
+
else if (token.value === ")") {
|
|
125
|
+
valuesDepth--;
|
|
126
|
+
if (valuesDepth === 0) {
|
|
127
|
+
// This closes a tuple, check for comma indicating more tuples
|
|
128
|
+
lookingForCommaOrEnd = true;
|
|
129
|
+
skippingValues = false;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
else if (lookingForCommaOrEnd) {
|
|
135
|
+
// After closing a VALUES tuple, look for comma (more tuples) or end of VALUES
|
|
136
|
+
switch (token.type) {
|
|
137
|
+
case "comment":
|
|
138
|
+
case "whitespace":
|
|
139
|
+
// Skip whitespace/comments while looking for comma or end
|
|
140
|
+
break;
|
|
141
|
+
case "punct":
|
|
142
|
+
if (token.value === ",") {
|
|
143
|
+
// More tuples coming, continue skipping
|
|
144
|
+
lookingForCommaOrEnd = false;
|
|
145
|
+
skippingValues = true;
|
|
146
|
+
}
|
|
147
|
+
else {
|
|
148
|
+
// Not a comma, so VALUES clause is done
|
|
149
|
+
presentableTokens.push(token);
|
|
150
|
+
lookingForCommaOrEnd = false;
|
|
151
|
+
}
|
|
152
|
+
break;
|
|
153
|
+
default:
|
|
154
|
+
// VALUES clause is done, resume normal processing
|
|
155
|
+
presentableTokens.push(token);
|
|
156
|
+
lookingForCommaOrEnd = false;
|
|
157
|
+
break;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
84
160
|
else {
|
|
85
161
|
presentableTokens.push(token);
|
|
86
162
|
seekingInParen = token.type === "keyword" && token.value.toUpperCase() === "IN";
|
|
163
|
+
seekingValuesParen = token.type === "keyword" && token.value.toUpperCase() === "VALUES";
|
|
87
164
|
}
|
|
88
165
|
}
|
|
89
166
|
return {
|
|
@@ -261,6 +338,45 @@ function analyzeSQLTokens(tokens) {
|
|
|
261
338
|
}
|
|
262
339
|
}
|
|
263
340
|
}
|
|
341
|
+
// Normalize VALUES (...) clauses
|
|
342
|
+
if (token.type === 'keyword' && token.value === 'VALUES') {
|
|
343
|
+
if (tokens[i + 1]?.value === '(') {
|
|
344
|
+
appendToken('VALUES', 'keyword');
|
|
345
|
+
appendToken('(', 'punct');
|
|
346
|
+
appendToken('...', 'identifier');
|
|
347
|
+
appendToken(')', 'punct');
|
|
348
|
+
// Skip all VALUES tuples including comma-separated ones
|
|
349
|
+
let depth = 0;
|
|
350
|
+
let j = i + 1;
|
|
351
|
+
while (j < tokens.length) {
|
|
352
|
+
if (tokens[j].value === '(') {
|
|
353
|
+
depth++;
|
|
354
|
+
}
|
|
355
|
+
else if (tokens[j].value === ')') {
|
|
356
|
+
depth--;
|
|
357
|
+
if (depth === 0) {
|
|
358
|
+
// Check if there's a comma after this closing paren (more tuples)
|
|
359
|
+
let k = j + 1;
|
|
360
|
+
while (k < tokens.length && (tokens[k].type === 'whitespace' || tokens[k].type === 'comment')) {
|
|
361
|
+
k++;
|
|
362
|
+
}
|
|
363
|
+
if (tokens[k]?.value === ',') {
|
|
364
|
+
// More tuples, continue skipping
|
|
365
|
+
j = k + 1;
|
|
366
|
+
continue;
|
|
367
|
+
}
|
|
368
|
+
else {
|
|
369
|
+
// No more tuples, we're done
|
|
370
|
+
break;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
j++;
|
|
375
|
+
}
|
|
376
|
+
i = j + 1;
|
|
377
|
+
continue;
|
|
378
|
+
}
|
|
379
|
+
}
|
|
264
380
|
appendToken(token.value, token.type);
|
|
265
381
|
i++;
|
|
266
382
|
}
|
|
package/dist/sql-analyzer.test.js
CHANGED
|
@@ -12,7 +12,7 @@ describe('SQL Analyzer - basic SQL operations', () => {
|
|
|
12
12
|
const sql = `INSERT INTO logs (message, level) VALUES ('hi', 'info')`;
|
|
13
13
|
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
14
14
|
expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
|
|
15
|
-
expect(result.presentableQuery).toEqual(sql);
|
|
15
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO logs (message, level) VALUES (...)`);
|
|
16
16
|
});
|
|
17
17
|
it('detects an INSERT INTO ... SELECT', () => {
|
|
18
18
|
const sql = `INSERT INTO archive SELECT * FROM logs`;
|
|
@@ -85,7 +85,9 @@ describe('SQL Analyzer - basic SQL operations', () => {
|
|
|
85
85
|
users: ['INSERT', 'UPDATE'],
|
|
86
86
|
});
|
|
87
87
|
expect(result.normalizedQuery).toMatch(/replace\s+into\s+users/i);
|
|
88
|
-
expect(result.presentableQuery).toEqual(sql);
|
|
88
|
+
expect(result.presentableQuery).toEqual(`
|
|
89
|
+
REPLACE INTO users (id, name) VALUES (...);
|
|
90
|
+
`);
|
|
89
91
|
});
|
|
90
92
|
it('detects operation type from MERGE WHEN clause', () => {
|
|
91
93
|
const sql = `
|
|
@@ -103,7 +105,15 @@ describe('SQL Analyzer - basic SQL operations', () => {
|
|
|
103
105
|
incoming: ['SELECT'],
|
|
104
106
|
});
|
|
105
107
|
expect(result.normalizedQuery).toMatch(/merge\s+into\s+inventory/i);
|
|
106
|
-
expect(result.presentableQuery).toEqual(sql);
|
|
108
|
+
expect(result.presentableQuery).toEqual(`
|
|
109
|
+
MERGE INTO inventory AS t
|
|
110
|
+
USING incoming AS s
|
|
111
|
+
ON t.sku = s.sku
|
|
112
|
+
WHEN MATCHED THEN
|
|
113
|
+
UPDATE SET t.qty = t.qty + s.qty
|
|
114
|
+
WHEN NOT MATCHED THEN
|
|
115
|
+
INSERT (sku, qty) VALUES (...);
|
|
116
|
+
`);
|
|
107
117
|
});
|
|
108
118
|
it('handles double-quoted identifiers', () => {
|
|
109
119
|
const sql = `SELECT * FROM "Users" WHERE "Users"."Id" = 42`;
|
|
@@ -361,3 +371,115 @@ describe('SQL Analyzer - basic SQL operations', () => {
|
|
|
361
371
|
expect(result.presentableQuery).toEqual(sql);
|
|
362
372
|
});
|
|
363
373
|
});
|
|
374
|
+
describe('SQL Analyzer - bulk INSERT VALUES cardinality reduction', () => {
|
|
375
|
+
it('collapses single VALUES tuple to maintain consistency', () => {
|
|
376
|
+
const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com')`;
|
|
377
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
378
|
+
expect(result.tableOperations).toEqual({ users: ['INSERT'] });
|
|
379
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
380
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
|
|
381
|
+
});
|
|
382
|
+
it('collapses multiple VALUES tuples to reduce cardinality', () => {
|
|
383
|
+
const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com'), ('Bob', 'bob@example.com'), ('Charlie', 'charlie@example.com')`;
|
|
384
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
385
|
+
expect(result.tableOperations).toEqual({ users: ['INSERT'] });
|
|
386
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
387
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
|
|
388
|
+
});
|
|
389
|
+
it('collapses multi-line bulk INSERT VALUES', () => {
|
|
390
|
+
const sql = `INSERT INTO products (name, price, category_id) VALUES
|
|
391
|
+
('Laptop', 999.99, 1),
|
|
392
|
+
('Mouse', 29.99, 2),
|
|
393
|
+
('Keyboard', 79.99, 2),
|
|
394
|
+
('Monitor', 299.99, 3)`;
|
|
395
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
396
|
+
expect(result.tableOperations).toEqual({ products: ['INSERT'] });
|
|
397
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
398
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO products (name, price, category_id) VALUES
|
|
399
|
+
(...)`);
|
|
400
|
+
});
|
|
401
|
+
it('handles bulk INSERT with different spacing and formatting', () => {
|
|
402
|
+
const sql = `INSERT INTO logs(timestamp,level,message)VALUES('2023-01-01','info','start'),('2023-01-02','error','failed'),('2023-01-03','info','end')`;
|
|
403
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
404
|
+
expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
|
|
405
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
406
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO logs(timestamp,level,message)VALUES(...)`);
|
|
407
|
+
});
|
|
408
|
+
it('collapses REPLACE INTO with multiple VALUES tuples', () => {
|
|
409
|
+
const sql = `REPLACE INTO cache (key, value, expires) VALUES ('user:1', 'data1', 3600), ('user:2', 'data2', 3600)`;
|
|
410
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
411
|
+
expect(result.tableOperations).toEqual({ cache: ['INSERT', 'UPDATE'] });
|
|
412
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
413
|
+
expect(result.presentableQuery).toEqual(`REPLACE INTO cache (key, value, expires) VALUES (...)`);
|
|
414
|
+
});
|
|
415
|
+
it('handles bulk INSERT with complex nested values', () => {
|
|
416
|
+
const sql = `INSERT INTO events (data, metadata) VALUES
|
|
417
|
+
('{"type":"login"}', '{"source":"web","ip":"192.168.1.1"}'),
|
|
418
|
+
('{"type":"logout"}', '{"source":"mobile","ip":"10.0.0.1"}')`;
|
|
419
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
420
|
+
expect(result.tableOperations).toEqual({ events: ['INSERT'] });
|
|
421
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
422
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO events (data, metadata) VALUES
|
|
423
|
+
(...)`);
|
|
424
|
+
});
|
|
425
|
+
it('preserves INSERT with subquery (not VALUES)', () => {
|
|
426
|
+
const sql = `INSERT INTO archive SELECT * FROM logs WHERE created < '2023-01-01'`;
|
|
427
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
428
|
+
expect(result.tableOperations).toEqual({
|
|
429
|
+
archive: ['INSERT'],
|
|
430
|
+
logs: ['SELECT']
|
|
431
|
+
});
|
|
432
|
+
expect(result.presentableQuery).toEqual(sql);
|
|
433
|
+
expect(result.normalizedQuery).not.toContain("VALUES(...)");
|
|
434
|
+
});
|
|
435
|
+
it('handles bulk INSERT with quoted identifiers', () => {
|
|
436
|
+
const sql = `INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES ('John', 'Doe'), ('Jane', 'Smith')`;
|
|
437
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
438
|
+
expect(result.tableOperations).toEqual({ userprofiles: ['INSERT'] });
|
|
439
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
440
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES (...)`);
|
|
441
|
+
});
|
|
442
|
+
it('handles bulk INSERT with mixed value types including NULL', () => {
|
|
443
|
+
const sql = `INSERT INTO metrics (name, value, tags) VALUES
|
|
444
|
+
('cpu_usage', 85.5, NULL),
|
|
445
|
+
('memory_usage', 67.2, 'production'),
|
|
446
|
+
('disk_usage', NULL, 'staging')`;
|
|
447
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
448
|
+
expect(result.tableOperations).toEqual({ metrics: ['INSERT'] });
|
|
449
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
450
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO metrics (name, value, tags) VALUES
|
|
451
|
+
(...)`);
|
|
452
|
+
});
|
|
453
|
+
it('handles very large bulk INSERT (cardinality explosion scenario)', () => {
|
|
454
|
+
// Generate a bulk insert with many VALUES tuples to simulate real cardinality issues
|
|
455
|
+
const valueTuples = Array.from({ length: 100 }, (_, i) => `('user${i}', 'user${i}@example.com')`);
|
|
456
|
+
const sql = `INSERT INTO users (name, email) VALUES ${valueTuples.join(', ')}`;
|
|
457
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
458
|
+
expect(result.tableOperations).toEqual({ users: ['INSERT'] });
|
|
459
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
460
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
|
|
461
|
+
// Ensure the normalized query is much shorter than the original
|
|
462
|
+
expect(result.normalizedQuery.length).toBeLessThan(sql.length / 2);
|
|
463
|
+
});
|
|
464
|
+
it('handles bulk INSERT with functions and expressions in VALUES', () => {
|
|
465
|
+
const sql = `INSERT INTO audit_log (event_time, user_id, action) VALUES
|
|
466
|
+
(NOW(), 1, 'login'),
|
|
467
|
+
(CURRENT_TIMESTAMP, 2, 'logout'),
|
|
468
|
+
(DATE('2023-01-01'), 3, 'update')`;
|
|
469
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
470
|
+
expect(result.tableOperations).toEqual({ audit_log: ['INSERT'] });
|
|
471
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
472
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO audit_log (event_time, user_id, action) VALUES
|
|
473
|
+
(...)`);
|
|
474
|
+
});
|
|
475
|
+
it('handles bulk INSERT with parentheses in string values', () => {
|
|
476
|
+
const sql = `INSERT INTO comments (text, author) VALUES
|
|
477
|
+
('This is a comment (with parentheses)', 'user1'),
|
|
478
|
+
('Another comment (also with parens)', 'user2')`;
|
|
479
|
+
const result = (0, sql_analyzer_1.analyzeSQL)(sql);
|
|
480
|
+
expect(result.tableOperations).toEqual({ comments: ['INSERT'] });
|
|
481
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
482
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO comments (text, author) VALUES
|
|
483
|
+
(...)`);
|
|
484
|
+
});
|
|
485
|
+
});
|
package/package.json
CHANGED
|
package/src/ComprehendDevSpanProcessor.ts
CHANGED
|
@@ -538,7 +538,8 @@ function extractErrorInfo(span: ReadableSpan): {
|
|
|
538
538
|
(attrs['exception.message'] as string | undefined) ??
|
|
539
539
|
(attrs['http.error_message'] as string | undefined) ??
|
|
540
540
|
(attrs['db.response.status_code'] as string | undefined) ??
|
|
541
|
-
(isError ? (attrs['otel.status_description'] as string | undefined) : undefined)
|
|
541
|
+
(isError ? (attrs['otel.status_description'] as string | undefined) : undefined) ??
|
|
542
|
+
(isError ? (span.status.message as string | undefined) : undefined);
|
|
542
543
|
const type =
|
|
543
544
|
(attrs['exception.type'] as string | undefined) ??
|
|
544
545
|
(attrs['error.type'] as string | undefined) ??
|
package/src/sql-analyzer.test.ts
CHANGED
|
@@ -14,7 +14,7 @@ describe('SQL Analyzer - basic SQL operations', () => {
|
|
|
14
14
|
const result = analyzeSQL(sql);
|
|
15
15
|
|
|
16
16
|
expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
|
|
17
|
-
expect(result.presentableQuery).toEqual(sql);
|
|
17
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO logs (message, level) VALUES (...)`);
|
|
18
18
|
});
|
|
19
19
|
|
|
20
20
|
it('detects an INSERT INTO ... SELECT', () => {
|
|
@@ -104,7 +104,9 @@ describe('SQL Analyzer - basic SQL operations', () => {
|
|
|
104
104
|
});
|
|
105
105
|
|
|
106
106
|
expect(result.normalizedQuery).toMatch(/replace\s+into\s+users/i);
|
|
107
|
-
expect(result.presentableQuery).toEqual(sql);
|
|
107
|
+
expect(result.presentableQuery).toEqual(`
|
|
108
|
+
REPLACE INTO users (id, name) VALUES (...);
|
|
109
|
+
`);
|
|
108
110
|
});
|
|
109
111
|
|
|
110
112
|
it('detects operation type from MERGE WHEN clause', () => {
|
|
@@ -125,7 +127,15 @@ describe('SQL Analyzer - basic SQL operations', () => {
|
|
|
125
127
|
});
|
|
126
128
|
|
|
127
129
|
expect(result.normalizedQuery).toMatch(/merge\s+into\s+inventory/i);
|
|
128
|
-
expect(result.presentableQuery).toEqual(sql);
|
|
130
|
+
expect(result.presentableQuery).toEqual(`
|
|
131
|
+
MERGE INTO inventory AS t
|
|
132
|
+
USING incoming AS s
|
|
133
|
+
ON t.sku = s.sku
|
|
134
|
+
WHEN MATCHED THEN
|
|
135
|
+
UPDATE SET t.qty = t.qty + s.qty
|
|
136
|
+
WHEN NOT MATCHED THEN
|
|
137
|
+
INSERT (sku, qty) VALUES (...);
|
|
138
|
+
`);
|
|
129
139
|
});
|
|
130
140
|
|
|
131
141
|
it('handles double-quoted identifiers', () => {
|
|
@@ -434,3 +444,140 @@ describe('SQL Analyzer - basic SQL operations', () => {
|
|
|
434
444
|
expect(result.presentableQuery).toEqual(sql);
|
|
435
445
|
});
|
|
436
446
|
});
|
|
447
|
+
|
|
448
|
+
describe('SQL Analyzer - bulk INSERT VALUES cardinality reduction', () => {
|
|
449
|
+
it('collapses single VALUES tuple to maintain consistency', () => {
|
|
450
|
+
const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com')`;
|
|
451
|
+
const result = analyzeSQL(sql);
|
|
452
|
+
|
|
453
|
+
expect(result.tableOperations).toEqual({ users: ['INSERT'] });
|
|
454
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
455
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
|
|
456
|
+
});
|
|
457
|
+
|
|
458
|
+
it('collapses multiple VALUES tuples to reduce cardinality', () => {
|
|
459
|
+
const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com'), ('Bob', 'bob@example.com'), ('Charlie', 'charlie@example.com')`;
|
|
460
|
+
const result = analyzeSQL(sql);
|
|
461
|
+
|
|
462
|
+
expect(result.tableOperations).toEqual({ users: ['INSERT'] });
|
|
463
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
464
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
|
|
465
|
+
});
|
|
466
|
+
|
|
467
|
+
it('collapses multi-line bulk INSERT VALUES', () => {
|
|
468
|
+
const sql = `INSERT INTO products (name, price, category_id) VALUES
|
|
469
|
+
('Laptop', 999.99, 1),
|
|
470
|
+
('Mouse', 29.99, 2),
|
|
471
|
+
('Keyboard', 79.99, 2),
|
|
472
|
+
('Monitor', 299.99, 3)`;
|
|
473
|
+
const result = analyzeSQL(sql);
|
|
474
|
+
|
|
475
|
+
expect(result.tableOperations).toEqual({ products: ['INSERT'] });
|
|
476
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
477
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO products (name, price, category_id) VALUES
|
|
478
|
+
(...)`);
|
|
479
|
+
});
|
|
480
|
+
|
|
481
|
+
it('handles bulk INSERT with different spacing and formatting', () => {
|
|
482
|
+
const sql = `INSERT INTO logs(timestamp,level,message)VALUES('2023-01-01','info','start'),('2023-01-02','error','failed'),('2023-01-03','info','end')`;
|
|
483
|
+
const result = analyzeSQL(sql);
|
|
484
|
+
|
|
485
|
+
expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
|
|
486
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
487
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO logs(timestamp,level,message)VALUES(...)`);
|
|
488
|
+
});
|
|
489
|
+
|
|
490
|
+
it('collapses REPLACE INTO with multiple VALUES tuples', () => {
|
|
491
|
+
const sql = `REPLACE INTO cache (key, value, expires) VALUES ('user:1', 'data1', 3600), ('user:2', 'data2', 3600)`;
|
|
492
|
+
const result = analyzeSQL(sql);
|
|
493
|
+
|
|
494
|
+
expect(result.tableOperations).toEqual({ cache: ['INSERT', 'UPDATE'] });
|
|
495
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
496
|
+
expect(result.presentableQuery).toEqual(`REPLACE INTO cache (key, value, expires) VALUES (...)`);
|
|
497
|
+
});
|
|
498
|
+
|
|
499
|
+
it('handles bulk INSERT with complex nested values', () => {
|
|
500
|
+
const sql = `INSERT INTO events (data, metadata) VALUES
|
|
501
|
+
('{"type":"login"}', '{"source":"web","ip":"192.168.1.1"}'),
|
|
502
|
+
('{"type":"logout"}', '{"source":"mobile","ip":"10.0.0.1"}')`;
|
|
503
|
+
const result = analyzeSQL(sql);
|
|
504
|
+
|
|
505
|
+
expect(result.tableOperations).toEqual({ events: ['INSERT'] });
|
|
506
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
507
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO events (data, metadata) VALUES
|
|
508
|
+
(...)`);
|
|
509
|
+
});
|
|
510
|
+
|
|
511
|
+
it('preserves INSERT with subquery (not VALUES)', () => {
|
|
512
|
+
const sql = `INSERT INTO archive SELECT * FROM logs WHERE created < '2023-01-01'`;
|
|
513
|
+
const result = analyzeSQL(sql);
|
|
514
|
+
|
|
515
|
+
expect(result.tableOperations).toEqual({
|
|
516
|
+
archive: ['INSERT'],
|
|
517
|
+
logs: ['SELECT']
|
|
518
|
+
});
|
|
519
|
+
expect(result.presentableQuery).toEqual(sql);
|
|
520
|
+
expect(result.normalizedQuery).not.toContain("VALUES(...)");
|
|
521
|
+
});
|
|
522
|
+
|
|
523
|
+
it('handles bulk INSERT with quoted identifiers', () => {
|
|
524
|
+
const sql = `INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES ('John', 'Doe'), ('Jane', 'Smith')`;
|
|
525
|
+
const result = analyzeSQL(sql);
|
|
526
|
+
|
|
527
|
+
expect(result.tableOperations).toEqual({ userprofiles: ['INSERT'] });
|
|
528
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
529
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES (...)`);
|
|
530
|
+
});
|
|
531
|
+
|
|
532
|
+
it('handles bulk INSERT with mixed value types including NULL', () => {
|
|
533
|
+
const sql = `INSERT INTO metrics (name, value, tags) VALUES
|
|
534
|
+
('cpu_usage', 85.5, NULL),
|
|
535
|
+
('memory_usage', 67.2, 'production'),
|
|
536
|
+
('disk_usage', NULL, 'staging')`;
|
|
537
|
+
const result = analyzeSQL(sql);
|
|
538
|
+
|
|
539
|
+
expect(result.tableOperations).toEqual({ metrics: ['INSERT'] });
|
|
540
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
541
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO metrics (name, value, tags) VALUES
|
|
542
|
+
(...)`);
|
|
543
|
+
});
|
|
544
|
+
|
|
545
|
+
it('handles very large bulk INSERT (cardinality explosion scenario)', () => {
|
|
546
|
+
// Generate a bulk insert with many VALUES tuples to simulate real cardinality issues
|
|
547
|
+
const valueTuples = Array.from({length: 100}, (_, i) => `('user${i}', 'user${i}@example.com')`);
|
|
548
|
+
const sql = `INSERT INTO users (name, email) VALUES ${valueTuples.join(', ')}`;
|
|
549
|
+
const result = analyzeSQL(sql);
|
|
550
|
+
|
|
551
|
+
expect(result.tableOperations).toEqual({ users: ['INSERT'] });
|
|
552
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
553
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
|
|
554
|
+
|
|
555
|
+
// Ensure the normalized query is much shorter than the original
|
|
556
|
+
expect(result.normalizedQuery.length).toBeLessThan(sql.length / 2);
|
|
557
|
+
});
|
|
558
|
+
|
|
559
|
+
it('handles bulk INSERT with functions and expressions in VALUES', () => {
|
|
560
|
+
const sql = `INSERT INTO audit_log (event_time, user_id, action) VALUES
|
|
561
|
+
(NOW(), 1, 'login'),
|
|
562
|
+
(CURRENT_TIMESTAMP, 2, 'logout'),
|
|
563
|
+
(DATE('2023-01-01'), 3, 'update')`;
|
|
564
|
+
const result = analyzeSQL(sql);
|
|
565
|
+
|
|
566
|
+
expect(result.tableOperations).toEqual({ audit_log: ['INSERT'] });
|
|
567
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
568
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO audit_log (event_time, user_id, action) VALUES
|
|
569
|
+
(...)`);
|
|
570
|
+
});
|
|
571
|
+
|
|
572
|
+
it('handles bulk INSERT with parentheses in string values', () => {
|
|
573
|
+
const sql = `INSERT INTO comments (text, author) VALUES
|
|
574
|
+
('This is a comment (with parentheses)', 'user1'),
|
|
575
|
+
('Another comment (also with parens)', 'user2')`;
|
|
576
|
+
const result = analyzeSQL(sql);
|
|
577
|
+
|
|
578
|
+
expect(result.tableOperations).toEqual({ comments: ['INSERT'] });
|
|
579
|
+
expect(result.normalizedQuery).toContain("VALUES(...)");
|
|
580
|
+
expect(result.presentableQuery).toEqual(`INSERT INTO comments (text, author) VALUES
|
|
581
|
+
(...)`);
|
|
582
|
+
});
|
|
583
|
+
});
|
package/src/sql-analyzer.ts
CHANGED
|
@@ -16,14 +16,18 @@ export interface SQLAnalysisResult {
|
|
|
16
16
|
/** Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them, and
|
|
17
17
|
* produces two versions of the query:
|
|
18
18
|
* - A normalized version for hashing purposes that does not account for whitespace, comments, and collapses
|
|
19
|
-
* IN clauses that might cause a cardinality explosion.
|
|
20
|
-
* - A presentable version that only does the IN clause collapsing */
|
|
19
|
+
* IN clauses and VALUES clauses that might cause a cardinality explosion.
|
|
20
|
+
* - A presentable version that only does the IN clause and VALUES clause collapsing */
|
|
21
21
|
export function analyzeSQL(sql: string): SQLAnalysisResult {
|
|
22
22
|
let semanticTokens = new Array<Token>();
|
|
23
23
|
let presentableTokens = new Array<Token>();
|
|
24
24
|
let seekingInParen = false;
|
|
25
25
|
let analyzingIn = false;
|
|
26
26
|
let skippingIn = false;
|
|
27
|
+
let seekingValuesParen = false;
|
|
28
|
+
let skippingValues = false;
|
|
29
|
+
let lookingForCommaOrEnd = false;
|
|
30
|
+
let valuesDepth = 0;
|
|
27
31
|
for (let token of tokenizeSQL(sql)) {
|
|
28
32
|
switch (token.type) {
|
|
29
33
|
case "whitespace":
|
|
@@ -87,9 +91,79 @@ export function analyzeSQL(sql: string): SQLAnalysisResult {
|
|
|
87
91
|
skippingIn = false;
|
|
88
92
|
}
|
|
89
93
|
}
|
|
94
|
+
else if (seekingValuesParen) {
|
|
95
|
+
// We saw VALUES, and now look for an opening (. Skip whitespace/comments, bail if anything else.
|
|
96
|
+
switch (token.type) {
|
|
97
|
+
case "comment":
|
|
98
|
+
case "whitespace":
|
|
99
|
+
presentableTokens.push(token);
|
|
100
|
+
break;
|
|
101
|
+
case "punct":
|
|
102
|
+
if (token.value === "(") {
|
|
103
|
+
// Just add the opening paren, "..." and closing paren - preserve original spacing
|
|
104
|
+
presentableTokens.push(token);
|
|
105
|
+
presentableTokens.push({ type: "unknown", value: "..." });
|
|
106
|
+
presentableTokens.push({ type: "punct", value: ")" });
|
|
107
|
+
seekingValuesParen = false;
|
|
108
|
+
skippingValues = true;
|
|
109
|
+
valuesDepth = 1;
|
|
110
|
+
} else {
|
|
111
|
+
// Not what we expected, go back to normal processing
|
|
112
|
+
presentableTokens.push(token);
|
|
113
|
+
seekingValuesParen = false;
|
|
114
|
+
}
|
|
115
|
+
break;
|
|
116
|
+
default:
|
|
117
|
+
// Not what we expected, go back to normal processing
|
|
118
|
+
presentableTokens.push(token);
|
|
119
|
+
seekingValuesParen = false;
|
|
120
|
+
break;
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
else if (skippingValues) {
|
|
124
|
+
// Skip everything until we've consumed all VALUES tuples
|
|
125
|
+
if (token.type === "punct") {
|
|
126
|
+
if (token.value === "(") {
|
|
127
|
+
valuesDepth++;
|
|
128
|
+
} else if (token.value === ")") {
|
|
129
|
+
valuesDepth--;
|
|
130
|
+
if (valuesDepth === 0) {
|
|
131
|
+
// This closes a tuple, check for comma indicating more tuples
|
|
132
|
+
lookingForCommaOrEnd = true;
|
|
133
|
+
skippingValues = false;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
else if (lookingForCommaOrEnd) {
|
|
139
|
+
// After closing a VALUES tuple, look for comma (more tuples) or end of VALUES
|
|
140
|
+
switch (token.type) {
|
|
141
|
+
case "comment":
|
|
142
|
+
case "whitespace":
|
|
143
|
+
// Skip whitespace/comments while looking for comma or end
|
|
144
|
+
break;
|
|
145
|
+
case "punct":
|
|
146
|
+
if (token.value === ",") {
|
|
147
|
+
// More tuples coming, continue skipping
|
|
148
|
+
lookingForCommaOrEnd = false;
|
|
149
|
+
skippingValues = true;
|
|
150
|
+
} else {
|
|
151
|
+
// Not a comma, so VALUES clause is done
|
|
152
|
+
presentableTokens.push(token);
|
|
153
|
+
lookingForCommaOrEnd = false;
|
|
154
|
+
}
|
|
155
|
+
break;
|
|
156
|
+
default:
|
|
157
|
+
// VALUES clause is done, resume normal processing
|
|
158
|
+
presentableTokens.push(token);
|
|
159
|
+
lookingForCommaOrEnd = false;
|
|
160
|
+
break;
|
|
161
|
+
}
|
|
162
|
+
}
|
|
90
163
|
else {
|
|
91
164
|
presentableTokens.push(token);
|
|
92
165
|
seekingInParen = token.type === "keyword" && token.value.toUpperCase() === "IN";
|
|
166
|
+
seekingValuesParen = token.type === "keyword" && token.value.toUpperCase() === "VALUES";
|
|
93
167
|
}
|
|
94
168
|
}
|
|
95
169
|
return {
|
|
@@ -288,6 +362,46 @@ export function analyzeSQLTokens(tokens: Token[]) {
|
|
|
288
362
|
}
|
|
289
363
|
}
|
|
290
364
|
|
|
365
|
+
// Normalize VALUES (...) clauses
|
|
366
|
+
if (token.type === 'keyword' && token.value === 'VALUES') {
|
|
367
|
+
if (tokens[i + 1]?.value === '(') {
|
|
368
|
+
appendToken('VALUES', 'keyword');
|
|
369
|
+
appendToken('(', 'punct');
|
|
370
|
+
appendToken('...', 'identifier');
|
|
371
|
+
appendToken(')', 'punct');
|
|
372
|
+
|
|
373
|
+
// Skip all VALUES tuples including comma-separated ones
|
|
374
|
+
let depth = 0;
|
|
375
|
+
let j = i + 1;
|
|
376
|
+
while (j < tokens.length) {
|
|
377
|
+
if (tokens[j].value === '(') {
|
|
378
|
+
depth++;
|
|
379
|
+
} else if (tokens[j].value === ')') {
|
|
380
|
+
depth--;
|
|
381
|
+
if (depth === 0) {
|
|
382
|
+
// Check if there's a comma after this closing paren (more tuples)
|
|
383
|
+
let k = j + 1;
|
|
384
|
+
while (k < tokens.length && (tokens[k].type === 'whitespace' || tokens[k].type === 'comment')) {
|
|
385
|
+
k++;
|
|
386
|
+
}
|
|
387
|
+
if (tokens[k]?.value === ',') {
|
|
388
|
+
// More tuples, continue skipping
|
|
389
|
+
j = k + 1;
|
|
390
|
+
continue;
|
|
391
|
+
} else {
|
|
392
|
+
// No more tuples, we're done
|
|
393
|
+
break;
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
j++;
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
i = j + 1;
|
|
401
|
+
continue;
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
|
|
291
405
|
appendToken(token.value, token.type);
|
|
292
406
|
i++;
|
|
293
407
|
}
|