@comprehend/telemetry-node 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(npm test:*)"
5
+ ],
6
+ "deny": []
7
+ }
8
+ }
@@ -10,8 +10,8 @@ export interface SQLAnalysisResult {
10
10
  /** Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them, and
11
11
  * produces two versions of the query:
12
12
  * - A normalized version for hashing purposes that does not account for whitespace, comments, and collapses
13
- * IN clauses that might cause a cardinality explosion.
14
- * - A presentable version that only does the IN clause collapsing */
13
+ * IN clauses and VALUES clauses that might cause a cardinality explosion.
14
+ * - A presentable version that only does the IN clause and VALUES clause collapsing */
15
15
  export declare function analyzeSQL(sql: string): SQLAnalysisResult;
16
16
  export declare function analyzeSQLTokens(tokens: Token[]): {
17
17
  tableOperations: {
@@ -11,14 +11,19 @@ const KEYWORDS = new Set([
11
11
  /** Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them, and
12
12
  * produces two versions of the query:
13
13
  * - A normalized version for hashing purposes that does not account for whitespace, comments, and collapses
14
- * IN clauses that might cause a cardinality explosion.
15
- * - A presentable version that only does the IN clause collapsing */
14
+ * IN clauses and VALUES clauses that might cause a cardinality explosion.
15
+ * - A presentable version that only does the IN clause and VALUES clause collapsing */
16
16
  function analyzeSQL(sql) {
17
17
  let semanticTokens = new Array();
18
18
  let presentableTokens = new Array();
19
19
  let seekingInParen = false;
20
20
  let analyzingIn = false;
21
21
  let skippingIn = false;
22
+ let seekingValuesParen = false;
23
+ let skippingValues = false;
24
+ let lookingForCommaOrEnd = false;
25
+ let valuesDepth = 0;
26
+ let skippedWhitespace = [];
22
27
  for (let token of tokenizeSQL(sql)) {
23
28
  switch (token.type) {
24
29
  case "whitespace":
@@ -81,9 +86,90 @@ function analyzeSQL(sql) {
81
86
  skippingIn = false;
82
87
  }
83
88
  }
89
+ else if (seekingValuesParen) {
90
+ // We saw VALUES, and now look for an opening (. Skip whitespace/comments, bail if anything else.
91
+ switch (token.type) {
92
+ case "comment":
93
+ case "whitespace":
94
+ presentableTokens.push(token);
95
+ break;
96
+ case "punct":
97
+ if (token.value === "(") {
98
+ // Just add the opening paren, "..." and closing paren - preserve original spacing
99
+ presentableTokens.push(token);
100
+ presentableTokens.push({ type: "unknown", value: "..." });
101
+ presentableTokens.push({ type: "punct", value: ")" });
102
+ seekingValuesParen = false;
103
+ skippingValues = true;
104
+ valuesDepth = 1;
105
+ }
106
+ else {
107
+ // Not what we expected, go back to normal processing
108
+ presentableTokens.push(token);
109
+ seekingValuesParen = false;
110
+ }
111
+ break;
112
+ default:
113
+ // Not what we expected, go back to normal processing
114
+ presentableTokens.push(token);
115
+ seekingValuesParen = false;
116
+ break;
117
+ }
118
+ }
119
+ else if (skippingValues) {
120
+ // Skip everything until we've consumed all VALUES tuples
121
+ if (token.type === "punct") {
122
+ if (token.value === "(") {
123
+ valuesDepth++;
124
+ }
125
+ else if (token.value === ")") {
126
+ valuesDepth--;
127
+ if (valuesDepth === 0) {
128
+ // This closes a tuple, check for comma indicating more tuples
129
+ lookingForCommaOrEnd = true;
130
+ skippingValues = false;
131
+ }
132
+ }
133
+ }
134
+ }
135
+ else if (lookingForCommaOrEnd) {
136
+ // After closing a VALUES tuple, look for comma (more tuples) or end of VALUES
137
+ switch (token.type) {
138
+ case "comment":
139
+ case "whitespace":
140
+ // Collect whitespace/comments while looking for comma or end
141
+ skippedWhitespace.push(token);
142
+ break;
143
+ case "punct":
144
+ if (token.value === ",") {
145
+ // More tuples coming, clear skipped whitespace and continue skipping
146
+ skippedWhitespace = [];
147
+ lookingForCommaOrEnd = false;
148
+ skippingValues = true;
149
+ }
150
+ else {
151
+ // Not a comma, so VALUES clause is done
152
+ // Add back the skipped whitespace, then the current token
153
+ presentableTokens.push(...skippedWhitespace);
154
+ presentableTokens.push(token);
155
+ skippedWhitespace = [];
156
+ lookingForCommaOrEnd = false;
157
+ }
158
+ break;
159
+ default:
160
+ // VALUES clause is done, resume normal processing
161
+ // Add back the skipped whitespace, then the current token
162
+ presentableTokens.push(...skippedWhitespace);
163
+ presentableTokens.push(token);
164
+ skippedWhitespace = [];
165
+ lookingForCommaOrEnd = false;
166
+ break;
167
+ }
168
+ }
84
169
  else {
85
170
  presentableTokens.push(token);
86
171
  seekingInParen = token.type === "keyword" && token.value.toUpperCase() === "IN";
172
+ seekingValuesParen = token.type === "keyword" && token.value.toUpperCase() === "VALUES";
87
173
  }
88
174
  }
89
175
  return {
@@ -261,6 +347,45 @@ function analyzeSQLTokens(tokens) {
261
347
  }
262
348
  }
263
349
  }
350
+ // Normalize VALUES (...) clauses
351
+ if (token.type === 'keyword' && token.value === 'VALUES') {
352
+ if (tokens[i + 1]?.value === '(') {
353
+ appendToken('VALUES', 'keyword');
354
+ appendToken('(', 'punct');
355
+ appendToken('...', 'identifier');
356
+ appendToken(')', 'punct');
357
+ // Skip all VALUES tuples including comma-separated ones
358
+ let depth = 0;
359
+ let j = i + 1;
360
+ while (j < tokens.length) {
361
+ if (tokens[j].value === '(') {
362
+ depth++;
363
+ }
364
+ else if (tokens[j].value === ')') {
365
+ depth--;
366
+ if (depth === 0) {
367
+ // Check if there's a comma after this closing paren (more tuples)
368
+ let k = j + 1;
369
+ while (k < tokens.length && (tokens[k].type === 'whitespace' || tokens[k].type === 'comment')) {
370
+ k++;
371
+ }
372
+ if (tokens[k]?.value === ',') {
373
+ // More tuples, continue skipping
374
+ j = k + 1;
375
+ continue;
376
+ }
377
+ else {
378
+ // No more tuples, we're done
379
+ break;
380
+ }
381
+ }
382
+ }
383
+ j++;
384
+ }
385
+ i = j + 1;
386
+ continue;
387
+ }
388
+ }
264
389
  appendToken(token.value, token.type);
265
390
  i++;
266
391
  }
@@ -12,7 +12,7 @@ describe('SQL Analyzer - basic SQL operations', () => {
12
12
  const sql = `INSERT INTO logs (message, level) VALUES ('hi', 'info')`;
13
13
  const result = (0, sql_analyzer_1.analyzeSQL)(sql);
14
14
  expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
15
- expect(result.presentableQuery).toEqual(sql);
15
+ expect(result.presentableQuery).toEqual(`INSERT INTO logs (message, level) VALUES (...)`);
16
16
  });
17
17
  it('detects an INSERT INTO ... SELECT', () => {
18
18
  const sql = `INSERT INTO archive SELECT * FROM logs`;
@@ -85,7 +85,9 @@ describe('SQL Analyzer - basic SQL operations', () => {
85
85
  users: ['INSERT', 'UPDATE'],
86
86
  });
87
87
  expect(result.normalizedQuery).toMatch(/replace\s+into\s+users/i);
88
- expect(result.presentableQuery).toEqual(sql);
88
+ expect(result.presentableQuery).toEqual(`
89
+ REPLACE INTO users (id, name) VALUES (...);
90
+ `);
89
91
  });
90
92
  it('detects operation type from MERGE WHEN clause', () => {
91
93
  const sql = `
@@ -103,7 +105,15 @@ describe('SQL Analyzer - basic SQL operations', () => {
103
105
  incoming: ['SELECT'],
104
106
  });
105
107
  expect(result.normalizedQuery).toMatch(/merge\s+into\s+inventory/i);
106
- expect(result.presentableQuery).toEqual(sql);
108
+ expect(result.presentableQuery).toEqual(`
109
+ MERGE INTO inventory AS t
110
+ USING incoming AS s
111
+ ON t.sku = s.sku
112
+ WHEN MATCHED THEN
113
+ UPDATE SET t.qty = t.qty + s.qty
114
+ WHEN NOT MATCHED THEN
115
+ INSERT (sku, qty) VALUES (...);
116
+ `);
107
117
  });
108
118
  it('handles double-quoted identifiers', () => {
109
119
  const sql = `SELECT * FROM "Users" WHERE "Users"."Id" = 42`;
@@ -361,3 +371,127 @@ describe('SQL Analyzer - basic SQL operations', () => {
361
371
  expect(result.presentableQuery).toEqual(sql);
362
372
  });
363
373
  });
374
+ describe('SQL Analyzer - bulk INSERT VALUES cardinality reduction', () => {
375
+ it('collapses single VALUES tuple to maintain consistency', () => {
376
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com')`;
377
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
378
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
379
+ expect(result.normalizedQuery).toContain("VALUES(...)");
380
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
381
+ });
382
+ it('collapses multiple VALUES tuples to reduce cardinality', () => {
383
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com'), ('Bob', 'bob@example.com'), ('Charlie', 'charlie@example.com')`;
384
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
385
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
386
+ expect(result.normalizedQuery).toContain("VALUES(...)");
387
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
388
+ });
389
+ it('collapses multi-line bulk INSERT VALUES', () => {
390
+ const sql = `INSERT INTO products (name, price, category_id) VALUES
391
+ ('Laptop', 999.99, 1),
392
+ ('Mouse', 29.99, 2),
393
+ ('Keyboard', 79.99, 2),
394
+ ('Monitor', 299.99, 3)`;
395
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
396
+ expect(result.tableOperations).toEqual({ products: ['INSERT'] });
397
+ expect(result.normalizedQuery).toContain("VALUES(...)");
398
+ expect(result.presentableQuery).toEqual(`INSERT INTO products (name, price, category_id) VALUES
399
+ (...)`);
400
+ });
401
+ it('handles bulk INSERT with different spacing and formatting', () => {
402
+ const sql = `INSERT INTO logs(timestamp,level,message)VALUES('2023-01-01','info','start'),('2023-01-02','error','failed'),('2023-01-03','info','end')`;
403
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
404
+ expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
405
+ expect(result.normalizedQuery).toContain("VALUES(...)");
406
+ expect(result.presentableQuery).toEqual(`INSERT INTO logs(timestamp,level,message)VALUES(...)`);
407
+ });
408
+ it('collapses REPLACE INTO with multiple VALUES tuples', () => {
409
+ const sql = `REPLACE INTO cache (key, value, expires) VALUES ('user:1', 'data1', 3600), ('user:2', 'data2', 3600)`;
410
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
411
+ expect(result.tableOperations).toEqual({ cache: ['INSERT', 'UPDATE'] });
412
+ expect(result.normalizedQuery).toContain("VALUES(...)");
413
+ expect(result.presentableQuery).toEqual(`REPLACE INTO cache (key, value, expires) VALUES (...)`);
414
+ });
415
+ it('handles bulk INSERT with complex nested values', () => {
416
+ const sql = `INSERT INTO events (data, metadata) VALUES
417
+ ('{"type":"login"}', '{"source":"web","ip":"192.168.1.1"}'),
418
+ ('{"type":"logout"}', '{"source":"mobile","ip":"10.0.0.1"}')`;
419
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
420
+ expect(result.tableOperations).toEqual({ events: ['INSERT'] });
421
+ expect(result.normalizedQuery).toContain("VALUES(...)");
422
+ expect(result.presentableQuery).toEqual(`INSERT INTO events (data, metadata) VALUES
423
+ (...)`);
424
+ });
425
+ it('preserves INSERT with subquery (not VALUES)', () => {
426
+ const sql = `INSERT INTO archive SELECT * FROM logs WHERE created < '2023-01-01'`;
427
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
428
+ expect(result.tableOperations).toEqual({
429
+ archive: ['INSERT'],
430
+ logs: ['SELECT']
431
+ });
432
+ expect(result.presentableQuery).toEqual(sql);
433
+ expect(result.normalizedQuery).not.toContain("VALUES(...)");
434
+ });
435
+ it('handles bulk INSERT with quoted identifiers', () => {
436
+ const sql = `INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES ('John', 'Doe'), ('Jane', 'Smith')`;
437
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
438
+ expect(result.tableOperations).toEqual({ userprofiles: ['INSERT'] });
439
+ expect(result.normalizedQuery).toContain("VALUES(...)");
440
+ expect(result.presentableQuery).toEqual(`INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES (...)`);
441
+ });
442
+ it('handles bulk INSERT with mixed value types including NULL', () => {
443
+ const sql = `INSERT INTO metrics (name, value, tags) VALUES
444
+ ('cpu_usage', 85.5, NULL),
445
+ ('memory_usage', 67.2, 'production'),
446
+ ('disk_usage', NULL, 'staging')`;
447
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
448
+ expect(result.tableOperations).toEqual({ metrics: ['INSERT'] });
449
+ expect(result.normalizedQuery).toContain("VALUES(...)");
450
+ expect(result.presentableQuery).toEqual(`INSERT INTO metrics (name, value, tags) VALUES
451
+ (...)`);
452
+ });
453
+ it('handles very large bulk INSERT (cardinality explosion scenario)', () => {
454
+ // Generate a bulk insert with many VALUES tuples to simulate real cardinality issues
455
+ const valueTuples = Array.from({ length: 100 }, (_, i) => `('user${i}', 'user${i}@example.com')`);
456
+ const sql = `INSERT INTO users (name, email) VALUES ${valueTuples.join(', ')}`;
457
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
458
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
459
+ expect(result.normalizedQuery).toContain("VALUES(...)");
460
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
461
+ // Ensure the normalized query is much shorter than the original
462
+ expect(result.normalizedQuery.length).toBeLessThan(sql.length / 2);
463
+ });
464
+ it('handles bulk INSERT with functions and expressions in VALUES', () => {
465
+ const sql = `INSERT INTO audit_log (event_time, user_id, action) VALUES
466
+ (NOW(), 1, 'login'),
467
+ (CURRENT_TIMESTAMP, 2, 'logout'),
468
+ (DATE('2023-01-01'), 3, 'update')`;
469
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
470
+ expect(result.tableOperations).toEqual({ audit_log: ['INSERT'] });
471
+ expect(result.normalizedQuery).toContain("VALUES(...)");
472
+ expect(result.presentableQuery).toEqual(`INSERT INTO audit_log (event_time, user_id, action) VALUES
473
+ (...)`);
474
+ });
475
+ it('handles bulk INSERT with parentheses in string values', () => {
476
+ const sql = `INSERT INTO comments (text, author) VALUES
477
+ ('This is a comment (with parentheses)', 'user1'),
478
+ ('Another comment (also with parens)', 'user2')`;
479
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
480
+ expect(result.tableOperations).toEqual({ comments: ['INSERT'] });
481
+ expect(result.normalizedQuery).toContain("VALUES(...)");
482
+ expect(result.presentableQuery).toEqual(`INSERT INTO comments (text, author) VALUES
483
+ (...)`);
484
+ });
485
+ it('preserves whitespace before ON CONFLICT after VALUES clause', () => {
486
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com') ON CONFLICT (email) DO NOTHING`;
487
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
488
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
489
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...) ON CONFLICT (email) DO NOTHING`);
490
+ });
491
+ it('preserves whitespace before ON CONFLICT with multiple VALUES tuples', () => {
492
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com'), ('Bob', 'bob@example.com') ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name`;
493
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
494
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
495
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...) ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name`);
496
+ });
497
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@comprehend/telemetry-node",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "Integration of comprehend.dev with OpenTelemetry in Node.js and similar environments.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -14,7 +14,7 @@ describe('SQL Analyzer - basic SQL operations', () => {
14
14
  const result = analyzeSQL(sql);
15
15
 
16
16
  expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
17
- expect(result.presentableQuery).toEqual(sql);
17
+ expect(result.presentableQuery).toEqual(`INSERT INTO logs (message, level) VALUES (...)`);
18
18
  });
19
19
 
20
20
  it('detects an INSERT INTO ... SELECT', () => {
@@ -104,7 +104,9 @@ describe('SQL Analyzer - basic SQL operations', () => {
104
104
  });
105
105
 
106
106
  expect(result.normalizedQuery).toMatch(/replace\s+into\s+users/i);
107
- expect(result.presentableQuery).toEqual(sql);
107
+ expect(result.presentableQuery).toEqual(`
108
+ REPLACE INTO users (id, name) VALUES (...);
109
+ `);
108
110
  });
109
111
 
110
112
  it('detects operation type from MERGE WHEN clause', () => {
@@ -125,7 +127,15 @@ describe('SQL Analyzer - basic SQL operations', () => {
125
127
  });
126
128
 
127
129
  expect(result.normalizedQuery).toMatch(/merge\s+into\s+inventory/i);
128
- expect(result.presentableQuery).toEqual(sql);
130
+ expect(result.presentableQuery).toEqual(`
131
+ MERGE INTO inventory AS t
132
+ USING incoming AS s
133
+ ON t.sku = s.sku
134
+ WHEN MATCHED THEN
135
+ UPDATE SET t.qty = t.qty + s.qty
136
+ WHEN NOT MATCHED THEN
137
+ INSERT (sku, qty) VALUES (...);
138
+ `);
129
139
  });
130
140
 
131
141
  it('handles double-quoted identifiers', () => {
@@ -434,3 +444,156 @@ describe('SQL Analyzer - basic SQL operations', () => {
434
444
  expect(result.presentableQuery).toEqual(sql);
435
445
  });
436
446
  });
447
+
448
+ describe('SQL Analyzer - bulk INSERT VALUES cardinality reduction', () => {
449
+ it('collapses single VALUES tuple to maintain consistency', () => {
450
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com')`;
451
+ const result = analyzeSQL(sql);
452
+
453
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
454
+ expect(result.normalizedQuery).toContain("VALUES(...)");
455
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
456
+ });
457
+
458
+ it('collapses multiple VALUES tuples to reduce cardinality', () => {
459
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com'), ('Bob', 'bob@example.com'), ('Charlie', 'charlie@example.com')`;
460
+ const result = analyzeSQL(sql);
461
+
462
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
463
+ expect(result.normalizedQuery).toContain("VALUES(...)");
464
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
465
+ });
466
+
467
+ it('collapses multi-line bulk INSERT VALUES', () => {
468
+ const sql = `INSERT INTO products (name, price, category_id) VALUES
469
+ ('Laptop', 999.99, 1),
470
+ ('Mouse', 29.99, 2),
471
+ ('Keyboard', 79.99, 2),
472
+ ('Monitor', 299.99, 3)`;
473
+ const result = analyzeSQL(sql);
474
+
475
+ expect(result.tableOperations).toEqual({ products: ['INSERT'] });
476
+ expect(result.normalizedQuery).toContain("VALUES(...)");
477
+ expect(result.presentableQuery).toEqual(`INSERT INTO products (name, price, category_id) VALUES
478
+ (...)`);
479
+ });
480
+
481
+ it('handles bulk INSERT with different spacing and formatting', () => {
482
+ const sql = `INSERT INTO logs(timestamp,level,message)VALUES('2023-01-01','info','start'),('2023-01-02','error','failed'),('2023-01-03','info','end')`;
483
+ const result = analyzeSQL(sql);
484
+
485
+ expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
486
+ expect(result.normalizedQuery).toContain("VALUES(...)");
487
+ expect(result.presentableQuery).toEqual(`INSERT INTO logs(timestamp,level,message)VALUES(...)`);
488
+ });
489
+
490
+ it('collapses REPLACE INTO with multiple VALUES tuples', () => {
491
+ const sql = `REPLACE INTO cache (key, value, expires) VALUES ('user:1', 'data1', 3600), ('user:2', 'data2', 3600)`;
492
+ const result = analyzeSQL(sql);
493
+
494
+ expect(result.tableOperations).toEqual({ cache: ['INSERT', 'UPDATE'] });
495
+ expect(result.normalizedQuery).toContain("VALUES(...)");
496
+ expect(result.presentableQuery).toEqual(`REPLACE INTO cache (key, value, expires) VALUES (...)`);
497
+ });
498
+
499
+ it('handles bulk INSERT with complex nested values', () => {
500
+ const sql = `INSERT INTO events (data, metadata) VALUES
501
+ ('{"type":"login"}', '{"source":"web","ip":"192.168.1.1"}'),
502
+ ('{"type":"logout"}', '{"source":"mobile","ip":"10.0.0.1"}')`;
503
+ const result = analyzeSQL(sql);
504
+
505
+ expect(result.tableOperations).toEqual({ events: ['INSERT'] });
506
+ expect(result.normalizedQuery).toContain("VALUES(...)");
507
+ expect(result.presentableQuery).toEqual(`INSERT INTO events (data, metadata) VALUES
508
+ (...)`);
509
+ });
510
+
511
+ it('preserves INSERT with subquery (not VALUES)', () => {
512
+ const sql = `INSERT INTO archive SELECT * FROM logs WHERE created < '2023-01-01'`;
513
+ const result = analyzeSQL(sql);
514
+
515
+ expect(result.tableOperations).toEqual({
516
+ archive: ['INSERT'],
517
+ logs: ['SELECT']
518
+ });
519
+ expect(result.presentableQuery).toEqual(sql);
520
+ expect(result.normalizedQuery).not.toContain("VALUES(...)");
521
+ });
522
+
523
+ it('handles bulk INSERT with quoted identifiers', () => {
524
+ const sql = `INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES ('John', 'Doe'), ('Jane', 'Smith')`;
525
+ const result = analyzeSQL(sql);
526
+
527
+ expect(result.tableOperations).toEqual({ userprofiles: ['INSERT'] });
528
+ expect(result.normalizedQuery).toContain("VALUES(...)");
529
+ expect(result.presentableQuery).toEqual(`INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES (...)`);
530
+ });
531
+
532
+ it('handles bulk INSERT with mixed value types including NULL', () => {
533
+ const sql = `INSERT INTO metrics (name, value, tags) VALUES
534
+ ('cpu_usage', 85.5, NULL),
535
+ ('memory_usage', 67.2, 'production'),
536
+ ('disk_usage', NULL, 'staging')`;
537
+ const result = analyzeSQL(sql);
538
+
539
+ expect(result.tableOperations).toEqual({ metrics: ['INSERT'] });
540
+ expect(result.normalizedQuery).toContain("VALUES(...)");
541
+ expect(result.presentableQuery).toEqual(`INSERT INTO metrics (name, value, tags) VALUES
542
+ (...)`);
543
+ });
544
+
545
+ it('handles very large bulk INSERT (cardinality explosion scenario)', () => {
546
+ // Generate a bulk insert with many VALUES tuples to simulate real cardinality issues
547
+ const valueTuples = Array.from({length: 100}, (_, i) => `('user${i}', 'user${i}@example.com')`);
548
+ const sql = `INSERT INTO users (name, email) VALUES ${valueTuples.join(', ')}`;
549
+ const result = analyzeSQL(sql);
550
+
551
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
552
+ expect(result.normalizedQuery).toContain("VALUES(...)");
553
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
554
+
555
+ // Ensure the normalized query is much shorter than the original
556
+ expect(result.normalizedQuery.length).toBeLessThan(sql.length / 2);
557
+ });
558
+
559
+ it('handles bulk INSERT with functions and expressions in VALUES', () => {
560
+ const sql = `INSERT INTO audit_log (event_time, user_id, action) VALUES
561
+ (NOW(), 1, 'login'),
562
+ (CURRENT_TIMESTAMP, 2, 'logout'),
563
+ (DATE('2023-01-01'), 3, 'update')`;
564
+ const result = analyzeSQL(sql);
565
+
566
+ expect(result.tableOperations).toEqual({ audit_log: ['INSERT'] });
567
+ expect(result.normalizedQuery).toContain("VALUES(...)");
568
+ expect(result.presentableQuery).toEqual(`INSERT INTO audit_log (event_time, user_id, action) VALUES
569
+ (...)`);
570
+ });
571
+
572
+ it('handles bulk INSERT with parentheses in string values', () => {
573
+ const sql = `INSERT INTO comments (text, author) VALUES
574
+ ('This is a comment (with parentheses)', 'user1'),
575
+ ('Another comment (also with parens)', 'user2')`;
576
+ const result = analyzeSQL(sql);
577
+
578
+ expect(result.tableOperations).toEqual({ comments: ['INSERT'] });
579
+ expect(result.normalizedQuery).toContain("VALUES(...)");
580
+ expect(result.presentableQuery).toEqual(`INSERT INTO comments (text, author) VALUES
581
+ (...)`);
582
+ });
583
+
584
+ it('preserves whitespace before ON CONFLICT after VALUES clause', () => {
585
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com') ON CONFLICT (email) DO NOTHING`;
586
+ const result = analyzeSQL(sql);
587
+
588
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
589
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...) ON CONFLICT (email) DO NOTHING`);
590
+ });
591
+
592
+ it('preserves whitespace before ON CONFLICT with multiple VALUES tuples', () => {
593
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com'), ('Bob', 'bob@example.com') ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name`;
594
+ const result = analyzeSQL(sql);
595
+
596
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
597
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...) ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name`);
598
+ });
599
+ });
@@ -16,14 +16,19 @@ export interface SQLAnalysisResult {
16
16
  /** Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them, and
17
17
  * produces two versions of the query:
18
18
  * - A normalized version for hashing purposes that does not account for whitespace, comments, and collapses
19
- * IN clauses that might cause a cardinality explosion.
20
- * - A presentable version that only does the IN clause collapsing */
19
+ * IN clauses and VALUES clauses that might cause a cardinality explosion.
20
+ * - A presentable version that only does the IN clause and VALUES clause collapsing */
21
21
  export function analyzeSQL(sql: string): SQLAnalysisResult {
22
22
  let semanticTokens = new Array<Token>();
23
23
  let presentableTokens = new Array<Token>();
24
24
  let seekingInParen = false;
25
25
  let analyzingIn = false;
26
26
  let skippingIn = false;
27
+ let seekingValuesParen = false;
28
+ let skippingValues = false;
29
+ let lookingForCommaOrEnd = false;
30
+ let valuesDepth = 0;
31
+ let skippedWhitespace: Token[] = [];
27
32
  for (let token of tokenizeSQL(sql)) {
28
33
  switch (token.type) {
29
34
  case "whitespace":
@@ -87,9 +92,87 @@ export function analyzeSQL(sql: string): SQLAnalysisResult {
87
92
  skippingIn = false;
88
93
  }
89
94
  }
95
+ else if (seekingValuesParen) {
96
+ // We saw VALUES, and now look for an opening (. Skip whitespace/comments, bail if anything else.
97
+ switch (token.type) {
98
+ case "comment":
99
+ case "whitespace":
100
+ presentableTokens.push(token);
101
+ break;
102
+ case "punct":
103
+ if (token.value === "(") {
104
+ // Just add the opening paren, "..." and closing paren - preserve original spacing
105
+ presentableTokens.push(token);
106
+ presentableTokens.push({ type: "unknown", value: "..." });
107
+ presentableTokens.push({ type: "punct", value: ")" });
108
+ seekingValuesParen = false;
109
+ skippingValues = true;
110
+ valuesDepth = 1;
111
+ } else {
112
+ // Not what we expected, go back to normal processing
113
+ presentableTokens.push(token);
114
+ seekingValuesParen = false;
115
+ }
116
+ break;
117
+ default:
118
+ // Not what we expected, go back to normal processing
119
+ presentableTokens.push(token);
120
+ seekingValuesParen = false;
121
+ break;
122
+ }
123
+ }
124
+ else if (skippingValues) {
125
+ // Skip everything until we've consumed all VALUES tuples
126
+ if (token.type === "punct") {
127
+ if (token.value === "(") {
128
+ valuesDepth++;
129
+ } else if (token.value === ")") {
130
+ valuesDepth--;
131
+ if (valuesDepth === 0) {
132
+ // This closes a tuple, check for comma indicating more tuples
133
+ lookingForCommaOrEnd = true;
134
+ skippingValues = false;
135
+ }
136
+ }
137
+ }
138
+ }
139
+ else if (lookingForCommaOrEnd) {
140
+ // After closing a VALUES tuple, look for comma (more tuples) or end of VALUES
141
+ switch (token.type) {
142
+ case "comment":
143
+ case "whitespace":
144
+ // Collect whitespace/comments while looking for comma or end
145
+ skippedWhitespace.push(token);
146
+ break;
147
+ case "punct":
148
+ if (token.value === ",") {
149
+ // More tuples coming, clear skipped whitespace and continue skipping
150
+ skippedWhitespace = [];
151
+ lookingForCommaOrEnd = false;
152
+ skippingValues = true;
153
+ } else {
154
+ // Not a comma, so VALUES clause is done
155
+ // Add back the skipped whitespace, then the current token
156
+ presentableTokens.push(...skippedWhitespace);
157
+ presentableTokens.push(token);
158
+ skippedWhitespace = [];
159
+ lookingForCommaOrEnd = false;
160
+ }
161
+ break;
162
+ default:
163
+ // VALUES clause is done, resume normal processing
164
+ // Add back the skipped whitespace, then the current token
165
+ presentableTokens.push(...skippedWhitespace);
166
+ presentableTokens.push(token);
167
+ skippedWhitespace = [];
168
+ lookingForCommaOrEnd = false;
169
+ break;
170
+ }
171
+ }
90
172
  else {
91
173
  presentableTokens.push(token);
92
174
  seekingInParen = token.type === "keyword" && token.value.toUpperCase() === "IN";
175
+ seekingValuesParen = token.type === "keyword" && token.value.toUpperCase() === "VALUES";
93
176
  }
94
177
  }
95
178
  return {
@@ -288,6 +371,46 @@ export function analyzeSQLTokens(tokens: Token[]) {
288
371
  }
289
372
  }
290
373
 
374
+ // Normalize VALUES (...) clauses
375
+ if (token.type === 'keyword' && token.value === 'VALUES') {
376
+ if (tokens[i + 1]?.value === '(') {
377
+ appendToken('VALUES', 'keyword');
378
+ appendToken('(', 'punct');
379
+ appendToken('...', 'identifier');
380
+ appendToken(')', 'punct');
381
+
382
+ // Skip all VALUES tuples including comma-separated ones
383
+ let depth = 0;
384
+ let j = i + 1;
385
+ while (j < tokens.length) {
386
+ if (tokens[j].value === '(') {
387
+ depth++;
388
+ } else if (tokens[j].value === ')') {
389
+ depth--;
390
+ if (depth === 0) {
391
+ // Check if there's a comma after this closing paren (more tuples)
392
+ let k = j + 1;
393
+ while (k < tokens.length && (tokens[k].type === 'whitespace' || tokens[k].type === 'comment')) {
394
+ k++;
395
+ }
396
+ if (tokens[k]?.value === ',') {
397
+ // More tuples, continue skipping
398
+ j = k + 1;
399
+ continue;
400
+ } else {
401
+ // No more tuples, we're done
402
+ break;
403
+ }
404
+ }
405
+ }
406
+ j++;
407
+ }
408
+
409
+ i = j + 1;
410
+ continue;
411
+ }
412
+ }
413
+
291
414
  appendToken(token.value, token.type);
292
415
  i++;
293
416
  }