@comprehend/telemetry-node 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(npm test:*)",
5
+ "Bash(grep:*)"
6
+ ],
7
+ "deny": []
8
+ }
9
+ }
@@ -3,7 +3,6 @@
3
3
  <component name="NewModuleRootManager">
4
4
  <content url="file://$MODULE_DIR$">
5
5
  <excludeFolder url="file://$MODULE_DIR$/.tmp" />
6
- <excludeFolder url="file://$MODULE_DIR$/dist" />
7
6
  <excludeFolder url="file://$MODULE_DIR$/temp" />
8
7
  <excludeFolder url="file://$MODULE_DIR$/tmp" />
9
8
  </content>
@@ -427,7 +427,8 @@ function extractErrorInfo(span) {
427
427
  const message = attrs['exception.message'] ??
428
428
  attrs['http.error_message'] ??
429
429
  attrs['db.response.status_code'] ??
430
- (isError ? attrs['otel.status_description'] : undefined);
430
+ (isError ? attrs['otel.status_description'] : undefined) ??
431
+ (isError ? span.status.message : undefined);
431
432
  const type = attrs['exception.type'] ??
432
433
  attrs['error.type'] ??
433
434
  attrs['http.error_name'];
@@ -10,8 +10,8 @@ export interface SQLAnalysisResult {
10
10
  /** Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them, and
11
11
  * produces two versions of the query:
12
12
  * - A normalized version for hashing purposes that does not account for whitespace, comments, and collapses
13
- * IN clauses that might cause a cardinality explosion.
14
- * - A presentable version that only does the IN clause collapsing */
13
+ * IN clauses and VALUES clauses that might cause a cardinality explosion.
14
+ * - A presentable version that only does the IN clause and VALUES clause collapsing */
15
15
  export declare function analyzeSQL(sql: string): SQLAnalysisResult;
16
16
  export declare function analyzeSQLTokens(tokens: Token[]): {
17
17
  tableOperations: {
@@ -11,14 +11,18 @@ const KEYWORDS = new Set([
11
11
  /** Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them, and
12
12
  * produces two versions of the query:
13
13
  * - A normalized version for hashing purposes that does not account for whitespace, comments, and collapses
14
- * IN clauses that might cause a cardinality explosion.
15
- * - A presentable version that only does the IN clause collapsing */
14
+ * IN clauses and VALUES clauses that might cause a cardinality explosion.
15
+ * - A presentable version that only does the IN clause and VALUES clause collapsing */
16
16
  function analyzeSQL(sql) {
17
17
  let semanticTokens = new Array();
18
18
  let presentableTokens = new Array();
19
19
  let seekingInParen = false;
20
20
  let analyzingIn = false;
21
21
  let skippingIn = false;
22
+ let seekingValuesParen = false;
23
+ let skippingValues = false;
24
+ let lookingForCommaOrEnd = false;
25
+ let valuesDepth = 0;
22
26
  for (let token of tokenizeSQL(sql)) {
23
27
  switch (token.type) {
24
28
  case "whitespace":
@@ -81,9 +85,82 @@ function analyzeSQL(sql) {
81
85
  skippingIn = false;
82
86
  }
83
87
  }
88
+ else if (seekingValuesParen) {
89
+ // We saw VALUES, and now look for an opening (. Skip whitespace/comments, bail if anything else.
90
+ switch (token.type) {
91
+ case "comment":
92
+ case "whitespace":
93
+ presentableTokens.push(token);
94
+ break;
95
+ case "punct":
96
+ if (token.value === "(") {
97
+ // Just add the opening paren, "..." and closing paren - preserve original spacing
98
+ presentableTokens.push(token);
99
+ presentableTokens.push({ type: "unknown", value: "..." });
100
+ presentableTokens.push({ type: "punct", value: ")" });
101
+ seekingValuesParen = false;
102
+ skippingValues = true;
103
+ valuesDepth = 1;
104
+ }
105
+ else {
106
+ // Not what we expected, go back to normal processing
107
+ presentableTokens.push(token);
108
+ seekingValuesParen = false;
109
+ }
110
+ break;
111
+ default:
112
+ // Not what we expected, go back to normal processing
113
+ presentableTokens.push(token);
114
+ seekingValuesParen = false;
115
+ break;
116
+ }
117
+ }
118
+ else if (skippingValues) {
119
+ // Skip everything until we've consumed all VALUES tuples
120
+ if (token.type === "punct") {
121
+ if (token.value === "(") {
122
+ valuesDepth++;
123
+ }
124
+ else if (token.value === ")") {
125
+ valuesDepth--;
126
+ if (valuesDepth === 0) {
127
+ // This closes a tuple, check for comma indicating more tuples
128
+ lookingForCommaOrEnd = true;
129
+ skippingValues = false;
130
+ }
131
+ }
132
+ }
133
+ }
134
+ else if (lookingForCommaOrEnd) {
135
+ // After closing a VALUES tuple, look for comma (more tuples) or end of VALUES
136
+ switch (token.type) {
137
+ case "comment":
138
+ case "whitespace":
139
+ // Skip whitespace/comments while looking for comma or end
140
+ break;
141
+ case "punct":
142
+ if (token.value === ",") {
143
+ // More tuples coming, continue skipping
144
+ lookingForCommaOrEnd = false;
145
+ skippingValues = true;
146
+ }
147
+ else {
148
+ // Not a comma, so VALUES clause is done
149
+ presentableTokens.push(token);
150
+ lookingForCommaOrEnd = false;
151
+ }
152
+ break;
153
+ default:
154
+ // VALUES clause is done, resume normal processing
155
+ presentableTokens.push(token);
156
+ lookingForCommaOrEnd = false;
157
+ break;
158
+ }
159
+ }
84
160
  else {
85
161
  presentableTokens.push(token);
86
162
  seekingInParen = token.type === "keyword" && token.value.toUpperCase() === "IN";
163
+ seekingValuesParen = token.type === "keyword" && token.value.toUpperCase() === "VALUES";
87
164
  }
88
165
  }
89
166
  return {
@@ -261,6 +338,45 @@ function analyzeSQLTokens(tokens) {
261
338
  }
262
339
  }
263
340
  }
341
+ // Normalize VALUES (...) clauses
342
+ if (token.type === 'keyword' && token.value === 'VALUES') {
343
+ if (tokens[i + 1]?.value === '(') {
344
+ appendToken('VALUES', 'keyword');
345
+ appendToken('(', 'punct');
346
+ appendToken('...', 'identifier');
347
+ appendToken(')', 'punct');
348
+ // Skip all VALUES tuples including comma-separated ones
349
+ let depth = 0;
350
+ let j = i + 1;
351
+ while (j < tokens.length) {
352
+ if (tokens[j].value === '(') {
353
+ depth++;
354
+ }
355
+ else if (tokens[j].value === ')') {
356
+ depth--;
357
+ if (depth === 0) {
358
+ // Check if there's a comma after this closing paren (more tuples)
359
+ let k = j + 1;
360
+ while (k < tokens.length && (tokens[k].type === 'whitespace' || tokens[k].type === 'comment')) {
361
+ k++;
362
+ }
363
+ if (tokens[k]?.value === ',') {
364
+ // More tuples, continue skipping
365
+ j = k + 1;
366
+ continue;
367
+ }
368
+ else {
369
+ // No more tuples, we're done
370
+ break;
371
+ }
372
+ }
373
+ }
374
+ j++;
375
+ }
376
+ i = j + 1;
377
+ continue;
378
+ }
379
+ }
264
380
  appendToken(token.value, token.type);
265
381
  i++;
266
382
  }
@@ -12,7 +12,7 @@ describe('SQL Analyzer - basic SQL operations', () => {
12
12
  const sql = `INSERT INTO logs (message, level) VALUES ('hi', 'info')`;
13
13
  const result = (0, sql_analyzer_1.analyzeSQL)(sql);
14
14
  expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
15
- expect(result.presentableQuery).toEqual(sql);
15
+ expect(result.presentableQuery).toEqual(`INSERT INTO logs (message, level) VALUES (...)`);
16
16
  });
17
17
  it('detects an INSERT INTO ... SELECT', () => {
18
18
  const sql = `INSERT INTO archive SELECT * FROM logs`;
@@ -85,7 +85,9 @@ describe('SQL Analyzer - basic SQL operations', () => {
85
85
  users: ['INSERT', 'UPDATE'],
86
86
  });
87
87
  expect(result.normalizedQuery).toMatch(/replace\s+into\s+users/i);
88
- expect(result.presentableQuery).toEqual(sql);
88
+ expect(result.presentableQuery).toEqual(`
89
+ REPLACE INTO users (id, name) VALUES (...);
90
+ `);
89
91
  });
90
92
  it('detects operation type from MERGE WHEN clause', () => {
91
93
  const sql = `
@@ -103,7 +105,15 @@ describe('SQL Analyzer - basic SQL operations', () => {
103
105
  incoming: ['SELECT'],
104
106
  });
105
107
  expect(result.normalizedQuery).toMatch(/merge\s+into\s+inventory/i);
106
- expect(result.presentableQuery).toEqual(sql);
108
+ expect(result.presentableQuery).toEqual(`
109
+ MERGE INTO inventory AS t
110
+ USING incoming AS s
111
+ ON t.sku = s.sku
112
+ WHEN MATCHED THEN
113
+ UPDATE SET t.qty = t.qty + s.qty
114
+ WHEN NOT MATCHED THEN
115
+ INSERT (sku, qty) VALUES (...);
116
+ `);
107
117
  });
108
118
  it('handles double-quoted identifiers', () => {
109
119
  const sql = `SELECT * FROM "Users" WHERE "Users"."Id" = 42`;
@@ -361,3 +371,115 @@ describe('SQL Analyzer - basic SQL operations', () => {
361
371
  expect(result.presentableQuery).toEqual(sql);
362
372
  });
363
373
  });
374
+ describe('SQL Analyzer - bulk INSERT VALUES cardinality reduction', () => {
375
+ it('collapses single VALUES tuple to maintain consistency', () => {
376
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com')`;
377
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
378
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
379
+ expect(result.normalizedQuery).toContain("VALUES(...)");
380
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
381
+ });
382
+ it('collapses multiple VALUES tuples to reduce cardinality', () => {
383
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com'), ('Bob', 'bob@example.com'), ('Charlie', 'charlie@example.com')`;
384
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
385
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
386
+ expect(result.normalizedQuery).toContain("VALUES(...)");
387
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
388
+ });
389
+ it('collapses multi-line bulk INSERT VALUES', () => {
390
+ const sql = `INSERT INTO products (name, price, category_id) VALUES
391
+ ('Laptop', 999.99, 1),
392
+ ('Mouse', 29.99, 2),
393
+ ('Keyboard', 79.99, 2),
394
+ ('Monitor', 299.99, 3)`;
395
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
396
+ expect(result.tableOperations).toEqual({ products: ['INSERT'] });
397
+ expect(result.normalizedQuery).toContain("VALUES(...)");
398
+ expect(result.presentableQuery).toEqual(`INSERT INTO products (name, price, category_id) VALUES
399
+ (...)`);
400
+ });
401
+ it('handles bulk INSERT with different spacing and formatting', () => {
402
+ const sql = `INSERT INTO logs(timestamp,level,message)VALUES('2023-01-01','info','start'),('2023-01-02','error','failed'),('2023-01-03','info','end')`;
403
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
404
+ expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
405
+ expect(result.normalizedQuery).toContain("VALUES(...)");
406
+ expect(result.presentableQuery).toEqual(`INSERT INTO logs(timestamp,level,message)VALUES(...)`);
407
+ });
408
+ it('collapses REPLACE INTO with multiple VALUES tuples', () => {
409
+ const sql = `REPLACE INTO cache (key, value, expires) VALUES ('user:1', 'data1', 3600), ('user:2', 'data2', 3600)`;
410
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
411
+ expect(result.tableOperations).toEqual({ cache: ['INSERT', 'UPDATE'] });
412
+ expect(result.normalizedQuery).toContain("VALUES(...)");
413
+ expect(result.presentableQuery).toEqual(`REPLACE INTO cache (key, value, expires) VALUES (...)`);
414
+ });
415
+ it('handles bulk INSERT with complex nested values', () => {
416
+ const sql = `INSERT INTO events (data, metadata) VALUES
417
+ ('{"type":"login"}', '{"source":"web","ip":"192.168.1.1"}'),
418
+ ('{"type":"logout"}', '{"source":"mobile","ip":"10.0.0.1"}')`;
419
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
420
+ expect(result.tableOperations).toEqual({ events: ['INSERT'] });
421
+ expect(result.normalizedQuery).toContain("VALUES(...)");
422
+ expect(result.presentableQuery).toEqual(`INSERT INTO events (data, metadata) VALUES
423
+ (...)`);
424
+ });
425
+ it('preserves INSERT with subquery (not VALUES)', () => {
426
+ const sql = `INSERT INTO archive SELECT * FROM logs WHERE created < '2023-01-01'`;
427
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
428
+ expect(result.tableOperations).toEqual({
429
+ archive: ['INSERT'],
430
+ logs: ['SELECT']
431
+ });
432
+ expect(result.presentableQuery).toEqual(sql);
433
+ expect(result.normalizedQuery).not.toContain("VALUES(...)");
434
+ });
435
+ it('handles bulk INSERT with quoted identifiers', () => {
436
+ const sql = `INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES ('John', 'Doe'), ('Jane', 'Smith')`;
437
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
438
+ expect(result.tableOperations).toEqual({ userprofiles: ['INSERT'] });
439
+ expect(result.normalizedQuery).toContain("VALUES(...)");
440
+ expect(result.presentableQuery).toEqual(`INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES (...)`);
441
+ });
442
+ it('handles bulk INSERT with mixed value types including NULL', () => {
443
+ const sql = `INSERT INTO metrics (name, value, tags) VALUES
444
+ ('cpu_usage', 85.5, NULL),
445
+ ('memory_usage', 67.2, 'production'),
446
+ ('disk_usage', NULL, 'staging')`;
447
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
448
+ expect(result.tableOperations).toEqual({ metrics: ['INSERT'] });
449
+ expect(result.normalizedQuery).toContain("VALUES(...)");
450
+ expect(result.presentableQuery).toEqual(`INSERT INTO metrics (name, value, tags) VALUES
451
+ (...)`);
452
+ });
453
+ it('handles very large bulk INSERT (cardinality explosion scenario)', () => {
454
+ // Generate a bulk insert with many VALUES tuples to simulate real cardinality issues
455
+ const valueTuples = Array.from({ length: 100 }, (_, i) => `('user${i}', 'user${i}@example.com')`);
456
+ const sql = `INSERT INTO users (name, email) VALUES ${valueTuples.join(', ')}`;
457
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
458
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
459
+ expect(result.normalizedQuery).toContain("VALUES(...)");
460
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
461
+ // Ensure the normalized query is much shorter than the original
462
+ expect(result.normalizedQuery.length).toBeLessThan(sql.length / 2);
463
+ });
464
+ it('handles bulk INSERT with functions and expressions in VALUES', () => {
465
+ const sql = `INSERT INTO audit_log (event_time, user_id, action) VALUES
466
+ (NOW(), 1, 'login'),
467
+ (CURRENT_TIMESTAMP, 2, 'logout'),
468
+ (DATE('2023-01-01'), 3, 'update')`;
469
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
470
+ expect(result.tableOperations).toEqual({ audit_log: ['INSERT'] });
471
+ expect(result.normalizedQuery).toContain("VALUES(...)");
472
+ expect(result.presentableQuery).toEqual(`INSERT INTO audit_log (event_time, user_id, action) VALUES
473
+ (...)`);
474
+ });
475
+ it('handles bulk INSERT with parentheses in string values', () => {
476
+ const sql = `INSERT INTO comments (text, author) VALUES
477
+ ('This is a comment (with parentheses)', 'user1'),
478
+ ('Another comment (also with parens)', 'user2')`;
479
+ const result = (0, sql_analyzer_1.analyzeSQL)(sql);
480
+ expect(result.tableOperations).toEqual({ comments: ['INSERT'] });
481
+ expect(result.normalizedQuery).toContain("VALUES(...)");
482
+ expect(result.presentableQuery).toEqual(`INSERT INTO comments (text, author) VALUES
483
+ (...)`);
484
+ });
485
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@comprehend/telemetry-node",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "Integration of comprehend.dev with OpenTelemetry in Node.js and similar environments.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -538,7 +538,8 @@ function extractErrorInfo(span: ReadableSpan): {
538
538
  (attrs['exception.message'] as string | undefined) ??
539
539
  (attrs['http.error_message'] as string | undefined) ??
540
540
  (attrs['db.response.status_code'] as string | undefined) ??
541
- (isError ? (attrs['otel.status_description'] as string | undefined) : undefined);
541
+ (isError ? (attrs['otel.status_description'] as string | undefined) : undefined) ??
542
+ (isError ? (span.status.message as string | undefined) : undefined);
542
543
  const type =
543
544
  (attrs['exception.type'] as string | undefined) ??
544
545
  (attrs['error.type'] as string | undefined) ??
@@ -14,7 +14,7 @@ describe('SQL Analyzer - basic SQL operations', () => {
14
14
  const result = analyzeSQL(sql);
15
15
 
16
16
  expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
17
- expect(result.presentableQuery).toEqual(sql);
17
+ expect(result.presentableQuery).toEqual(`INSERT INTO logs (message, level) VALUES (...)`);
18
18
  });
19
19
 
20
20
  it('detects an INSERT INTO ... SELECT', () => {
@@ -104,7 +104,9 @@ describe('SQL Analyzer - basic SQL operations', () => {
104
104
  });
105
105
 
106
106
  expect(result.normalizedQuery).toMatch(/replace\s+into\s+users/i);
107
- expect(result.presentableQuery).toEqual(sql);
107
+ expect(result.presentableQuery).toEqual(`
108
+ REPLACE INTO users (id, name) VALUES (...);
109
+ `);
108
110
  });
109
111
 
110
112
  it('detects operation type from MERGE WHEN clause', () => {
@@ -125,7 +127,15 @@ describe('SQL Analyzer - basic SQL operations', () => {
125
127
  });
126
128
 
127
129
  expect(result.normalizedQuery).toMatch(/merge\s+into\s+inventory/i);
128
- expect(result.presentableQuery).toEqual(sql);
130
+ expect(result.presentableQuery).toEqual(`
131
+ MERGE INTO inventory AS t
132
+ USING incoming AS s
133
+ ON t.sku = s.sku
134
+ WHEN MATCHED THEN
135
+ UPDATE SET t.qty = t.qty + s.qty
136
+ WHEN NOT MATCHED THEN
137
+ INSERT (sku, qty) VALUES (...);
138
+ `);
129
139
  });
130
140
 
131
141
  it('handles double-quoted identifiers', () => {
@@ -434,3 +444,140 @@ describe('SQL Analyzer - basic SQL operations', () => {
434
444
  expect(result.presentableQuery).toEqual(sql);
435
445
  });
436
446
  });
447
+
448
+ describe('SQL Analyzer - bulk INSERT VALUES cardinality reduction', () => {
449
+ it('collapses single VALUES tuple to maintain consistency', () => {
450
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com')`;
451
+ const result = analyzeSQL(sql);
452
+
453
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
454
+ expect(result.normalizedQuery).toContain("VALUES(...)");
455
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
456
+ });
457
+
458
+ it('collapses multiple VALUES tuples to reduce cardinality', () => {
459
+ const sql = `INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com'), ('Bob', 'bob@example.com'), ('Charlie', 'charlie@example.com')`;
460
+ const result = analyzeSQL(sql);
461
+
462
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
463
+ expect(result.normalizedQuery).toContain("VALUES(...)");
464
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
465
+ });
466
+
467
+ it('collapses multi-line bulk INSERT VALUES', () => {
468
+ const sql = `INSERT INTO products (name, price, category_id) VALUES
469
+ ('Laptop', 999.99, 1),
470
+ ('Mouse', 29.99, 2),
471
+ ('Keyboard', 79.99, 2),
472
+ ('Monitor', 299.99, 3)`;
473
+ const result = analyzeSQL(sql);
474
+
475
+ expect(result.tableOperations).toEqual({ products: ['INSERT'] });
476
+ expect(result.normalizedQuery).toContain("VALUES(...)");
477
+ expect(result.presentableQuery).toEqual(`INSERT INTO products (name, price, category_id) VALUES
478
+ (...)`);
479
+ });
480
+
481
+ it('handles bulk INSERT with different spacing and formatting', () => {
482
+ const sql = `INSERT INTO logs(timestamp,level,message)VALUES('2023-01-01','info','start'),('2023-01-02','error','failed'),('2023-01-03','info','end')`;
483
+ const result = analyzeSQL(sql);
484
+
485
+ expect(result.tableOperations).toEqual({ logs: ['INSERT'] });
486
+ expect(result.normalizedQuery).toContain("VALUES(...)");
487
+ expect(result.presentableQuery).toEqual(`INSERT INTO logs(timestamp,level,message)VALUES(...)`);
488
+ });
489
+
490
+ it('collapses REPLACE INTO with multiple VALUES tuples', () => {
491
+ const sql = `REPLACE INTO cache (key, value, expires) VALUES ('user:1', 'data1', 3600), ('user:2', 'data2', 3600)`;
492
+ const result = analyzeSQL(sql);
493
+
494
+ expect(result.tableOperations).toEqual({ cache: ['INSERT', 'UPDATE'] });
495
+ expect(result.normalizedQuery).toContain("VALUES(...)");
496
+ expect(result.presentableQuery).toEqual(`REPLACE INTO cache (key, value, expires) VALUES (...)`);
497
+ });
498
+
499
+ it('handles bulk INSERT with complex nested values', () => {
500
+ const sql = `INSERT INTO events (data, metadata) VALUES
501
+ ('{"type":"login"}', '{"source":"web","ip":"192.168.1.1"}'),
502
+ ('{"type":"logout"}', '{"source":"mobile","ip":"10.0.0.1"}')`;
503
+ const result = analyzeSQL(sql);
504
+
505
+ expect(result.tableOperations).toEqual({ events: ['INSERT'] });
506
+ expect(result.normalizedQuery).toContain("VALUES(...)");
507
+ expect(result.presentableQuery).toEqual(`INSERT INTO events (data, metadata) VALUES
508
+ (...)`);
509
+ });
510
+
511
+ it('preserves INSERT with subquery (not VALUES)', () => {
512
+ const sql = `INSERT INTO archive SELECT * FROM logs WHERE created < '2023-01-01'`;
513
+ const result = analyzeSQL(sql);
514
+
515
+ expect(result.tableOperations).toEqual({
516
+ archive: ['INSERT'],
517
+ logs: ['SELECT']
518
+ });
519
+ expect(result.presentableQuery).toEqual(sql);
520
+ expect(result.normalizedQuery).not.toContain("VALUES(...)");
521
+ });
522
+
523
+ it('handles bulk INSERT with quoted identifiers', () => {
524
+ const sql = `INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES ('John', 'Doe'), ('Jane', 'Smith')`;
525
+ const result = analyzeSQL(sql);
526
+
527
+ expect(result.tableOperations).toEqual({ userprofiles: ['INSERT'] });
528
+ expect(result.normalizedQuery).toContain("VALUES(...)");
529
+ expect(result.presentableQuery).toEqual(`INSERT INTO "UserProfiles" ("firstName", "lastName") VALUES (...)`);
530
+ });
531
+
532
+ it('handles bulk INSERT with mixed value types including NULL', () => {
533
+ const sql = `INSERT INTO metrics (name, value, tags) VALUES
534
+ ('cpu_usage', 85.5, NULL),
535
+ ('memory_usage', 67.2, 'production'),
536
+ ('disk_usage', NULL, 'staging')`;
537
+ const result = analyzeSQL(sql);
538
+
539
+ expect(result.tableOperations).toEqual({ metrics: ['INSERT'] });
540
+ expect(result.normalizedQuery).toContain("VALUES(...)");
541
+ expect(result.presentableQuery).toEqual(`INSERT INTO metrics (name, value, tags) VALUES
542
+ (...)`);
543
+ });
544
+
545
+ it('handles very large bulk INSERT (cardinality explosion scenario)', () => {
546
+ // Generate a bulk insert with many VALUES tuples to simulate real cardinality issues
547
+ const valueTuples = Array.from({length: 100}, (_, i) => `('user${i}', 'user${i}@example.com')`);
548
+ const sql = `INSERT INTO users (name, email) VALUES ${valueTuples.join(', ')}`;
549
+ const result = analyzeSQL(sql);
550
+
551
+ expect(result.tableOperations).toEqual({ users: ['INSERT'] });
552
+ expect(result.normalizedQuery).toContain("VALUES(...)");
553
+ expect(result.presentableQuery).toEqual(`INSERT INTO users (name, email) VALUES (...)`);
554
+
555
+ // Ensure the normalized query is much shorter than the original
556
+ expect(result.normalizedQuery.length).toBeLessThan(sql.length / 2);
557
+ });
558
+
559
+ it('handles bulk INSERT with functions and expressions in VALUES', () => {
560
+ const sql = `INSERT INTO audit_log (event_time, user_id, action) VALUES
561
+ (NOW(), 1, 'login'),
562
+ (CURRENT_TIMESTAMP, 2, 'logout'),
563
+ (DATE('2023-01-01'), 3, 'update')`;
564
+ const result = analyzeSQL(sql);
565
+
566
+ expect(result.tableOperations).toEqual({ audit_log: ['INSERT'] });
567
+ expect(result.normalizedQuery).toContain("VALUES(...)");
568
+ expect(result.presentableQuery).toEqual(`INSERT INTO audit_log (event_time, user_id, action) VALUES
569
+ (...)`);
570
+ });
571
+
572
+ it('handles bulk INSERT with parentheses in string values', () => {
573
+ const sql = `INSERT INTO comments (text, author) VALUES
574
+ ('This is a comment (with parentheses)', 'user1'),
575
+ ('Another comment (also with parens)', 'user2')`;
576
+ const result = analyzeSQL(sql);
577
+
578
+ expect(result.tableOperations).toEqual({ comments: ['INSERT'] });
579
+ expect(result.normalizedQuery).toContain("VALUES(...)");
580
+ expect(result.presentableQuery).toEqual(`INSERT INTO comments (text, author) VALUES
581
+ (...)`);
582
+ });
583
+ });
@@ -16,14 +16,18 @@ export interface SQLAnalysisResult {
16
16
  /** Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them, and
17
17
  * produces two versions of the query:
18
18
  * - A normalized version for hashing purposes that does not account for whitespace, comments, and collapses
19
- * IN clauses that might cause a cardinality explosion.
20
- * - A presentable version that only does the IN clause collapsing */
19
+ * IN clauses and VALUES clauses that might cause a cardinality explosion.
20
+ * - A presentable version that only does the IN clause and VALUES clause collapsing */
21
21
  export function analyzeSQL(sql: string): SQLAnalysisResult {
22
22
  let semanticTokens = new Array<Token>();
23
23
  let presentableTokens = new Array<Token>();
24
24
  let seekingInParen = false;
25
25
  let analyzingIn = false;
26
26
  let skippingIn = false;
27
+ let seekingValuesParen = false;
28
+ let skippingValues = false;
29
+ let lookingForCommaOrEnd = false;
30
+ let valuesDepth = 0;
27
31
  for (let token of tokenizeSQL(sql)) {
28
32
  switch (token.type) {
29
33
  case "whitespace":
@@ -87,9 +91,79 @@ export function analyzeSQL(sql: string): SQLAnalysisResult {
87
91
  skippingIn = false;
88
92
  }
89
93
  }
94
+ else if (seekingValuesParen) {
95
+ // We saw VALUES, and now look for an opening (. Skip whitespace/comments, bail if anything else.
96
+ switch (token.type) {
97
+ case "comment":
98
+ case "whitespace":
99
+ presentableTokens.push(token);
100
+ break;
101
+ case "punct":
102
+ if (token.value === "(") {
103
+ // Just add the opening paren, "..." and closing paren - preserve original spacing
104
+ presentableTokens.push(token);
105
+ presentableTokens.push({ type: "unknown", value: "..." });
106
+ presentableTokens.push({ type: "punct", value: ")" });
107
+ seekingValuesParen = false;
108
+ skippingValues = true;
109
+ valuesDepth = 1;
110
+ } else {
111
+ // Not what we expected, go back to normal processing
112
+ presentableTokens.push(token);
113
+ seekingValuesParen = false;
114
+ }
115
+ break;
116
+ default:
117
+ // Not what we expected, go back to normal processing
118
+ presentableTokens.push(token);
119
+ seekingValuesParen = false;
120
+ break;
121
+ }
122
+ }
123
+ else if (skippingValues) {
124
+ // Skip everything until we've consumed all VALUES tuples
125
+ if (token.type === "punct") {
126
+ if (token.value === "(") {
127
+ valuesDepth++;
128
+ } else if (token.value === ")") {
129
+ valuesDepth--;
130
+ if (valuesDepth === 0) {
131
+ // This closes a tuple, check for comma indicating more tuples
132
+ lookingForCommaOrEnd = true;
133
+ skippingValues = false;
134
+ }
135
+ }
136
+ }
137
+ }
138
+ else if (lookingForCommaOrEnd) {
139
+ // After closing a VALUES tuple, look for comma (more tuples) or end of VALUES
140
+ switch (token.type) {
141
+ case "comment":
142
+ case "whitespace":
143
+ // Skip whitespace/comments while looking for comma or end
144
+ break;
145
+ case "punct":
146
+ if (token.value === ",") {
147
+ // More tuples coming, continue skipping
148
+ lookingForCommaOrEnd = false;
149
+ skippingValues = true;
150
+ } else {
151
+ // Not a comma, so VALUES clause is done
152
+ presentableTokens.push(token);
153
+ lookingForCommaOrEnd = false;
154
+ }
155
+ break;
156
+ default:
157
+ // VALUES clause is done, resume normal processing
158
+ presentableTokens.push(token);
159
+ lookingForCommaOrEnd = false;
160
+ break;
161
+ }
162
+ }
90
163
  else {
91
164
  presentableTokens.push(token);
92
165
  seekingInParen = token.type === "keyword" && token.value.toUpperCase() === "IN";
166
+ seekingValuesParen = token.type === "keyword" && token.value.toUpperCase() === "VALUES";
93
167
  }
94
168
  }
95
169
  return {
@@ -288,6 +362,46 @@ export function analyzeSQLTokens(tokens: Token[]) {
288
362
  }
289
363
  }
290
364
 
365
+ // Normalize VALUES (...) clauses
366
+ if (token.type === 'keyword' && token.value === 'VALUES') {
367
+ if (tokens[i + 1]?.value === '(') {
368
+ appendToken('VALUES', 'keyword');
369
+ appendToken('(', 'punct');
370
+ appendToken('...', 'identifier');
371
+ appendToken(')', 'punct');
372
+
373
+ // Skip all VALUES tuples including comma-separated ones
374
+ let depth = 0;
375
+ let j = i + 1;
376
+ while (j < tokens.length) {
377
+ if (tokens[j].value === '(') {
378
+ depth++;
379
+ } else if (tokens[j].value === ')') {
380
+ depth--;
381
+ if (depth === 0) {
382
+ // Check if there's a comma after this closing paren (more tuples)
383
+ let k = j + 1;
384
+ while (k < tokens.length && (tokens[k].type === 'whitespace' || tokens[k].type === 'comment')) {
385
+ k++;
386
+ }
387
+ if (tokens[k]?.value === ',') {
388
+ // More tuples, continue skipping
389
+ j = k + 1;
390
+ continue;
391
+ } else {
392
+ // No more tuples, we're done
393
+ break;
394
+ }
395
+ }
396
+ }
397
+ j++;
398
+ }
399
+
400
+ i = j + 1;
401
+ continue;
402
+ }
403
+ }
404
+
291
405
  appendToken(token.value, token.type);
292
406
  i++;
293
407
  }