@comprehend/telemetry-node 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,287 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.analyzeSQL = analyzeSQL;
4
+ exports.analyzeSQLTokens = analyzeSQLTokens;
5
/**
 * Words that the tokenizer classifies as SQL keywords instead of identifiers.
 * Entries are upper-case; lookups are expected to upper-case the candidate word first.
 */
const KEYWORDS = new Set([
    // Query skeleton
    'SELECT', 'FROM', 'WHERE',
    // Data modification
    'INSERT', 'REPLACE', 'INTO', 'VALUES', 'DELETE', 'UPDATE', 'MERGE', 'SET',
    // Joins and aliasing
    'JOIN', 'LEFT', 'RIGHT', 'FULL', 'OUTER', 'INNER', 'ON', 'AS',
    // Predicates
    'AND', 'OR', 'NOT', 'IS', 'NULL', 'IN',
    // CTEs and set operations
    'WITH', 'RECURSIVE', 'UNION', 'ALL',
    // Grouping, ordering, paging
    'GROUP', 'BY', 'HAVING', 'ORDER', 'LIMIT', 'OFFSET',
    // Miscellaneous clauses
    'LATERAL', 'USING',
]);
11
/**
 * Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them, and
 * produces two versions of the query:
 * - A normalized version for hashing purposes that does not account for whitespace, comments, and collapses
 *   IN clauses that might cause a cardinality explosion.
 * - A presentable version that only does the IN clause collapsing.
 *
 * @param {string} sql - SQL text to analyze.
 * @returns {object} Everything returned by analyzeSQLTokens() for the normalized token stream, plus
 *   `presentableQuery`: the original text with each literal `IN (...)` list replaced by `...`.
 */
function analyzeSQL(sql) {
    const semanticTokens = [];
    const presentableTokens = [];
    // Small state machine used to collapse IN lists in the presentable query:
    //   seekingInParen: IN was just seen, waiting for its opening "("
    //   analyzingIn:    "(" was seen, deciding between a literal list and a sub-query
    //   skippingIn:     inside a literal list, dropping tokens until the matching ")"
    let seekingInParen = false;
    let analyzingIn = false;
    let skippingIn = false;
    // Parenthesis nesting while skipping; 1 means "directly inside the IN list".
    let skipDepth = 0;
    for (const token of tokenizeSQL(sql)) {
        // Semantic stream: no whitespace, comments or identifier quotes; case-normalized words.
        switch (token.type) {
            case "whitespace":
            case "comment":
            case "id-quote":
                // Skip
                break;
            case "keyword":
                // Normalize to uppercase.
                semanticTokens.push({ type: "keyword", value: token.value.toUpperCase() });
                break;
            case "identifier":
                // Normalize to lowercase.
                semanticTokens.push({ type: "identifier", value: token.value.toLowerCase() });
                break;
            default:
                semanticTokens.push(token);
                break;
        }
        if (seekingInParen) {
            // We saw IN, and now look for an opening (. Skip whitespace/comments, bail if anything else.
            presentableTokens.push(token);
            switch (token.type) {
                case "comment":
                case "whitespace":
                    break;
                case "punct":
                    seekingInParen = false;
                    analyzingIn = token.value === "(";
                    break;
                default:
                    seekingInParen = false;
                    break;
            }
        }
        else if (analyzingIn) {
            // We saw the opening paren of an IN. Pass over whitespace and comments. If we see a
            // keyword we know it's not something to collapse, it's a sub-query. Otherwise, we
            // enter skipping mode.
            switch (token.type) {
                case "comment":
                case "whitespace":
                    presentableTokens.push(token);
                    break;
                case "keyword":
                case "punct": // maybe immediate ), certainly not a value
                    presentableTokens.push(token);
                    analyzingIn = false;
                    break;
                default:
                    analyzingIn = false;
                    skippingIn = true;
                    skipDepth = 1;
                    presentableTokens.push({ type: "unknown", value: "..." });
            }
        }
        else if (skippingIn) {
            // Omit tokens until the ")" that closes the IN list itself. Nested parentheses
            // (function calls, tuples) are tracked so an inner ")" does not end the skip early.
            if (token.type === "punct") {
                if (token.value === "(") {
                    skipDepth++;
                }
                else if (token.value === ")") {
                    skipDepth--;
                }
                if (skipDepth === 0) {
                    presentableTokens.push(token);
                    skippingIn = false;
                }
            }
        }
        else {
            presentableTokens.push(token);
            seekingInParen = token.type === "keyword" && token.value.toUpperCase() === "IN";
        }
    }
    return {
        ...analyzeSQLTokens(semanticTokens),
        presentableQuery: presentableTokens.map(t => t.value).join("")
    };
}
94
/**
 * Lazily splits SQL text into coarse `{ type, value }` tokens.
 *
 * Token types: 'comment', 'string', 'id-quote', 'identifier', 'keyword', 'punct', 'operator',
 * 'whitespace' and 'unknown'. A quoted identifier ("x", `x` or [x]) is emitted as three tokens:
 * the opening quote, the bare name, and the closing quote. Bare words are keywords when their
 * upper-cased form is in KEYWORDS, identifiers otherwise; the original casing is kept in `value`.
 *
 * @param {string} sql - SQL text to tokenize.
 * @yields {{type: string, value: string}} Tokens in source order.
 */
function* tokenizeSQL(sql) {
    // Capture groups, in order: 1 comment, 2 'string', 3 "ident", 4 `ident`, 5 [ident],
    // 6 word, 7 punctuation, 8 comparison operator, 9 whitespace, 10 any other single character.
    const regex = /(--[^\n]*|\/\*[\s\S]*?\*\/)|('[^']*')|("(?:[^"]*)")|(`[^`]*`)|(\[[^\]]+\])|(\b[a-zA-Z_][\w$]*\b)|([(),;])|(<=|>=|<>|!=|=|<|>)|(\s+)|(\S)/g;
    for (let match = regex.exec(sql); match !== null; match = regex.exec(sql)) {
        // Exactly one alternative matched; find which capture group it was.
        const group = match.findIndex((captured, idx) => idx > 0 && captured !== undefined);
        const text = match[group];
        switch (group) {
            case 1:
                yield { type: 'comment', value: text };
                break;
            case 2:
                yield { type: 'string', value: text };
                break;
            case 3:
            case 4:
            case 5:
                // Quoted identifier: first and last characters are the quote marks.
                yield { type: 'id-quote', value: text[0] };
                yield { type: 'identifier', value: text.slice(1, -1) };
                yield { type: 'id-quote', value: text[text.length - 1] };
                break;
            case 6:
                yield { type: KEYWORDS.has(text.toUpperCase()) ? 'keyword' : "identifier", value: text };
                break;
            case 7:
                yield { type: 'punct', value: text };
                break;
            case 8:
                yield { type: 'operator', value: text };
                break;
            case 9:
                yield { type: 'whitespace', value: text };
                break;
            case 10:
                yield { type: 'unknown', value: text };
                break;
        }
    }
}
147
/**
 * Derives the tables touched (and how) plus a normalized query string from a token stream.
 *
 * Tokens are `{ type, value }` objects without whitespace or comments. Keywords are compared
 * against upper-case literals, so keyword values are expected to be upper-cased by the caller
 * (analyzeSQL does this before calling).
 *
 * @param {Array<{type: string, value: string}>} tokens - Token stream to analyze.
 * @returns {{tableOperations: Object<string, string[]>, normalizedQuery: string}}
 *   `tableOperations` maps each lower-cased table name to the operations recorded for it;
 *   `normalizedQuery` is the token values joined with single spaces (none next to punctuation),
 *   with literal IN lists collapsed to `IN(...)`.
 */
function analyzeSQLTokens(tokens) {
    const aliasNames = new Set();
    // A Map, not a plain object: table names come from the SQL text, and names such as
    // "constructor" or "__proto__" would otherwise resolve to inherited Object.prototype members.
    const tableOps = new Map();
    const normalizedTokens = [];
    let currentOp = null;
    let lastTokenType = null;
    // Appends a value to the normalized output, separated by a space unless either side is punctuation.
    function appendToken(val, type) {
        if (normalizedTokens.length && type !== 'punct' && lastTokenType !== 'punct') {
            normalizedTokens.push(' ');
        }
        normalizedTokens.push(val);
        lastTokenType = type;
    }
    for (let i = 0; i < tokens.length;) {
        const token = tokens[i];
        const next = tokens[i + 1];
        const afterNext = tokens[i + 2];
        const isKeyword = token.type === 'keyword';
        // Record operation context
        if (isKeyword && ['SELECT', 'INSERT', 'UPDATE', 'DELETE'].includes(token.value)) {
            currentOp = { ops: [token.value.toUpperCase()], at: i };
        }
        else if (isKeyword && token.value === 'USING') {
            // Tables named after USING are read from.
            currentOp = { ops: ["SELECT"], at: i };
        }
        else if (isKeyword && token.value === 'REPLACE') {
            currentOp = { ops: ["INSERT", "UPDATE"], at: i };
        }
        else if (isKeyword && token.value === 'MERGE') {
            // The effect of a MERGE is whatever INSERT/UPDATE/DELETE keywords follow it.
            const laterKeywords = new Set();
            for (let j = i + 1; j < tokens.length; j++) {
                if (tokens[j].type === "keyword") {
                    laterKeywords.add(tokens[j].value);
                }
            }
            currentOp = {
                ops: ["INSERT", "UPDATE", "DELETE"].filter(op => laterKeywords.has(op)),
                at: i
            };
        }
        // Detect CTE-style alias: <identifier> AS (
        if (token.type === 'identifier' &&
            next?.type === 'keyword' &&
            next.value === 'AS' &&
            afterNext?.value === '(') {
            aliasNames.add(token.value.toLowerCase());
            appendToken(token.value, token.type);
            appendToken('AS', 'keyword');
            appendToken('(', 'punct');
            i += 3;
            continue;
        }
        // Detect AS <alias> (table aliases, subquery aliases, etc.)
        if (isKeyword &&
            token.value.toUpperCase() === 'AS' &&
            next?.type === 'identifier') {
            aliasNames.add(next.value.toLowerCase());
            appendToken(token.value, token.type);
            appendToken(next.value, next.type);
            i += 2;
            continue;
        }
        // Record table name if in FROM, JOIN, INTO, UPDATE
        if (isKeyword &&
            ['FROM', 'JOIN', 'INTO', 'UPDATE', 'USING'].includes(token.value) &&
            next?.type === 'identifier' &&
            !(['FROM', 'JOIN', 'USING'].includes(token.value) && afterNext?.value === "(") // functions
        ) {
            const table = next.value.toLowerCase();
            // The balance check rejects keywords that sit inside an unclosed "(" opened after the
            // current operation started, e.g. the FROM in extract('epoch' FROM created).
            if (currentOp && !aliasNames.has(table) && hasBalancedParens(tokens, currentOp.at, i)) {
                let ops = tableOps.get(table);
                if (!ops) {
                    ops = new Set();
                    tableOps.set(table, ops);
                }
                for (const op of currentOp.ops) {
                    ops.add(op);
                }
            }
        }
        // Normalize IN (...) clauses
        if (isKeyword && token.value === 'IN' && next?.value === '(' && afterNext) {
            appendToken('IN', 'keyword');
            appendToken('(', 'punct');
            if (afterNext.type === 'keyword' || afterNext.value === ')') {
                // Subquery (or an empty list) → parse normally
                i += 2;
                continue;
            }
            // Literal list → collapse
            appendToken('...', 'identifier');
            // Skip until the matching ')'. The scan starts at the first token inside the list so
            // that a leading '(' (tuple lists such as ((1,2),(3,4))) is counted.
            let depth = 1;
            let j = i + 2;
            while (j < tokens.length && depth > 0) {
                if (tokens[j].value === '(') {
                    depth++;
                }
                else if (tokens[j].value === ')') {
                    depth--;
                }
                j++;
            }
            appendToken(')', 'punct');
            i = j;
            continue;
        }
        appendToken(token.value, token.type);
        i++;
    }
    return {
        tableOperations: Object.fromEntries(Array.from(tableOps, ([name, ops]) => [name, Array.from(ops)])),
        normalizedQuery: normalizedTokens.join('')
    };
}
272
/**
 * Reports whether the punctuation parentheses in tokens[start, end) are balanced.
 *
 * Only tokens of type 'punct' are counted. The answer is false as soon as a ")" appears
 * without a preceding unmatched "(", and false if any "(" is still open at `end`.
 *
 * @param {Array<{type: string, value: string}>} tokens - Token stream.
 * @param {number} start - Index of the first token to inspect (inclusive).
 * @param {number} end - Index at which to stop (exclusive).
 * @returns {boolean} True when every "(" in the range is closed within the range.
 */
function hasBalancedParens(tokens, start, end) {
    let depth = 0;
    let idx = start;
    while (idx < end) {
        const { type, value } = tokens[idx];
        if (type === 'punct' && value === '(') {
            depth += 1;
        }
        else if (type === 'punct' && value === ')') {
            depth -= 1;
            // A closer with nothing open can never be repaired by later tokens.
            if (depth < 0) {
                return false;
            }
        }
        idx += 1;
    }
    return depth === 0;
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,363 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const sql_analyzer_1 = require("./sql-analyzer");
4
describe('SQL Analyzer - basic SQL operations', () => {
    // Builds a multi-line SQL string that starts and ends with a newline, one row per line.
    const block = (...rows) => ['', ...rows, ''].join('\n');

    // Check builders. Each returns (result, sql) => void and runs a single expectation.
    const tables = (expected) => (result) => expect(result.tableOperations).toEqual(expected);
    const noTable = (name) => (result) => expect(result.tableOperations).not.toHaveProperty(name);
    const normalizedMatches = (pattern) => (result) => expect(result.normalizedQuery).toMatch(pattern);
    const normalizedContains = (text) => (result) => expect(result.normalizedQuery).toContain(text);
    const presentableIs = (text) => (result) => expect(result.presentableQuery).toEqual(text);
    const presentableUnchanged = (result, sql) => expect(result.presentableQuery).toEqual(sql);

    // Registers one test: analyze `sql`, then run the checks in the order given.
    const spec = (name, sql, ...checks) => {
        it(name, () => {
            const result = (0, sql_analyzer_1.analyzeSQL)(sql);
            for (const check of checks) {
                check(result, sql);
            }
        });
    };

    spec('detects a simple SELECT from one table',
        `SELECT * FROM users`,
        tables({ users: ['SELECT'] }),
        presentableUnchanged);

    spec('detects an INSERT INTO ... VALUES',
        `INSERT INTO logs (message, level) VALUES ('hi', 'info')`,
        tables({ logs: ['INSERT'] }),
        presentableUnchanged);

    spec('detects an INSERT INTO ... SELECT',
        `INSERT INTO archive SELECT * FROM logs`,
        tables({
            archive: ['INSERT'],
            logs: ['SELECT']
        }),
        presentableUnchanged);

    spec('detects a simple UPDATE',
        `UPDATE users SET last_login = NOW() WHERE id = 1`,
        tables({ users: ['UPDATE'] }),
        presentableUnchanged);

    spec('detects a simple DELETE',
        `DELETE FROM sessions WHERE expired = true`,
        tables({ sessions: ['DELETE'] }),
        presentableUnchanged);

    spec('detects tables in DELETE ... USING clause',
        block(
            'DELETE FROM sessions',
            'USING users',
            'WHERE sessions.user_id = users.id'),
        tables({
            sessions: ['DELETE'],
            users: ['SELECT'],
        }),
        normalizedMatches(/delete\s+from\s+sessions/i),
        normalizedMatches(/using\s+users/i),
        presentableUnchanged);

    spec('detects tables with aliases in FROM clause',
        block(
            'SELECT u.id, u.name',
            'FROM users u',
            'WHERE u.active = true'),
        tables({
            users: ['SELECT'],
        }),
        normalizedMatches(/from\s+users/i),
        presentableUnchanged);

    spec('handles multiple operations on the same table',
        block(
            'INSERT INTO stats (user_id, value)',
            'SELECT id, 42 FROM users;',
            'UPDATE stats SET value = 99 WHERE value < 50;'),
        tables({
            stats: ['INSERT', 'UPDATE'],
            users: ['SELECT']
        }),
        presentableUnchanged);

    spec('detects tables in REPLACE INTO statements',
        block("REPLACE INTO users (id, name) VALUES (1, 'Alice');"),
        tables({
            users: ['INSERT', 'UPDATE'],
        }),
        normalizedMatches(/replace\s+into\s+users/i),
        presentableUnchanged);

    spec('detects operation type from MERGE WHEN clause',
        block(
            'MERGE INTO inventory AS t',
            'USING incoming AS s',
            'ON t.sku = s.sku',
            'WHEN MATCHED THEN',
            'UPDATE SET t.qty = t.qty + s.qty',
            'WHEN NOT MATCHED THEN',
            'INSERT (sku, qty) VALUES (s.sku, s.qty);'),
        tables({
            inventory: ['INSERT', 'UPDATE'],
            incoming: ['SELECT'],
        }),
        normalizedMatches(/merge\s+into\s+inventory/i),
        presentableUnchanged);

    spec('handles double-quoted identifiers',
        `SELECT * FROM "Users" WHERE "Users"."Id" = 42`,
        tables({ users: ['SELECT'] }),
        normalizedMatches(/from\s+users/i),
        presentableUnchanged);

    spec('handles backtick-quoted identifiers (MySQL style)',
        'SELECT * FROM `auditLogs` WHERE `eventType` = "login"',
        tables({ auditlogs: ['SELECT'] }),
        normalizedMatches(/from\s+auditlogs/i),
        presentableUnchanged);

    spec('handles bracket-quoted identifiers (SQL Server style)',
        `SELECT [userId], [userName] FROM [Accounts]`,
        tables({ accounts: ['SELECT'] }),
        normalizedMatches(/from\s+accounts/i),
        presentableUnchanged);

    spec('normalizes quoted table names and columns to lowercase',
        `SELECT "ID", "Email" FROM "Customer"`,
        tables({ customer: ['SELECT'] }),
        normalizedMatches(/from\s+customer/i),
        presentableUnchanged);

    spec('detects real tables from subqueries in the FROM clause',
        block(
            'SELECT sq.name',
            'FROM (',
            'SELECT name FROM employees WHERE active = true',
            ') sq'),
        tables({
            employees: ['SELECT'],
        }),
        normalizedMatches(/from\s+employees/i),
        presentableUnchanged);

    spec('does not treat FROM function() as a table',
        block('SELECT * FROM get_active_users();'),
        tables({}),
        normalizedMatches(/from\s+get_active_users\s*\(\)/i),
        presentableUnchanged);

    spec('detects simple inner join',
        block(
            'SELECT * FROM users',
            'JOIN orders ON users.id = orders.user_id'),
        tables({
            users: ['SELECT'],
            orders: ['SELECT'],
        }));

    spec('detects LEFT JOIN',
        block(
            'SELECT u.name, o.total',
            'FROM users u',
            'LEFT JOIN orders o ON u.id = o.user_id'),
        tables({
            users: ['SELECT'],
            orders: ['SELECT'],
        }));

    spec('detects RIGHT JOIN',
        block(
            'SELECT * FROM payments',
            'RIGHT JOIN invoices ON payments.invoice_id = invoices.id'),
        tables({
            payments: ['SELECT'],
            invoices: ['SELECT'],
        }));

    spec('detects FULL OUTER JOIN',
        block(
            'SELECT * FROM logs l',
            'FULL OUTER JOIN metrics m ON l.time = m.time'),
        tables({
            logs: ['SELECT'],
            metrics: ['SELECT'],
        }));

    spec('detects JOIN with subquery alias',
        block(
            'SELECT * FROM users u',
            "JOIN (SELECT * FROM events WHERE type = 'login') e ON u.id = e.user_id"),
        tables({
            users: ['SELECT'],
            events: ['SELECT'],
        }));

    spec('ignores subquery alias after JOIN',
        block(
            'SELECT * FROM users',
            'JOIN (SELECT * FROM sessions) AS s ON users.id = s.user_id'),
        tables({
            users: ['SELECT'],
            sessions: ['SELECT'],
        }));

    spec('handles JOIN with quoted identifiers',
        block(
            'SELECT * FROM "userData"',
            'JOIN "auditLogs" ON "userData"."id" = "auditLogs"."userId"'),
        tables({
            userdata: ['SELECT'],
            auditlogs: ['SELECT'],
        }));

    spec('detects tables involved in LATERAL JOINs',
        block(
            'SELECT u.id, r.*',
            'FROM users u',
            'LEFT JOIN LATERAL (',
            'SELECT * FROM reports WHERE reports.user_id = u.id ORDER BY created_at DESC LIMIT 1',
            ') r ON true'),
        tables({
            users: ['SELECT'],
            reports: ['SELECT'],
        }),
        normalizedMatches(/join\s+lateral/i),
        normalizedMatches(/from\s+reports/i));

    spec('collapses IN clauses with values to avoid cardinality explosion',
        `SELECT name FROM users WHERE id IN (1, 2, 3)`,
        tables({ users: ['SELECT'] }),
        normalizedContains('IN(...)'),
        presentableIs(`SELECT name FROM users WHERE id IN (...)`));

    spec('preserves and analyzes subquery in IN clause',
        `SELECT * FROM users WHERE id IN (SELECT user_id FROM events)`,
        tables({ users: ['SELECT'], events: ['SELECT'] }),
        normalizedMatches(/IN\s*\(.*SELECT.*\)/i),
        presentableUnchanged);

    spec('ignores CTEs in table detection',
        block(
            'WITH recent_orders AS (',
            'SELECT * FROM orders',
            ')',
            'SELECT * FROM recent_orders JOIN users ON users.id = recent_orders.user_id'),
        noTable('recent_orders'),
        tables({ orders: ['SELECT'], users: ['SELECT'] }));

    spec('handles multiple CTEs and ignores them as tables',
        block(
            'WITH active_users AS (',
            'SELECT * FROM users WHERE active = true',
            '),',
            'recent_logins AS (',
            "SELECT * FROM logins WHERE login_time > NOW() - INTERVAL '7 days'",
            ')',
            'SELECT au.id, rl.login_time',
            'FROM active_users au',
            'JOIN recent_logins rl ON au.id = rl.user_id',
            'JOIN sessions s ON s.user_id = au.id'),
        tables({
            users: ['SELECT'],
            logins: ['SELECT'],
            sessions: ['SELECT'],
        }),
        noTable('active_users'),
        noTable('recent_logins'),
        // Also check normalization keeps the CTEs in the output
        normalizedMatches(/with\s+active_users/i),
        normalizedMatches(/recent_logins/i));

    spec('handles multiple quoted CTEs and real quoted table names',
        block(
            'WITH "ActiveUsers" AS (',
            'SELECT * FROM "Users" WHERE "Active" = true',
            '),',
            '[RecentLogins] AS (',
            "SELECT * FROM [Logins] WHERE [LoginTime] > NOW() - INTERVAL '7 days'",
            ')',
            'SELECT au."Id", rl."LoginTime"',
            'FROM "ActiveUsers" au',
            'JOIN [RecentLogins] rl ON au."Id" = rl."UserId"',
            'JOIN `Sessions` s ON s.`UserId` = au."Id"'),
        tables({
            users: ['SELECT'],
            logins: ['SELECT'],
            sessions: ['SELECT'],
        }),
        noTable('activeusers'),
        noTable('recentlogins'),
        normalizedMatches(/with\s+activeusers/i),
        normalizedMatches(/recentlogins/i),
        normalizedMatches(/from\s+activeusers/i),
        normalizedMatches(/join\s+sessions/i));

    spec('handles recursive CTEs and ignores the CTE alias as a table',
        block(
            'WITH RECURSIVE descendants AS (',
            'SELECT id, parent_id FROM categories WHERE parent_id IS NULL',
            'UNION ALL',
            'SELECT c.id, c.parent_id',
            'FROM categories c',
            'JOIN descendants d ON c.parent_id = d.id',
            ')',
            'SELECT * FROM descendants;'),
        tables({
            categories: ['SELECT'],
        }),
        noTable('descendants'),
        // Normalization check (ensure RECURSIVE appears)
        normalizedMatches(/with\s+recursive\s+descendants/i),
        normalizedMatches(/join\s+descendants/i));

    spec('ignores function argument in EXTRACT() when detecting tables',
        block(
            "SELECT id, extract('epoch' FROM created) AS time, actor, changes",
            'FROM transactions',
            'WHERE transactions.id = $1'),
        tables({
            transactions: ['SELECT']
        }),
        // Bonus assertion: normalized query includes EXTRACT(...) intact
        normalizedMatches(/extract\s*\(.*from.*created.*\)/i),
        presentableUnchanged);
});