@comprehend/telemetry-node 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/.claude/settings.local.json +2 -1
  2. package/.idea/telemetry-node.iml +0 -1
  3. package/DEVELOPMENT.md +69 -0
  4. package/README.md +173 -0
  5. package/dist/ComprehendDevSpanProcessor.d.ts +9 -6
  6. package/dist/ComprehendDevSpanProcessor.js +146 -87
  7. package/dist/ComprehendDevSpanProcessor.test.d.ts +1 -0
  8. package/dist/ComprehendDevSpanProcessor.test.js +495 -0
  9. package/dist/ComprehendMetricsExporter.d.ts +18 -0
  10. package/dist/ComprehendMetricsExporter.js +178 -0
  11. package/dist/ComprehendMetricsExporter.test.d.ts +1 -0
  12. package/dist/ComprehendMetricsExporter.test.js +266 -0
  13. package/dist/ComprehendSDK.d.ts +18 -0
  14. package/dist/ComprehendSDK.js +56 -0
  15. package/dist/ComprehendSDK.test.d.ts +1 -0
  16. package/dist/ComprehendSDK.test.js +126 -0
  17. package/dist/WebSocketConnection.d.ts +23 -3
  18. package/dist/WebSocketConnection.js +106 -12
  19. package/dist/WebSocketConnection.test.d.ts +1 -0
  20. package/dist/WebSocketConnection.test.js +473 -0
  21. package/dist/index.d.ts +3 -1
  22. package/dist/index.js +5 -1
  23. package/dist/sql-analyzer.js +2 -11
  24. package/dist/sql-analyzer.test.js +0 -12
  25. package/dist/util.d.ts +2 -0
  26. package/dist/util.js +7 -0
  27. package/dist/wire-protocol.d.ts +168 -28
  28. package/jest.config.js +1 -0
  29. package/package.json +4 -2
  30. package/src/ComprehendDevSpanProcessor.test.ts +626 -0
  31. package/src/ComprehendDevSpanProcessor.ts +170 -105
  32. package/src/ComprehendMetricsExporter.test.ts +334 -0
  33. package/src/ComprehendMetricsExporter.ts +225 -0
  34. package/src/ComprehendSDK.test.ts +160 -0
  35. package/src/ComprehendSDK.ts +63 -0
  36. package/src/WebSocketConnection.test.ts +616 -0
  37. package/src/WebSocketConnection.ts +135 -13
  38. package/src/index.ts +3 -2
  39. package/src/util.ts +6 -0
  40. package/src/wire-protocol.ts +204 -29
  41. package/src/sql-analyzer.test.ts +0 -599
  42. package/src/sql-analyzer.ts +0 -439
@@ -1,439 +0,0 @@
1
/**
 * One lexical unit emitted by the SQL tokenizer: a coarse category plus the
 * text it carries. Quoted identifiers are split into an `id-quote` token for
 * each delimiter and an `identifier` token for the name between them.
 */
type Token = {
    type:
        | 'keyword'
        | 'identifier'
        | 'id-quote'
        | 'string'
        | 'comment'
        | 'punct'
        | 'operator'
        | 'whitespace'
        | 'unknown';
    value: string;
};
2
-
3
/**
 * Upper-case words the tokenizer classifies as SQL keywords. Lookups are done
 * on the upper-cased word, so matching is effectively case-insensitive.
 */
const KEYWORDS = new Set([
    // Statements and their direct clauses
    'SELECT', 'FROM', 'WHERE',
    'INSERT', 'REPLACE', 'INTO', 'VALUES',
    'DELETE', 'UPDATE', 'MERGE', 'SET',
    // Joins
    'JOIN', 'LEFT', 'RIGHT', 'FULL', 'OUTER', 'INNER', 'ON',
    // Aliasing and boolean / predicate words
    'AS', 'AND', 'OR', 'NOT', 'IS', 'NULL', 'IN',
    // CTEs and set operations
    'WITH', 'RECURSIVE', 'UNION', 'ALL',
    // Grouping, ordering, paging and remaining table-introducing words
    'GROUP', 'BY', 'HAVING', 'ORDER', 'LIMIT', 'OFFSET', 'LATERAL', 'USING',
]);
9
-
10
/** Outcome of analyzing a single SQL string. */
export interface SQLAnalysisResult {
    /** Lower-cased table name mapped to the operations (e.g. `SELECT`, `INSERT`) seen on it. */
    tableOperations: Record<string, string[]>;

    /** Whitespace- and comment-free rendering with IN / VALUES lists collapsed; intended for hashing. */
    normalizedQuery: string;

    /** Original text with only the IN / VALUES lists collapsed to `...`. */
    presentableQuery: string;
}
15
-
16
/**
 * Performs a rough tokenization of the SQL, extracts the tables involved and the
 * operations on them, and produces two versions of the query:
 *  - A normalized version for hashing purposes that ignores whitespace and comments
 *    and collapses IN and VALUES lists that might cause a cardinality explosion.
 *  - A presentable version that keeps the original text and only collapses the
 *    IN and VALUES lists to `...`.
 *
 * @param sql Raw SQL text.
 * @returns Table operations and normalized query (from `analyzeSQLTokens`) plus
 *          the presentable query.
 */
export function analyzeSQL(sql: string): SQLAnalysisResult {
    // The presentable-query scanner is a small state machine; exactly one mode is
    // active at a time.
    type Mode =
        | 'normal'
        | 'seekInParen'      // saw IN, waiting for "("
        | 'inFirst'          // saw "IN (", inspecting the first token inside
        | 'skipIn'           // dropping the members of a literal IN list
        | 'seekValuesParen'  // saw VALUES, waiting for "("
        | 'skipValues'       // dropping the contents of VALUES tuples
        | 'afterTuple';      // a tuple just closed; a comma means more follow

    const semanticTokens: Token[] = [];
    const presentableTokens: Token[] = [];
    let mode: Mode = 'normal';
    let inDepth = 0;
    let valuesDepth = 0;
    let skippedWhitespace: Token[] = [];

    for (const token of tokenizeSQL(sql)) {
        // Semantic stream: no whitespace, comments or identifier quotes; keywords
        // upper-cased and identifiers lower-cased.
        switch (token.type) {
            case 'whitespace':
            case 'comment':
            case 'id-quote':
                break;
            case 'keyword':
                semanticTokens.push({ type: 'keyword', value: token.value.toUpperCase() });
                break;
            case 'identifier':
                semanticTokens.push({ type: 'identifier', value: token.value.toLowerCase() });
                break;
            default:
                semanticTokens.push(token);
                break;
        }

        const isTrivia = token.type === 'whitespace' || token.type === 'comment';
        const isPunct = token.type === 'punct';

        switch (mode) {
            case 'seekInParen':
                // Pass over whitespace/comments; anything other than "(" cancels the collapse.
                presentableTokens.push(token);
                if (!isTrivia) {
                    mode = isPunct && token.value === '(' ? 'inFirst' : 'normal';
                }
                break;

            case 'inFirst':
                if (isTrivia) {
                    presentableTokens.push(token);
                } else if (token.type === 'keyword' || isPunct) {
                    // A keyword means a sub-query; punctuation is an immediate ")" or a
                    // nested "(". Neither is a literal list, so nothing is collapsed.
                    presentableTokens.push(token);
                    mode = 'normal';
                } else {
                    presentableTokens.push({ type: 'unknown', value: '...' });
                    inDepth = 1;
                    mode = 'skipIn';
                }
                break;

            case 'skipIn':
                // Omit tokens until the ")" that closes the IN list. Depth is tracked so
                // that a nested call such as IN (lower('a'), 2) does not end the skip early.
                if (isPunct) {
                    if (token.value === '(') {
                        inDepth++;
                    } else if (token.value === ')' && --inDepth === 0) {
                        presentableTokens.push(token);
                        mode = 'normal';
                    }
                }
                break;

            case 'seekValuesParen':
                // Every token seen here is kept; only "(" starts a collapse.
                presentableTokens.push(token);
                if (isTrivia) {
                    break;
                }
                if (isPunct && token.value === '(') {
                    // Emit "(...)" once for the whole VALUES clause.
                    presentableTokens.push({ type: 'unknown', value: '...' });
                    presentableTokens.push({ type: 'punct', value: ')' });
                    valuesDepth = 1;
                    mode = 'skipValues';
                } else {
                    mode = 'normal';
                }
                break;

            case 'skipValues':
                // Drop everything up to the end of the current tuple.
                if (isPunct) {
                    if (token.value === '(') {
                        valuesDepth++;
                    } else if (token.value === ')') {
                        valuesDepth--;
                        if (valuesDepth === 0) {
                            mode = 'afterTuple';
                        }
                    }
                }
                break;

            case 'afterTuple':
                if (isTrivia) {
                    // Hold back until we know whether another tuple follows.
                    skippedWhitespace.push(token);
                } else if (isPunct && token.value === ',') {
                    // More tuples: the held-back whitespace belongs to the collapsed part.
                    skippedWhitespace = [];
                    mode = 'skipValues';
                } else {
                    // VALUES clause is over: restore the held-back whitespace, then this token.
                    presentableTokens.push(...skippedWhitespace, token);
                    skippedWhitespace = [];
                    mode = 'normal';
                }
                break;

            default:
                presentableTokens.push(token);
                if (token.type === 'keyword') {
                    const word = token.value.toUpperCase();
                    if (word === 'IN') {
                        mode = 'seekInParen';
                    } else if (word === 'VALUES') {
                        mode = 'seekValuesParen';
                    }
                }
                break;
        }
    }

    // Input ended right after a VALUES tuple: keep trailing whitespace/comments
    // instead of silently dropping them.
    if (skippedWhitespace.length > 0) {
        presentableTokens.push(...skippedWhitespace);
    }

    return {
        ...analyzeSQLTokens(semanticTokens),
        presentableQuery: presentableTokens.map(t => t.value).join('')
    };
}
183
-
184
/**
 * Lazily splits SQL text into tokens using a single alternation regex. Every
 * character of the input ends up in exactly one match: the last two
 * alternatives catch runs of whitespace and any other single character.
 *
 * Quoted identifiers ("x", `x`, [x]) are emitted as three tokens: the opening
 * delimiter, the bare identifier, and the closing delimiter.
 *
 * @param sql Raw SQL text.
 * @returns A generator of tokens in source order.
 */
function* tokenizeSQL(sql: string): Generator<Token> {
    const pattern = /(--[^\n]*|\/\*[\s\S]*?\*\/)|('[^']*')|("(?:[^"]*)")|(`[^`]*`)|(\[[^\]]+\])|(\b[a-zA-Z_][\w$]*\b)|([(),;])|(<=|>=|<>|!=|=|<|>)|(\s+)|(\S)/g;

    for (const found of sql.matchAll(pattern)) {
        // Capture groups, in order: 1 comment, 2 string literal, 3 "identifier",
        // 4 `identifier`, 5 [identifier], 6 word, 7 punctuation, 8 operator,
        // 9 whitespace, 10 anything else. Exactly one is set per match.
        const [
            ,
            comment,
            singleQuoted,
            doubleQuoted,
            backtickQuoted,
            bracketQuoted,
            word,
            punct,
            operator,
            ws,
            unknown
        ] = found;

        if (comment) {
            yield { type: 'comment', value: comment };
            continue;
        }
        if (singleQuoted) {
            yield { type: 'string', value: singleQuoted };
            continue;
        }

        // All three quoting styles share one shape: delimiter, name, delimiter.
        const quoted = doubleQuoted || backtickQuoted || bracketQuoted;
        if (quoted) {
            yield { type: 'id-quote', value: quoted.charAt(0) };
            yield { type: 'identifier', value: quoted.slice(1, -1) };
            yield { type: 'id-quote', value: quoted.charAt(quoted.length - 1) };
            continue;
        }

        if (word) {
            const isKeyword = KEYWORDS.has(word.toUpperCase());
            yield { type: isKeyword ? 'keyword' : 'identifier', value: word };
        } else if (punct) {
            yield { type: 'punct', value: punct };
        } else if (operator) {
            yield { type: 'operator', value: operator };
        } else if (ws) {
            yield { type: 'whitespace', value: ws };
        } else if (unknown) {
            yield { type: 'unknown', value: unknown };
        }
    }
}
240
-
241
/**
 * Walks a semantic token stream (no whitespace/comments, keywords upper-cased,
 * identifiers lower-cased) and derives:
 *  - which tables are touched by which operations, and
 *  - a normalized query string in which literal IN lists and VALUES tuples are
 *    collapsed to `...`.
 *
 * @param tokens Semantic tokens, as produced by `analyzeSQL`.
 * @returns `tableOperations` (table name mapped to operations) and `normalizedQuery`.
 */
export function analyzeSQLTokens(tokens: Token[]) {
    const aliasNames = new Set<string>();
    // A Map rather than a plain object: table names such as "constructor" or
    // "toString" would otherwise resolve to inherited Object.prototype members.
    const tableOps = new Map<string, Set<string>>();
    const normalizedTokens: string[] = [];

    let currentOp: { ops: string[], at: number } | null = null;
    let lastTokenType: string | null = null;

    // Appends a token to the normalized output, separated by a single space
    // unless either neighbour is punctuation.
    function appendToken(val: string, type: string) {
        if (normalizedTokens.length && type !== 'punct' && lastTokenType !== 'punct') {
            normalizedTokens.push(' ');
        }
        normalizedTokens.push(val);
        lastTokenType = type;
    }

    for (let i = 0; i < tokens.length; ) {
        const token = tokens[i];
        const isKeyword = token.type === 'keyword';

        // Record operation context
        if (isKeyword && ['SELECT', 'INSERT', 'UPDATE', 'DELETE'].includes(token.value)) {
            currentOp = { ops: [token.value], at: i };
        }
        else if (isKeyword && token.value === 'USING') {
            currentOp = { ops: ['SELECT'], at: i };
        }
        else if (isKeyword && token.value === 'REPLACE') {
            currentOp = { ops: ['INSERT', 'UPDATE'], at: i };
        }
        else if (isKeyword && token.value === 'MERGE') {
            // A MERGE performs whichever of INSERT / UPDATE / DELETE appear later in the statement.
            let sawInsert = false;
            let sawUpdate = false;
            let sawDelete = false;
            for (let j = i + 1; j < tokens.length; j++) {
                if (tokens[j].type === 'keyword') {
                    if (tokens[j].value === 'INSERT')
                        sawInsert = true;
                    if (tokens[j].value === 'UPDATE')
                        sawUpdate = true;
                    if (tokens[j].value === 'DELETE')
                        sawDelete = true;
                }
            }
            const mergeOps: string[] = [];
            if (sawInsert)
                mergeOps.push('INSERT');
            if (sawUpdate)
                mergeOps.push('UPDATE');
            if (sawDelete)
                mergeOps.push('DELETE');
            currentOp = { ops: mergeOps, at: i };
        }

        // Detect CTE-style alias: <identifier> AS (
        if (
            token.type === 'identifier' &&
            tokens[i + 1]?.type === 'keyword' &&
            tokens[i + 1].value === 'AS' &&
            tokens[i + 2]?.value === '('
        ) {
            aliasNames.add(token.value.toLowerCase());
            appendToken(token.value, token.type);
            appendToken('AS', 'keyword');
            appendToken('(', 'punct');
            i += 3;
            continue;
        }

        // Detect AS <alias> (table aliases, subquery aliases, etc.)
        if (
            isKeyword &&
            token.value.toUpperCase() === 'AS' &&
            tokens[i + 1]?.type === 'identifier'
        ) {
            aliasNames.add(tokens[i + 1].value.toLowerCase());
            appendToken(token.value, token.type);
            appendToken(tokens[i + 1].value, tokens[i + 1].type);
            i += 2;
            continue;
        }

        // Record table name if in FROM, JOIN, INTO, UPDATE, USING
        if (
            isKeyword &&
            ['FROM', 'JOIN', 'INTO', 'UPDATE', 'USING'].includes(token.value) &&
            tokens[i + 1]?.type === 'identifier' &&
            !(['FROM', 'JOIN', 'USING'].includes(token.value) && tokens[i + 2]?.value === '(') // functions
        ) {
            const table = tokens[i + 1].value.toLowerCase();
            // Only attribute the table when the parentheses between the operation
            // keyword and this point are balanced.
            if (currentOp && !aliasNames.has(table) && hasBalancedParens(tokens, currentOp.at, i)) {
                let ops = tableOps.get(table);
                if (!ops) {
                    ops = new Set();
                    tableOps.set(table, ops);
                }
                for (const op of currentOp.ops) {
                    ops.add(op);
                }
            }
        }

        // Normalize IN (...) clauses
        if (
            isKeyword &&
            token.value === 'IN' &&
            tokens[i + 1]?.value === '(' &&
            tokens[i + 2] // make sure something exists inside
        ) {
            appendToken('IN', 'keyword');
            appendToken('(', 'punct');

            const firstInside = tokens[i + 2];
            if (firstInside.type === 'keyword' || firstInside.type === 'punct') {
                // Keyword → subquery. Punctuation → an immediate ")" or a nested "("
                // (row values, parenthesised subquery). None of these is a flat literal
                // list, so parse normally; collapsing here would miscount depth and
                // could swallow the rest of the statement.
                i += 2;
                continue;
            }

            // Literal list → collapse
            appendToken('...', 'identifier');

            // Skip until matching ')'
            let depth = 1;
            let j = i + 3;
            while (j < tokens.length && depth > 0) {
                if (tokens[j].value === '(') depth++;
                else if (tokens[j].value === ')') depth--;
                j++;
            }

            appendToken(')', 'punct');
            i = j;
            continue;
        }

        // Normalize VALUES (...) clauses
        if (isKeyword && token.value === 'VALUES' && tokens[i + 1]?.value === '(') {
            appendToken('VALUES', 'keyword');
            appendToken('(', 'punct');
            appendToken('...', 'identifier');
            appendToken(')', 'punct');

            // Skip all VALUES tuples including comma-separated ones
            let depth = 0;
            let j = i + 1;
            while (j < tokens.length) {
                if (tokens[j].value === '(') {
                    depth++;
                } else if (tokens[j].value === ')') {
                    depth--;
                    if (depth === 0) {
                        // Check if there's a comma after this closing paren (more tuples)
                        let k = j + 1;
                        while (k < tokens.length && (tokens[k].type === 'whitespace' || tokens[k].type === 'comment')) {
                            k++;
                        }
                        if (tokens[k]?.value === ',') {
                            // More tuples, continue skipping
                            j = k + 1;
                            continue;
                        }
                        // No more tuples, we're done
                        break;
                    }
                }
                j++;
            }

            // j rests on the final ")" (or is past the end when unbalanced).
            i = j + 1;
            continue;
        }

        appendToken(token.value, token.type);
        i++;
    }

    return {
        tableOperations: Object.fromEntries(
            Array.from(tableOps, ([table, ops]): [string, string[]] => [table, Array.from(ops)])
        ),
        normalizedQuery: normalizedTokens.join('')
    };
}
425
-
426
/**
 * Reports whether the punctuation parentheses in `tokens[start, end)` are balanced.
 *
 * @param tokens Token list to inspect.
 * @param start  Index of the first token to examine (inclusive).
 * @param end    Index at which to stop (exclusive).
 * @returns `true` when every "(" in the range is closed within the range and no
 *          ")" appears before its matching "(".
 */
function hasBalancedParens(tokens: Token[], start: number, end: number): boolean {
    let open = 0;
    let idx = start;

    while (idx < end) {
        const { type, value } = tokens[idx++];
        if (type !== 'punct') {
            continue;
        }
        if (value === '(') {
            open += 1;
        } else if (value === ')') {
            open -= 1;
            // A ")" with nothing open can never be balanced later.
            if (open < 0) {
                return false;
            }
        }
    }

    return open === 0;
}