@comprehend/telemetry-node 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,316 @@
1
/** A single lexical unit of a SQL statement: its category plus the text it carries. */
type Token = {
  type:
    | 'keyword'
    | 'identifier'
    | 'id-quote'
    | 'string'
    | 'comment'
    | 'punct'
    | 'operator'
    | 'whitespace'
    | 'unknown';
  value: string;
};

/**
 * Words classified as SQL keywords. All entries are upper case, so a word has to be
 * upper-cased before it is looked up here.
 */
const KEYWORDS = new Set([
  // Statements and their targets
  'SELECT', 'FROM', 'WHERE', 'INSERT', 'REPLACE', 'INTO', 'VALUES', 'DELETE', 'UPDATE',
  'MERGE', 'SET',
  // Joins
  'JOIN', 'LEFT', 'RIGHT', 'FULL', 'OUTER', 'INNER', 'ON', 'AS',
  // Predicates
  'AND', 'OR', 'NOT', 'IS', 'NULL', 'IN',
  // CTEs and set operations
  'WITH', 'RECURSIVE', 'UNION', 'ALL',
  // Grouping, ordering and paging
  'GROUP', 'BY', 'HAVING', 'ORDER', 'LIMIT', 'OFFSET', 'LATERAL', 'USING',
]);
9
+
10
/** Outcome of analyzing a single SQL statement. */
export interface SQLAnalysisResult {
  /** Operations (e.g. SELECT, INSERT) keyed by the name of the table they were seen on. */
  tableOperations: Record<string, string[]>;

  /** Query text normalized for hashing purposes. */
  normalizedQuery: string;

  /** Query text intended for display. */
  presentableQuery: string;
}
15
+
16
/**
 * Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them,
 * and produces two versions of the query:
 *  - A normalized version for hashing purposes that ignores whitespace and comments and collapses
 *    IN clauses that might cause a cardinality explosion.
 *  - A presentable version that keeps the original text and only collapses IN value lists to `...`.
 *
 * @param sql Raw SQL text to analyze.
 * @returns The table operations and normalized query computed by `analyzeSQLTokens`, plus the
 *          presentable query built here.
 */
export function analyzeSQL(sql: string): SQLAnalysisResult {
  const semanticTokens: Token[] = [];
  const presentableTokens: Token[] = [];

  // State of the IN-list collapsing applied to the presentable token stream:
  //  - "seeking":   saw IN, waiting for its opening parenthesis
  //  - "analyzing": saw "IN (", deciding whether a value list or something else follows
  //  - "skipping":  inside a value list that has already been replaced by "..."
  let inState: "idle" | "seeking" | "analyzing" | "skipping" = "idle";
  // Number of parentheses opened (and not yet closed) inside the list being skipped, so that
  // a ")" belonging to e.g. a function call in the list does not end the skip prematurely.
  let skipDepth = 0;

  for (const token of tokenizeSQL(sql)) {
    // Semantic stream: drop layout and quoting, normalize the case of words.
    switch (token.type) {
      case "whitespace":
      case "comment":
      case "id-quote":
        break;
      case "keyword":
        semanticTokens.push({ type: "keyword", value: token.value.toUpperCase() });
        break;
      case "identifier":
        semanticTokens.push({ type: "identifier", value: token.value.toLowerCase() });
        break;
      default:
        semanticTokens.push(token);
        break;
    }

    // Presentable stream: keep everything except the contents of IN value lists.
    if (inState === "seeking") {
      // Whitespace and comments may sit between IN and "("; anything else ends the search.
      presentableTokens.push(token);
      if (token.type === "punct") {
        inState = token.value === "(" ? "analyzing" : "idle";
      }
      else if (token.type !== "comment" && token.type !== "whitespace") {
        inState = "idle";
      }
    }
    else if (inState === "analyzing") {
      if (token.type === "comment" || token.type === "whitespace") {
        presentableTokens.push(token);
      }
      else if (token.type === "keyword" || token.type === "punct") {
        // A keyword means a sub-query; punctuation is certainly not a value (maybe an immediate ")").
        presentableTokens.push(token);
        inState = "idle";
      }
      else {
        // First value of a list: emit the placeholder and drop the rest of the list.
        presentableTokens.push({ type: "unknown", value: "..." });
        inState = "skipping";
        skipDepth = 0;
      }
    }
    else if (inState === "skipping") {
      // Omit tokens until the ")" that closes the IN list itself.
      if (token.type === "punct") {
        if (token.value === "(") {
          skipDepth++;
        }
        else if (token.value === ")") {
          if (skipDepth === 0) {
            presentableTokens.push(token);
            inState = "idle";
          }
          else {
            skipDepth--;
          }
        }
      }
    }
    else {
      presentableTokens.push(token);
      if (token.type === "keyword" && token.value.toUpperCase() === "IN") {
        inState = "seeking";
      }
    }
  }

  return {
    ...analyzeSQLTokens(semanticTokens),
    presentableQuery: presentableTokens.map(t => t.value).join("")
  };
}
100
+
101
/**
 * Lazily splits SQL text into coarse tokens. Every character of the input ends up in exactly one
 * token, so concatenating the token values reproduces the input.
 *
 * Quoted identifiers ("x", `x`, [x]) are emitted as three tokens: the opening quote, the identifier
 * text, and the closing quote. A doubled delimiter inside a quoted string or identifier
 * ('it''s', "a""b") is the SQL escape for that delimiter and is kept inside the same token
 * instead of being split into two adjacent tokens.
 *
 * @param sql Raw SQL text.
 * @returns A generator of tokens in source order.
 */
function* tokenizeSQL(sql: string): Generator<Token> {
  // Alternatives are tried in order; each capture group corresponds to one token category.
  // The regex is created per call because the `g` flag keeps its scan position on the object.
  const regex = /(--[^\n]*|\/\*[\s\S]*?\*\/)|('(?:[^']|'')*')|("(?:[^"]|"")*")|(`(?:[^`]|``)*`)|(\[[^\]]+\])|(\b[a-zA-Z_][\w$]*\b)|([(),;])|(<=|>=|<>|!=|=|<|>)|(\s+)|(\S)/g;

  // Expands a quoted identifier into quote / name / quote tokens.
  const quotedIdentifier = (raw: string): Token[] => [
    { type: 'id-quote', value: raw[0] },
    { type: 'identifier', value: raw.slice(1, -1) },
    { type: 'id-quote', value: raw[raw.length - 1] },
  ];

  let match: RegExpExecArray | null;
  while ((match = regex.exec(sql)) !== null) {
    const [
      ,
      comment,        // 1
      singleQuoted,   // 2: string literal
      doubleQuoted,   // 3: "identifier"
      backtickQuoted, // 4: `identifier`
      bracketQuoted,  // 5: [identifier]
      word,           // 6
      punct,          // 7
      operator,       // 8
      ws,             // 9
      unknown         // 10
    ] = match;

    if (comment !== undefined) {
      yield { type: 'comment', value: comment };
    }
    else if (singleQuoted !== undefined) {
      yield { type: 'string', value: singleQuoted };
    }
    else if (doubleQuoted !== undefined) {
      yield* quotedIdentifier(doubleQuoted);
    }
    else if (backtickQuoted !== undefined) {
      yield* quotedIdentifier(backtickQuoted);
    }
    else if (bracketQuoted !== undefined) {
      yield* quotedIdentifier(bracketQuoted);
    }
    else if (word !== undefined) {
      yield { type: KEYWORDS.has(word.toUpperCase()) ? 'keyword' : 'identifier', value: word };
    }
    else if (punct !== undefined) {
      yield { type: 'punct', value: punct };
    }
    else if (operator !== undefined) {
      yield { type: 'operator', value: operator };
    }
    else if (ws !== undefined) {
      yield { type: 'whitespace', value: ws };
    }
    else if (unknown !== undefined) {
      yield { type: 'unknown', value: unknown };
    }
  }
}
157
+
158
/**
 * Walks a stream of semantic tokens (no whitespace, comments or identifier quotes; keywords are
 * expected in upper case) and derives:
 *  - which tables are touched and by which operations, and
 *  - a normalized query string in which IN value lists are collapsed to `IN(...)`.
 *
 * The operation attributed to a table is the most recent statement keyword at the same
 * parenthesis level. The operation context is saved when a "(" is entered and restored at the
 * matching ")", so tables that follow a sub-query are attributed to the outer statement.
 *
 * @param tokens Semantic tokens of a single SQL text.
 * @returns `tableOperations` (table name → operations) and `normalizedQuery`.
 */
export function analyzeSQLTokens(tokens: Token[]): { tableOperations: Record<string, string[]>; normalizedQuery: string } {
  type OpContext = { ops: string[]; at: number };

  // Keywords that may be followed by a table name, and the subset after which
  // "name(" denotes a table function rather than a table.
  const tableKeywords = ['FROM', 'JOIN', 'INTO', 'UPDATE', 'USING'];
  const functionCapableKeywords = ['FROM', 'JOIN', 'USING'];

  const aliasNames = new Set<string>();
  // A Map (not a plain object) so that table names such as "constructor" or "toString"
  // cannot collide with inherited object properties.
  const tableOps = new Map<string, Set<string>>();
  const normalizedTokens: string[] = [];
  // Operation contexts saved at each currently open "(".
  const opStack: Array<OpContext | null> = [];

  let currentOp: OpContext | null = null;
  let lastTokenType: string | null = null;

  // Appends a token to the normalized query, separated by a space unless either side is punctuation.
  function appendToken(val: string, type: string): void {
    if (normalizedTokens.length && type !== 'punct' && lastTokenType !== 'punct') {
      normalizedTokens.push(' ');
    }
    normalizedTokens.push(val);
    lastTokenType = type;
  }

  for (let i = 0; i < tokens.length; ) {
    const token = tokens[i];
    const next: Token | undefined = tokens[i + 1];
    const afterNext: Token | undefined = tokens[i + 2];

    // Record operation context
    if (token.type === 'keyword') {
      switch (token.value) {
        case 'SELECT':
        case 'INSERT':
        case 'UPDATE':
        case 'DELETE':
          currentOp = { ops: [token.value], at: i };
          break;
        case 'USING':
          // Tables named after USING are only read from.
          currentOp = { ops: ['SELECT'], at: i };
          break;
        case 'REPLACE':
          currentOp = { ops: ['INSERT', 'UPDATE'], at: i };
          break;
        case 'MERGE': {
          // The operations of a MERGE are whichever of these keywords appear later in the statement.
          const ops = ['INSERT', 'UPDATE', 'DELETE'].filter(op =>
            tokens.some((t, j) => j > i && t.type === 'keyword' && t.value === op)
          );
          currentOp = { ops, at: i };
          break;
        }
        default:
          break;
      }
    }

    // Detect CTE-style alias: <identifier> AS (
    if (
      token.type === 'identifier' &&
      next?.type === 'keyword' &&
      next.value === 'AS' &&
      afterNext?.value === '('
    ) {
      aliasNames.add(token.value.toLowerCase());
      appendToken(token.value, token.type);
      appendToken('AS', 'keyword');
      appendToken('(', 'punct');
      opStack.push(currentOp); // the "(" is consumed here, so save the context here as well
      i += 3;
      continue;
    }

    // Detect AS <alias> (table aliases, subquery aliases, etc.)
    if (
      token.type === 'keyword' &&
      token.value.toUpperCase() === 'AS' &&
      next?.type === 'identifier'
    ) {
      aliasNames.add(next.value.toLowerCase());
      appendToken(token.value, token.type);
      appendToken(next.value, next.type);
      i += 2;
      continue;
    }

    // Record table name if in FROM, JOIN, INTO, UPDATE, USING
    if (
      token.type === 'keyword' &&
      tableKeywords.includes(token.value) &&
      next?.type === 'identifier' &&
      !(functionCapableKeywords.includes(token.value) && afterNext?.value === '(') // functions
    ) {
      const table = next.value.toLowerCase();
      if (currentOp && !aliasNames.has(table) && hasBalancedParens(tokens, currentOp.at, i)) {
        let ops = tableOps.get(table);
        if (!ops) {
          ops = new Set();
          tableOps.set(table, ops);
        }
        for (const op of currentOp.ops) {
          ops.add(op);
        }
      }
    }

    // Normalize IN (...) clauses
    if (
      token.type === 'keyword' &&
      token.value === 'IN' &&
      next?.value === '(' &&
      afterNext // make sure something exists inside
    ) {
      appendToken('IN', 'keyword');
      appendToken('(', 'punct');

      const isEmptyList = afterNext.type === 'punct' && afterNext.value === ')';
      if (afterNext.type === 'keyword' || isEmptyList) {
        // Subquery (or nothing to collapse) → parse normally
        opStack.push(currentOp);
        i += 2;
        continue;
      }

      // Literal list → collapse
      appendToken('...', 'identifier');

      // Skip until the matching ')'. The scan starts at the first token inside the list so
      // that a leading '(' (tuple lists) is counted too.
      let depth = 1;
      let j = i + 2;
      while (j < tokens.length && depth > 0) {
        const value = tokens[j].value;
        if (value === '(') depth++;
        else if (value === ')') depth--;
        j++;
      }

      appendToken(')', 'punct');
      i = j;
      continue;
    }

    // Save the operation context when entering parentheses and restore it when leaving them.
    if (token.type === 'punct') {
      if (token.value === '(') {
        opStack.push(currentOp);
      }
      else if (token.value === ')' && opStack.length > 0) {
        currentOp = opStack.pop() ?? null;
      }
    }

    appendToken(token.value, token.type);
    i++;
  }

  return {
    tableOperations: Object.fromEntries(
      Array.from(tableOps, ([table, ops]): [string, string[]] => [table, Array.from(ops)])
    ),
    normalizedQuery: normalizedTokens.join('')
  };
}

/**
 * Tells whether the parentheses in `tokens[start, end)` are balanced, i.e. every ")" closes an
 * earlier "(" within the range and nothing is left open at the end of the range.
 *
 * @param tokens Token list to inspect.
 * @param start  Index of the first token to inspect (inclusive).
 * @param end    Index at which to stop (exclusive).
 * @returns `true` when the range is balanced (an empty range is balanced).
 */
function hasBalancedParens(tokens: Token[], start: number, end: number): boolean {
  let balance = 0;
  for (let i = start; i < end; i++) {
    const token = tokens[i];
    if (token.type !== 'punct') continue;

    if (token.value === '(') {
      balance++;
    }
    else if (token.value === ')') {
      balance--;
      // Early exit: unbalanced in wrong direction
      if (balance < 0) return false;
    }
  }
  return balance === 0;
}
@@ -0,0 +1,134 @@
1
+ import {HrTime} from "@opentelemetry/api";
2
+
3
/** Any message on the input side of the observation protocol, discriminated by `event`. */
export type ObservationInputMessage =
  | InitMessage
  | NewObservedEntityMessage
  | NewObservedInteractionMessage
  | ObservationMessage;

/** Any message on the output side of the observation protocol, discriminated by `type`. */
export type ObservationOutputMessage =
  | InitAck
  | ObservedAck
  | ObservationsAck;

/** Opening message of a session; only protocol version 1 is representable. */
export interface InitMessage {
  event: "init";
  protocolVersion: 1;
  /** Presumably the credential the receiver authorizes against; confirm with the server contract. */
  token: string;
}
12
+
13
+
14
/** Base shape of a message announcing a newly observed entity; `type` selects the concrete kind. */
export interface NewObservedEntityMessage {
  event: "new-entity";
  type: string;
  /** Hash identifying the entity. */
  hash: string;
}

/** Announces a service entity. */
export interface NewObservedServiceMessage extends NewObservedEntityMessage {
  type: "service";
  name: string;
  namespace?: string;
  environment?: string;
}

/** Announces an HTTP route entity. */
export interface NewObservedHttpRouteMessage extends NewObservedEntityMessage {
  type: "http-route";
  /** Presumably the hash of the entity that owns the route; confirm against the sender. */
  parent: string;
  method: string;
  route: string;
}

/** Announces a database entity; only the database system is mandatory. */
export interface NewObservedDatabaseMessage extends NewObservedEntityMessage {
  type: "database";
  system: string;
  name?: string;
  host?: string;
  port?: number;
}

/** Announces an HTTP service entity, described by protocol, host and port. */
export interface NewObservedHttpServiceMessage extends NewObservedEntityMessage {
  type: "http-service";
  protocol: string;
  host: string;
  port: number;
}
48
+
49
+
50
/** Base shape of a message announcing a newly observed interaction; `type` selects the concrete kind. */
export interface NewObservedInteractionMessage {
  event: "new-interaction";
  type: string;
  /** Hash identifying the interaction. */
  hash: string;
  /** Presumably the hash of the originating entity; confirm against the sender. */
  from: string;
  /** Presumably the hash of the target entity; confirm against the sender. */
  to: string;
}

/** Announces an HTTP request interaction; adds no fields beyond the base shape. */
export interface NewObservedHttpRequestMessage extends NewObservedInteractionMessage {
  type: "http-request";
}

/** Announces a database connection interaction. */
export interface NewObservedDatabaseConnectionMessage extends NewObservedInteractionMessage {
  type: "db-connection";
  connection?: string;
  user?: string;
}

/** Announces a database query interaction; the optional lists are grouped by operation. */
export interface NewObservedDatabaseQueryMessage extends NewObservedInteractionMessage {
  type: "db-query";
  query: string;
  selects?: string[];
  inserts?: string[];
  updates?: string[];
  deletes?: string[];
}
76
+
77
+
78
/** A numbered batch of observations. */
export interface ObservationMessage {
  event: "observations";
  /** Sequence number of the batch. */
  seq: number;
  observations: Observation[];
}

/** Fields shared by every observation; `type` selects the concrete kind. */
export interface Observation {
  type: string;
  /** Hash of the entity or interaction the observation relates to. */
  subject: string;
  timestamp: HrTime;
  errorMessage?: string;
  errorType?: string;
  stack?: string;
}

/** Observation of an HTTP call made as a client. */
export interface HttpClientObservation extends Observation {
  type: "http-client";
  path: string;
  method: string;
  duration: HrTime;
  status?: number;
  httpVersion?: string;
  requestBytes?: number;
  responseBytes?: number;
}

/** Observation of an HTTP request handled as a server. */
export interface HttpServerObservation extends Observation {
  type: "http-server";
  path: string;
  status: number;
  duration: HrTime;
  httpVersion?: string;
  requestBytes?: number;
  responseBytes?: number;
  userAgent?: string;
}

/** Observation of a database query execution. */
export interface DatabaseQueryObservation extends Observation {
  type: "db-query";
  duration: HrTime;
  returnedRows?: number;
}
120
+
121
+
122
/** Acknowledgement carrying the `ack-authorized` tag; it has no payload. */
export interface InitAck {
  type: "ack-authorized";
}

/** Acknowledgement carrying the `ack-observed` tag and a hash. */
export interface ObservedAck {
  type: "ack-observed";
  /** Presumably the hash of the entity or interaction being acknowledged; confirm with the server contract. */
  hash: string;
}

/** Acknowledgement carrying the `ack-observations` tag and a sequence number. */
export interface ObservationsAck {
  type: "ack-observations";
  /** Presumably the `seq` of the observation batch being acknowledged; confirm with the server contract. */
  seq: number;
}
package/tsconfig.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "module": "CommonJS",
5
+ "moduleResolution": "Node",
6
+ "declaration": true,
7
+ "outDir": "dist",
8
+ "strict": true,
9
+ "esModuleInterop": true,
10
+ "skipLibCheck": true
11
+ },
12
+ "include": ["src"]
13
+ }