@comprehend/telemetry-node 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.idea/modules.xml +8 -0
- package/.idea/telemetry-node.iml +13 -0
- package/.idea/vcs.xml +6 -0
- package/dist/ComprehendDevSpanProcessor.d.ts +25 -0
- package/dist/ComprehendDevSpanProcessor.js +447 -0
- package/dist/WebSocketConnection.d.ts +22 -0
- package/dist/WebSocketConnection.js +102 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +5 -0
- package/dist/sql-analyzer.d.ts +22 -0
- package/dist/sql-analyzer.js +287 -0
- package/dist/sql-analyzer.test.d.ts +1 -0
- package/dist/sql-analyzer.test.js +363 -0
- package/dist/wire-protocol.d.ts +110 -0
- package/dist/wire-protocol.js +2 -0
- package/jest.config.js +11 -0
- package/package.json +35 -0
- package/src/ComprehendDevSpanProcessor.ts +563 -0
- package/src/WebSocketConnection.ts +121 -0
- package/src/index.ts +2 -0
- package/src/sql-analyzer.test.ts +436 -0
- package/src/sql-analyzer.ts +316 -0
- package/src/wire-protocol.ts +134 -0
- package/tsconfig.json +13 -0
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
// One lexical token produced by tokenizeSQL. `id-quote` tokens carry the quote
// characters stripped off quoted identifiers; `unknown` covers any character
// the tokenizer's regex does not otherwise classify.
type Token = { type: 'keyword' | 'identifier' | 'id-quote' | 'string' | 'comment' | 'punct' | 'operator' | 'whitespace' | 'unknown'; value: string };

// Words treated as SQL keywords by the tokenizer; every other bare word is
// classified as an identifier. The lookup is case-insensitive (the tokenizer
// uppercases the word before calling KEYWORDS.has).
const KEYWORDS = new Set([
    'SELECT', 'FROM', 'WHERE', 'INSERT', 'REPLACE', 'INTO', 'VALUES', 'DELETE', 'UPDATE',
    'MERGE', 'SET', 'JOIN', 'LEFT', 'RIGHT', 'FULL', 'OUTER', 'INNER', 'ON', 'AS', 'AND', 'OR',
    'NOT', 'IS', 'NULL', 'IN', 'WITH', 'RECURSIVE', 'UNION', 'ALL',
    'GROUP', 'BY', 'HAVING', 'ORDER', 'LIMIT', 'OFFSET', 'LATERAL', 'USING'
]);

// Result of analyzeSQL (see its doc comment for the two query variants).
export interface SQLAnalysisResult {
    // Lower-cased table name -> operations observed on it
    // (values drawn from "SELECT" / "INSERT" / "UPDATE" / "DELETE").
    tableOperations: Record<string, string[]>;
    // Whitespace/comment/case-insensitive form intended for hashing.
    normalizedQuery: string;
    // Original text with only IN (...) literal lists collapsed.
    presentableQuery: string;
}
|
|
15
|
+
|
|
16
|
+
/** Performs a rough tokenization of the SQL, extracts the tables involved and the operations on them, and
|
|
17
|
+
* produces two versions of the query:
|
|
18
|
+
* - A normalized version for hashing purposes that does not account for whitespace, comments, and collapses
|
|
19
|
+
* IN clauses that might cause a cardinality explosion.
|
|
20
|
+
* - A presentable version that only does the IN clause collapsing */
|
|
21
|
+
export function analyzeSQL(sql: string): SQLAnalysisResult {
|
|
22
|
+
let semanticTokens = new Array<Token>();
|
|
23
|
+
let presentableTokens = new Array<Token>();
|
|
24
|
+
let seekingInParen = false;
|
|
25
|
+
let analyzingIn = false;
|
|
26
|
+
let skippingIn = false;
|
|
27
|
+
for (let token of tokenizeSQL(sql)) {
|
|
28
|
+
switch (token.type) {
|
|
29
|
+
case "whitespace":
|
|
30
|
+
case "comment":
|
|
31
|
+
case "id-quote":
|
|
32
|
+
// Skip
|
|
33
|
+
break;
|
|
34
|
+
case "keyword":
|
|
35
|
+
// Normalize to uppercase.
|
|
36
|
+
semanticTokens.push({ type: "keyword", value: token.value.toUpperCase() });
|
|
37
|
+
break;
|
|
38
|
+
case "identifier":
|
|
39
|
+
// Normalize to lowercase.
|
|
40
|
+
semanticTokens.push({ type: "identifier", value: token.value.toLowerCase() });
|
|
41
|
+
break;
|
|
42
|
+
default:
|
|
43
|
+
semanticTokens.push(token);
|
|
44
|
+
break;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
if (seekingInParen) {
|
|
48
|
+
// We saw IN, and now look for an opening (. Skip whitespace/comments, bail if anything else.
|
|
49
|
+
presentableTokens.push(token);
|
|
50
|
+
switch (token.type) {
|
|
51
|
+
case "comment":
|
|
52
|
+
case "whitespace":
|
|
53
|
+
break;
|
|
54
|
+
case "punct":
|
|
55
|
+
seekingInParen = false;
|
|
56
|
+
analyzingIn = token.value === "(";
|
|
57
|
+
break;
|
|
58
|
+
default:
|
|
59
|
+
seekingInParen = false;
|
|
60
|
+
break;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
else if (analyzingIn) {
|
|
64
|
+
// We saw the opening paren of an IN. Pass over whitespace and comments. If we see a
|
|
65
|
+
// keyword we know it's not something to collapse, it's a sub-query. Otherwise, we
|
|
66
|
+
// enter skipping mode.
|
|
67
|
+
switch (token.type) {
|
|
68
|
+
case "comment":
|
|
69
|
+
case "whitespace":
|
|
70
|
+
presentableTokens.push(token);
|
|
71
|
+
break;
|
|
72
|
+
case "keyword":
|
|
73
|
+
case "punct": // maybe immediate ), certainly not a value
|
|
74
|
+
presentableTokens.push(token);
|
|
75
|
+
analyzingIn = false;
|
|
76
|
+
break;
|
|
77
|
+
default:
|
|
78
|
+
analyzingIn = false;
|
|
79
|
+
skippingIn = true;
|
|
80
|
+
presentableTokens.push({ type: "unknown", value: "..." })
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
else if (skippingIn) {
|
|
84
|
+
// Omit tokens until a closing ).
|
|
85
|
+
if (token.type === "punct" && token.value === ")") {
|
|
86
|
+
presentableTokens.push(token);
|
|
87
|
+
skippingIn = false;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
presentableTokens.push(token);
|
|
92
|
+
seekingInParen = token.type === "keyword" && token.value.toUpperCase() === "IN";
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
return {
|
|
96
|
+
...analyzeSQLTokens(semanticTokens),
|
|
97
|
+
presentableQuery: presentableTokens.map(t => t.value).join("")
|
|
98
|
+
};
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
function* tokenizeSQL(sql: string): Generator<Token> {
|
|
102
|
+
const regex = /(--[^\n]*|\/\*[\s\S]*?\*\/)|('[^']*')|("(?:[^"]*)")|(`[^`]*`)|(\[[^\]]+\])|(\b[a-zA-Z_][\w$]*\b)|([(),;])|(<=|>=|<>|!=|=|<|>)|(\s+)|(\S)/g;
|
|
103
|
+
let match: RegExpExecArray | null;
|
|
104
|
+
|
|
105
|
+
while ((match = regex.exec(sql))) {
|
|
106
|
+
const [
|
|
107
|
+
full,
|
|
108
|
+
comment, // 1
|
|
109
|
+
singleQuoted, // 2: string literal
|
|
110
|
+
doubleQuoted, // 3: "identifier"
|
|
111
|
+
backtickQuoted, // 4: `identifier`
|
|
112
|
+
bracketQuoted, // 5: [identifier]
|
|
113
|
+
word, // 6
|
|
114
|
+
punct, // 7
|
|
115
|
+
operator, // 8
|
|
116
|
+
ws, // 9
|
|
117
|
+
unknown // 10
|
|
118
|
+
] = match;
|
|
119
|
+
if (comment) {
|
|
120
|
+
yield { type: 'comment', value: comment };
|
|
121
|
+
}
|
|
122
|
+
else if (singleQuoted) {
|
|
123
|
+
yield { type: 'string', value: singleQuoted };
|
|
124
|
+
}
|
|
125
|
+
else if (doubleQuoted) {
|
|
126
|
+
yield { type: 'id-quote', value: '"' };
|
|
127
|
+
yield { type: 'identifier', value: doubleQuoted.slice(1, -1) };
|
|
128
|
+
yield { type: 'id-quote', value: '"' };
|
|
129
|
+
}
|
|
130
|
+
else if (backtickQuoted) {
|
|
131
|
+
yield { type: 'id-quote', value: '`' };
|
|
132
|
+
yield { type: 'identifier', value: backtickQuoted.slice(1, -1) };
|
|
133
|
+
yield { type: 'id-quote', value: '`' };
|
|
134
|
+
}
|
|
135
|
+
else if (bracketQuoted) {
|
|
136
|
+
yield { type: 'id-quote', value: '[' };
|
|
137
|
+
yield { type: 'identifier', value: bracketQuoted.slice(1, -1) };
|
|
138
|
+
yield { type: 'id-quote', value: ']' };
|
|
139
|
+
}
|
|
140
|
+
else if (word) {
|
|
141
|
+
yield { type: KEYWORDS.has(word.toUpperCase()) ? 'keyword' : "identifier", value: word };
|
|
142
|
+
}
|
|
143
|
+
else if (punct) {
|
|
144
|
+
yield { type: 'punct', value: punct };
|
|
145
|
+
}
|
|
146
|
+
else if (operator) {
|
|
147
|
+
yield {type: 'operator', value: operator};
|
|
148
|
+
}
|
|
149
|
+
else if (ws) {
|
|
150
|
+
yield { type: 'whitespace', value: ws };
|
|
151
|
+
}
|
|
152
|
+
else if (unknown) {
|
|
153
|
+
yield { type: 'unknown', value: unknown };
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
export function analyzeSQLTokens(tokens: Token[]) {
|
|
159
|
+
const aliasNames = new Set<string>();
|
|
160
|
+
const tableOps: Record<string, Set<string>> = {};
|
|
161
|
+
const normalizedTokens: string[] = [];
|
|
162
|
+
|
|
163
|
+
let currentOp: { ops: string[], at: number } | null = null;
|
|
164
|
+
let lastTokenType: string | null = null;
|
|
165
|
+
|
|
166
|
+
function appendToken(val: string, type: string) {
|
|
167
|
+
if (normalizedTokens.length && type !== 'punct' && lastTokenType !== 'punct') {
|
|
168
|
+
normalizedTokens.push(' ');
|
|
169
|
+
}
|
|
170
|
+
normalizedTokens.push(val);
|
|
171
|
+
lastTokenType = type;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
for (let i = 0; i < tokens.length; ) {
|
|
175
|
+
const token = tokens[i];
|
|
176
|
+
|
|
177
|
+
// Record operation context
|
|
178
|
+
if (token.type === 'keyword' && ['SELECT', 'INSERT', 'UPDATE', 'DELETE'].includes(token.value)) {
|
|
179
|
+
currentOp = { ops: [token.value.toUpperCase()], at: i };
|
|
180
|
+
}
|
|
181
|
+
else if (token.type === "keyword" && token.value === 'USING') {
|
|
182
|
+
currentOp = { ops: ["SELECT"], at: i };
|
|
183
|
+
}
|
|
184
|
+
else if (token.type === "keyword" && token.value === 'REPLACE') {
|
|
185
|
+
currentOp = { ops: ["INSERT", "UPDATE"], at: i };
|
|
186
|
+
}
|
|
187
|
+
else if (token.type === "keyword" && token.value === 'MERGE') {
|
|
188
|
+
let sawInsert = false;
|
|
189
|
+
let sawUpdate = false;
|
|
190
|
+
let sawDelete = false;
|
|
191
|
+
for (let j = i + 1; j < tokens.length; j++) {
|
|
192
|
+
if (tokens[j].type === "keyword") {
|
|
193
|
+
if (tokens[j].value === "INSERT")
|
|
194
|
+
sawInsert = true;
|
|
195
|
+
if (tokens[j].value === "UPDATE")
|
|
196
|
+
sawUpdate = true;
|
|
197
|
+
if (tokens[j].value === "DELETE")
|
|
198
|
+
sawDelete = true;
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
currentOp = { ops: [], at: i };
|
|
202
|
+
if (sawInsert)
|
|
203
|
+
currentOp.ops.push("INSERT");
|
|
204
|
+
if (sawUpdate)
|
|
205
|
+
currentOp.ops.push("UPDATE");
|
|
206
|
+
if (sawDelete)
|
|
207
|
+
currentOp.ops.push("DELETE");
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
// Detect CTE-style alias: <identifier> AS (
|
|
211
|
+
if (
|
|
212
|
+
token.type === 'identifier' &&
|
|
213
|
+
tokens[i + 1]?.type === 'keyword' &&
|
|
214
|
+
tokens[i + 1].value === 'AS' &&
|
|
215
|
+
tokens[i + 2]?.value === '('
|
|
216
|
+
) {
|
|
217
|
+
const alias = token.value.toLowerCase();
|
|
218
|
+
aliasNames.add(alias);
|
|
219
|
+
appendToken(token.value, token.type);
|
|
220
|
+
appendToken('AS', 'keyword');
|
|
221
|
+
appendToken('(', 'punct');
|
|
222
|
+
i += 3;
|
|
223
|
+
continue;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
// Detect AS <alias> (table aliases, subquery aliases, etc.)
|
|
227
|
+
if (
|
|
228
|
+
token.type === 'keyword' &&
|
|
229
|
+
token.value.toUpperCase() === 'AS' &&
|
|
230
|
+
tokens[i + 1]?.type === 'identifier'
|
|
231
|
+
) {
|
|
232
|
+
const alias = tokens[i + 1].value.toLowerCase();
|
|
233
|
+
aliasNames.add(alias);
|
|
234
|
+
appendToken(token.value, token.type);
|
|
235
|
+
appendToken(tokens[i + 1].value, tokens[i + 1].type);
|
|
236
|
+
i += 2;
|
|
237
|
+
continue;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
// Record table name if in FROM, JOIN, INTO, UPDATE
|
|
241
|
+
if (
|
|
242
|
+
token.type === 'keyword' &&
|
|
243
|
+
['FROM', 'JOIN', 'INTO', 'UPDATE', 'USING'].includes(token.value) &&
|
|
244
|
+
tokens[i + 1]?.type === 'identifier' &&
|
|
245
|
+
!(['FROM', 'JOIN', 'USING'].includes(token.value) && tokens[i + 2]?.value === "(") // functions
|
|
246
|
+
) {
|
|
247
|
+
const table = tokens[i + 1].value.toLowerCase();
|
|
248
|
+
if (currentOp && !aliasNames.has(table) && hasBalancedParens(tokens, currentOp.at, i)) {
|
|
249
|
+
tableOps[table] ||= new Set();
|
|
250
|
+
for (let op of currentOp.ops) {
|
|
251
|
+
tableOps[table].add(op);
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
// Normalize IN (...) clauses
|
|
257
|
+
if (token.type === 'keyword' && token.value === 'IN') {
|
|
258
|
+
if (
|
|
259
|
+
tokens[i + 1]?.value === '(' &&
|
|
260
|
+
tokens[i + 2] // make sure something exists inside
|
|
261
|
+
) {
|
|
262
|
+
appendToken('IN', 'keyword');
|
|
263
|
+
appendToken('(', 'punct');
|
|
264
|
+
|
|
265
|
+
const firstInside = tokens[i + 2];
|
|
266
|
+
if (firstInside.type === 'keyword') {
|
|
267
|
+
// Subquery → parse normally
|
|
268
|
+
i += 2;
|
|
269
|
+
continue;
|
|
270
|
+
}
|
|
271
|
+
else {
|
|
272
|
+
// Literal list → collapse
|
|
273
|
+
appendToken('...', 'identifier');
|
|
274
|
+
|
|
275
|
+
// Skip until matching ')'
|
|
276
|
+
let depth = 1;
|
|
277
|
+
let j = i + 3;
|
|
278
|
+
while (j < tokens.length && depth > 0) {
|
|
279
|
+
if (tokens[j].value === '(') depth++;
|
|
280
|
+
else if (tokens[j].value === ')') depth--;
|
|
281
|
+
j++;
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
appendToken(')', 'punct');
|
|
285
|
+
i = j;
|
|
286
|
+
continue;
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
appendToken(token.value, token.type);
|
|
292
|
+
i++;
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
return {
|
|
296
|
+
tableOperations: Object.fromEntries(
|
|
297
|
+
Object.entries(tableOps).map(([k, v]) => [k, Array.from(v)])
|
|
298
|
+
),
|
|
299
|
+
normalizedQuery: normalizedTokens.join('')
|
|
300
|
+
};
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
function hasBalancedParens(tokens: Token[], start: number, end: number): boolean {
|
|
304
|
+
let balance = 0;
|
|
305
|
+
for (let i = start; i < end; i++) {
|
|
306
|
+
const token = tokens[i];
|
|
307
|
+
if (token.type === 'punct') {
|
|
308
|
+
if (token.value === '(') balance++;
|
|
309
|
+
else if (token.value === ')') balance--;
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
// Early exit: unbalanced in wrong direction
|
|
313
|
+
if (balance < 0) return false;
|
|
314
|
+
}
|
|
315
|
+
return balance === 0;
|
|
316
|
+
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import {HrTime} from "@opentelemetry/api";
|
|
2
|
+
|
|
3
|
+
// Messages the client sends to the server, discriminated on `event`.
export type ObservationInputMessage = InitMessage | NewObservedEntityMessage | NewObservedInteractionMessage | ObservationMessage;
// Acknowledgements the server sends back, discriminated on `type`.
export type ObservationOutputMessage = InitAck | ObservedAck | ObservationsAck;


// Opens a session. `token` is presumably checked server-side — the matching
// ack type is "ack-authorized" (see InitAck).
export interface InitMessage {
    event: "init";
    protocolVersion: 1;
    token: string;
}


// Announces an entity not previously reported. `hash` identifies the entity
// in later messages (Observation.subject, interaction from/to, ObservedAck).
// `type` discriminates the concrete sub-interfaces below.
export interface NewObservedEntityMessage {
    event: "new-entity";
    type: string;
    hash: string;
}

export interface NewObservedServiceMessage extends NewObservedEntityMessage {
    type: "service";
    name: string;
    namespace?: string;
    environment?: string;
}

// An HTTP route; `parent` is the hash of the entity that exposes it.
export interface NewObservedHttpRouteMessage extends NewObservedEntityMessage {
    type: "http-route";
    parent: string;
    method: string;
    route: string;
}

export interface NewObservedDatabaseMessage extends NewObservedEntityMessage {
    type: "database";
    system: string;
    name?: string;
    host?: string;
    port?: number;
}

export interface NewObservedHttpServiceMessage extends NewObservedEntityMessage {
    type: "http-service";
    protocol: string;
    host: string;
    port: number;
}


// Announces a directed edge between two previously announced entities,
// referenced by their hashes.
export interface NewObservedInteractionMessage {
    event: "new-interaction";
    type: string;
    hash: string;
    from: string;
    to: string;
}

export interface NewObservedHttpRequestMessage extends NewObservedInteractionMessage {
    type: "http-request";
}

export interface NewObservedDatabaseConnectionMessage extends NewObservedInteractionMessage {
    type: "db-connection";
    connection?: string;
    user?: string;
}

// A distinct query shape. NOTE(review): the per-operation table lists look
// like they are derived from SQL analysis (cf. SQLAnalysisResult.tableOperations
// in sql-analyzer.ts) — confirm against the producer.
export interface NewObservedDatabaseQueryMessage extends NewObservedInteractionMessage {
    type: "db-query";
    query: string;
    selects?: string[];
    inserts?: string[];
    updates?: string[];
    deletes?: string[];
}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
// A batch of observations; `seq` is echoed back in the matching
// ObservationsAck (see below).
export interface ObservationMessage {
    event: "observations";
    seq: number;
    observations: Array<Observation>;
}

// Base shape of a single observation about a previously announced entity or
// interaction. `type` discriminates the concrete sub-interfaces below.
export interface Observation {
    type: string;
    subject: string; // Hash of the entity or interaction the observation relates to
    timestamp: HrTime;
    // Error details, when present.
    errorMessage?: string;
    errorType?: string;
    stack?: string;
}

// An outbound HTTP call as seen by the caller.
export interface HttpClientObservation extends Observation {
    type: "http-client";
    path: string;
    method: string;
    duration: HrTime;
    status?: number;
    httpVersion?: string;
    requestBytes?: number;
    responseBytes?: number;
}

// An inbound HTTP request as seen by the server.
export interface HttpServerObservation extends Observation {
    type: "http-server";
    path: string;
    status: number;
    duration: HrTime;
    httpVersion?: string;
    requestBytes?: number;
    responseBytes?: number;
    userAgent?: string;
}

export interface DatabaseQueryObservation extends Observation {
    type: "db-query";
    duration: HrTime;
    returnedRows?: number;
}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
// Server response to InitMessage: the session is authorized.
export interface InitAck {
    type: "ack-authorized";
}

// Acknowledges a new-entity / new-interaction message, identified by its hash.
export interface ObservedAck {
    type: "ack-observed";
    hash: string;
}

// Acknowledges an ObservationMessage, identified by its sequence number.
export interface ObservationsAck {
    type: "ack-observations";
    seq: number;
}
|
package/tsconfig.json
ADDED