@flisk/analyze-tracking 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64):
  1. package/bin/cli.js +1 -1
  2. package/package.json +9 -7
  3. package/src/analyze/go/astTraversal.js +121 -0
  4. package/src/analyze/go/constants.js +20 -0
  5. package/src/analyze/go/eventDeduplicator.js +47 -0
  6. package/src/analyze/go/eventExtractor.js +156 -0
  7. package/src/analyze/go/goAstParser/constants.js +39 -0
  8. package/src/analyze/go/goAstParser/expressionParser.js +281 -0
  9. package/src/analyze/go/goAstParser/index.js +52 -0
  10. package/src/analyze/go/goAstParser/statementParser.js +387 -0
  11. package/src/analyze/go/goAstParser/tokenizer.js +196 -0
  12. package/src/analyze/go/goAstParser/typeParser.js +202 -0
  13. package/src/analyze/go/goAstParser/utils.js +99 -0
  14. package/src/analyze/go/index.js +55 -0
  15. package/src/analyze/go/propertyExtractor.js +670 -0
  16. package/src/analyze/go/trackingDetector.js +71 -0
  17. package/src/analyze/go/trackingExtractor.js +54 -0
  18. package/src/analyze/go/typeContext.js +88 -0
  19. package/src/analyze/go/utils.js +215 -0
  20. package/src/analyze/index.js +11 -6
  21. package/src/analyze/javascript/constants.js +115 -0
  22. package/src/analyze/javascript/detectors/analytics-source.js +119 -0
  23. package/src/analyze/javascript/detectors/index.js +10 -0
  24. package/src/analyze/javascript/extractors/event-extractor.js +179 -0
  25. package/src/analyze/javascript/extractors/index.js +13 -0
  26. package/src/analyze/javascript/extractors/property-extractor.js +172 -0
  27. package/src/analyze/javascript/index.js +38 -0
  28. package/src/analyze/javascript/parser.js +126 -0
  29. package/src/analyze/javascript/utils/function-finder.js +123 -0
  30. package/src/analyze/python/index.js +111 -0
  31. package/src/analyze/python/pythonTrackingAnalyzer.py +814 -0
  32. package/src/analyze/ruby/detectors.js +46 -0
  33. package/src/analyze/ruby/extractors.js +258 -0
  34. package/src/analyze/ruby/index.js +51 -0
  35. package/src/analyze/ruby/traversal.js +123 -0
  36. package/src/analyze/ruby/types.js +30 -0
  37. package/src/analyze/ruby/visitor.js +66 -0
  38. package/src/analyze/typescript/constants.js +109 -0
  39. package/src/analyze/typescript/detectors/analytics-source.js +120 -0
  40. package/src/analyze/typescript/detectors/index.js +10 -0
  41. package/src/analyze/typescript/extractors/event-extractor.js +269 -0
  42. package/src/analyze/typescript/extractors/index.js +14 -0
  43. package/src/analyze/typescript/extractors/property-extractor.js +395 -0
  44. package/src/analyze/typescript/index.js +48 -0
  45. package/src/analyze/typescript/parser.js +131 -0
  46. package/src/analyze/typescript/utils/function-finder.js +114 -0
  47. package/src/analyze/typescript/utils/type-resolver.js +193 -0
  48. package/src/generateDescriptions/index.js +81 -0
  49. package/src/generateDescriptions/llmUtils.js +33 -0
  50. package/src/generateDescriptions/promptUtils.js +62 -0
  51. package/src/generateDescriptions/schemaUtils.js +61 -0
  52. package/src/index.js +7 -2
  53. package/src/{fileProcessor.js → utils/fileProcessor.js} +5 -0
  54. package/src/{repoDetails.js → utils/repoDetails.js} +5 -0
  55. package/src/{yamlGenerator.js → utils/yamlGenerator.js} +5 -0
  56. package/src/analyze/analyzeGoFile.js +0 -1164
  57. package/src/analyze/analyzeJsFile.js +0 -87
  58. package/src/analyze/analyzePythonFile.js +0 -42
  59. package/src/analyze/analyzeRubyFile.js +0 -419
  60. package/src/analyze/analyzeTsFile.js +0 -192
  61. package/src/analyze/go2json.js +0 -1069
  62. package/src/analyze/helpers.js +0 -656
  63. package/src/analyze/pythonTrackingAnalyzer.py +0 -541
  64. package/src/generateDescriptions.js +0 -196
@@ -1,87 +0,0 @@
1
- const fs = require('fs');
2
- const acorn = require('acorn');
3
- const jsx = require('acorn-jsx');
4
- const walk = require('acorn-walk');
5
- const { extend } = require('acorn-jsx-walk');
6
- const { detectSourceJs, findWrappingFunctionJs, extractJsProperties } = require('./helpers');
7
-
8
- const parser = acorn.Parser.extend(jsx());
9
- const parserOptions = { ecmaVersion: 'latest', sourceType: 'module', locations: true };
10
- extend(walk.base);
11
-
12
/**
 * Parse a JavaScript/JSX file and collect the tracking calls found in it.
 *
 * Every CallExpression in the file is classified with `detectSourceJs`; calls
 * classified as 'unknown' are ignored. An event is recorded only when the call
 * yields a truthy event name and its properties argument is an object literal.
 *
 * @param {string} filePath - Path of the file to read and analyze.
 * @param {string} [customFunction] - Passed through to `detectSourceJs`.
 * @returns {Array<{eventName: string, source: string, properties: Object,
 *   filePath: string, line: number, functionName: *}>} The events found. The
 *   array is empty when the file cannot be read or parsed.
 */
function analyzeJsFile(filePath, customFunction) {
  const events = [];
  try {
    const code = fs.readFileSync(filePath, 'utf8');
    let ast;
    try {
      ast = parser.parse(code, parserOptions);
    } catch (parseError) {
      console.error(`Error parsing file ${filePath}`);
      return events; // Return empty events array if parsing fails
    }

    walk.ancestor(ast, {
      CallExpression(node, ancestors) {
        // Errors are contained per call so one odd node does not discard the
        // events already collected from the rest of the file.
        try {
          const source = detectSourceJs(node, customFunction);
          if (source === 'unknown') return;

          let eventName = null;
          let propertiesNode = null;

          if (source === 'googleanalytics' && node.arguments.length >= 3) {
            // Event name is the second argument, properties the third.
            eventName = node.arguments[1]?.value || null;
            propertiesNode = node.arguments[2];
          } else if (source === 'snowplow' && node.arguments.length > 0) {
            // Snowplow pattern: tracker.track(buildStructEvent({...}))
            const firstArg = node.arguments[0];
            if (firstArg.type === 'CallExpression' && firstArg.arguments.length > 0) {
              const structEventArg = firstArg.arguments[0];
              if (structEventArg.type === 'ObjectExpression') {
                // Spread elements (`{ ...base, action: 'x' }`) have no `key`,
                // so the lookup must tolerate a missing key instead of throwing.
                const actionProperty = structEventArg.properties.find(
                  (prop) => prop.key?.name === 'action'
                );
                eventName = actionProperty ? actionProperty.value.value : null;
                propertiesNode = structEventArg;
              }
            }
          } else if (source === 'mparticle' && node.arguments.length >= 3) {
            // Event name is the first argument, properties the third.
            eventName = node.arguments[0]?.value || null;
            propertiesNode = node.arguments[2];
          } else if (node.arguments.length >= 2) {
            // Default shape: (eventName, properties)
            eventName = node.arguments[0]?.value || null;
            propertiesNode = node.arguments[1];
          }

          const line = node.loc.start.line;
          const functionName = findWrappingFunctionJs(node, ancestors);

          if (eventName && propertiesNode && propertiesNode.type === 'ObjectExpression') {
            const properties = extractJsProperties(propertiesNode);

            // For Snowplow, remove 'action' from properties since it's used as the event name
            if (source === 'snowplow' && properties.action) {
              delete properties.action;
            }

            events.push({
              eventName,
              source,
              properties,
              filePath,
              line,
              functionName,
            });
          }
        } catch (nodeError) {
          console.error(`Error processing node in ${filePath}`);
        }
      },
    });
  } catch (fileError) {
    console.error(`Error reading or processing file ${filePath}`);
  }

  return events;
}
86
-
87
- module.exports = { analyzeJsFile };
@@ -1,42 +0,0 @@
1
- const fs = require('fs');
2
- const path = require('path');
3
-
4
- let pyodide = null;
5
-
6
/**
 * Return the shared Pyodide runtime, creating it on first use.
 *
 * The runtime is stored in the module-level `pyodide` variable so later calls
 * reuse it instead of loading it again.
 *
 * @returns {Promise<Object>} The Pyodide instance.
 */
async function initPyodide() {
  // Fast path: the runtime already exists.
  if (pyodide) {
    return pyodide;
  }

  const { loadPyodide: bootRuntime } = await import('pyodide');
  pyodide = await bootRuntime();
  await pyodide.loadPackagesFromImports('import ast, json');
  return pyodide;
}
14
-
15
/**
 * Analyze a Python file for tracking calls by running the bundled Python
 * analyzer script inside Pyodide.
 *
 * @param {string} filePath - Path of the Python file to analyze.
 * @param {string} [customFunction] - Forwarded to the analyzer as
 *   `custom_function`; falsy values are passed as null.
 * @returns {Promise<Array>} The events parsed from the analyzer's JSON output,
 *   or an empty array when any step fails (the error is logged).
 */
async function analyzePythonFile(filePath, customFunction) {
  try {
    const pythonSource = fs.readFileSync(filePath, 'utf8');
    const runtime = await initPyodide();

    // The analyzer script sits next to this module.
    const analyzerSource = fs.readFileSync(
      path.join(__dirname, 'pythonTrackingAnalyzer.py'),
      'utf8'
    );

    // Expose the inputs to the Python side as globals. `__name__` is set to
    // null before the analyzer script is executed.
    const pythonGlobals = [
      ['code', pythonSource],
      ['filepath', filePath],
      ['custom_function', customFunction || null],
      ['__name__', null],
    ];
    for (const [globalName, globalValue] of pythonGlobals) {
      runtime.globals.set(globalName, globalValue);
    }

    // Define the analyzer, then invoke it on the globals set above.
    runtime.runPython(analyzerSource);
    const serializedEvents = runtime.runPython('analyze_python_code(code, filepath, custom_function)');
    return JSON.parse(serializedEvents);
  } catch (error) {
    console.error(`Error analyzing Python file ${filePath}:`, error);
    return [];
  }
}
41
-
42
- module.exports = { analyzePythonFile };
@@ -1,419 +0,0 @@
1
- const fs = require('fs');
2
-
3
- let parse = null;
4
-
5
- // Create a visitor to traverse the AST
6
/**
 * Walks a Prism (Ruby) AST and records analytics tracking calls.
 *
 * Found events are appended to `this.events` as
 * `{ eventName, source, properties, filePath, line, functionName }`.
 */
class TrackingVisitor {
  /**
   * Cached '@ruby/prism' module namespace (the node classes used for
   * `instanceof` checks). Loaded on first use; may be pre-populated, e.g. with
   * stand-in classes in tests.
   */
  static prismModule = null;

  /**
   * Load the '@ruby/prism' module once and reuse it for every node visited.
   *
   * @returns {Promise<Object>} The module namespace exposing the node classes.
   */
  static async loadNodeTypes() {
    if (!TrackingVisitor.prismModule) {
      TrackingVisitor.prismModule = await import('@ruby/prism');
    }
    return TrackingVisitor.prismModule;
  }

  /**
   * @param {string} code - Full source text of the Ruby file.
   * @param {string} filePath - Path recorded on every event.
   * @param {?string} [customFunction=null] - Name of a custom tracking method.
   */
  constructor(code, filePath, customFunction = null) {
    this.code = code;
    this.lines = code.split('\n');
    this.ancestors = [];
    this.events = [];
    this.filePath = filePath;
    this.customFunction = customFunction;
  }

  /**
   * Convert a node location into a 1-based line number.
   *
   * @param {{startOffset: number}} location - Location with a start offset into `code`.
   * @returns {number} Line number of the offset.
   */
  getLineNumber(location) {
    // Count the number of newlines before the start offset
    const beforeStart = this.code.slice(0, location.startOffset);
    return beforeStart.split('\n').length;
  }

  /**
   * Name the closest enclosing scope of a node.
   *
   * @param {Object} node - The node being located (not inspected itself).
   * @param {Array<Object>} ancestors - Ancestor chain, outermost first.
   * @returns {Promise<*>} The method name for the nearest DefNode, 'block' for
   *   the nearest block or lambda, or 'global' when neither encloses the node.
   */
  async findWrappingFunction(node, ancestors) {
    const { DefNode, BlockNode, LambdaNode } = await TrackingVisitor.loadNodeTypes();

    for (let i = ancestors.length - 1; i >= 0; i--) {
      const current = ancestors[i];

      // Handle method definitions
      if (current instanceof DefNode) {
        return current.name;
      }

      // Handle blocks and lambdas
      if (current instanceof BlockNode || current instanceof LambdaNode) {
        return 'block';
      }
    }
    return 'global';
  }

  /**
   * Classify a call node by the analytics library it appears to target.
   *
   * @param {Object} node - Call node with `name` and optional `receiver`.
   * @returns {?string} 'segment', 'rudderstack', 'mixpanel', 'posthog',
   *   'snowplow', 'custom', or null when the call is not recognised.
   */
  detectSource(node) {
    if (!node) return null;

    // Check for analytics libraries
    if (node.receiver) {
      const objectName = node.receiver.name;
      const methodName = node.name;

      // Segment and Rudderstack (both use similar format)
      // Analytics.track (Segment) or analytics.track (Rudderstack)
      if ((objectName === 'Analytics' || objectName === 'analytics') && methodName === 'track') {
        // For now, lowercase 'analytics' is treated as Rudderstack
        return objectName === 'analytics' ? 'rudderstack' : 'segment';
      }

      // Mixpanel (Ruby SDK uses Mixpanel::Tracker instance)
      if (methodName === 'track' && objectName === 'tracker') return 'mixpanel';

      // PostHog
      if (objectName === 'posthog' && methodName === 'capture') return 'posthog';
    }

    // Snowplow (typically tracker.track_struct_event)
    if (node.name === 'track_struct_event') return 'snowplow';

    // Custom tracking function
    if (this.customFunction && node.name === this.customFunction) return 'custom';

    return null;
  }

  /**
   * Look up a keyword entry in a hash-like node and return its literal value.
   *
   * @param {Object} hashNode - Node expected to expose an `elements` array.
   * @param {string} keyName - Key to search for.
   * @returns {?string} The entry's literal value, or null when the node has no
   *   elements, the key is absent, or the value is not a literal.
   */
  findKeywordValue(hashNode, keyName) {
    const elements = hashNode?.elements;
    if (typeof elements?.find !== 'function') return null;
    const match = elements.find((element) => element?.key?.unescaped?.value === keyName);
    return match?.value?.unescaped?.value || null;
  }

  /**
   * Extract the event name from a tracking call.
   *
   * Returns null (rather than throwing) when the call has no arguments or the
   * arguments do not have the shape expected for `source`.
   *
   * @param {Object} node - Call node.
   * @param {?string} source - Result of `detectSource`.
   * @returns {?string} The event name, or null when it cannot be determined.
   */
  extractEventName(node, source) {
    const args = node?.arguments_?.arguments_;
    if (!args || args.length === 0) return null;

    // Segment/Rudderstack: track(event: '...', ...)
    // PostHog: posthog.capture({distinct_id: '...', event: '...', properties: {...}})
    if (source === 'segment' || source === 'rudderstack' || source === 'posthog') {
      return this.findKeywordValue(args[0], 'event');
    }

    // Mixpanel Ruby SDK format: tracker.track('distinct_id', 'event_name', {...})
    if (source === 'mixpanel') {
      return args[1]?.unescaped?.value || null;
    }

    // Snowplow Ruby SDK: tracker.track_struct_event(category: '...', action: '...', ...)
    if (source === 'snowplow') {
      return this.findKeywordValue(args[0], 'action');
    }

    // Custom function format: customFunction('event_name', {...})
    if (source === 'custom') {
      return args[0]?.unescaped?.value || null;
    }

    return null;
  }

  /**
   * Describe a value node as a schema fragment.
   *
   * @param {Object} value - Value node.
   * @returns {Promise<Object>} `{type:'object', properties}` for hashes,
   *   `{type:'array', items}` for arrays, otherwise `{type}` from `getValueType`.
   */
  async describeValue(value) {
    const { HashNode, ArrayNode } = await TrackingVisitor.loadNodeTypes();

    if (value instanceof HashNode) {
      return { type: 'object', properties: await this.extractHashProperties(value) };
    }
    if (value instanceof ArrayNode) {
      return { type: 'array', items: await this.extractArrayItemProperties(value) };
    }
    return { type: await this.getValueType(value) };
  }

  /**
   * Build the property schema of a tracking call.
   *
   * @param {Object} node - Call node.
   * @param {?string} source - Result of `detectSource`.
   * @returns {Promise<?Object>} Map of property name to schema fragment, or
   *   null when the source is unrecognised or a custom call has no hash argument.
   */
  async extractProperties(node, source) {
    const { HashNode } = await TrackingVisitor.loadNodeTypes();
    const args = node.arguments_?.arguments_ ?? [];

    if (source === 'segment' || source === 'rudderstack') {
      // Both Segment and Rudderstack use the same format
      const properties = {};

      // Process all top-level fields except 'event'
      for (const param of args[0]?.elements ?? []) {
        const key = param?.key?.unescaped?.value;
        if (!key || key === 'event') continue;

        const value = param?.value;
        if (key === 'properties' && value instanceof HashNode) {
          // Merge properties from the 'properties' hash into the top level
          Object.assign(properties, await this.extractHashProperties(value));
        } else {
          properties[key] = await this.describeValue(value);
        }
      }

      return properties;
    }

    if (source === 'mixpanel') {
      // Mixpanel Ruby SDK: tracker.track('distinct_id', 'event_name', {properties})
      const properties = {};

      // Add distinct_id as property (even if it's a variable)
      if (args.length > 0) {
        properties.distinct_id = { type: await this.getValueType(args[0]) };
      }

      // Extract properties from third argument if it exists
      if (args.length > 2 && args[2] instanceof HashNode) {
        Object.assign(properties, await this.extractHashProperties(args[2]));
      }

      return properties;
    }

    if (source === 'posthog') {
      // PostHog Ruby SDK: posthog.capture({distinct_id: '...', event: '...', properties: {...}})
      const properties = {};
      const elements = args[0]?.elements;

      if (elements) {
        // Extract distinct_id if present
        const distinctIdProperty = elements.find((elem) => elem?.key?.unescaped?.value === 'distinct_id');
        if (distinctIdProperty?.value) {
          properties.distinct_id = { type: await this.getValueType(distinctIdProperty.value) };
        }

        // Extract properties
        const propsProperty = elements.find((elem) => elem?.key?.unescaped?.value === 'properties');
        if (propsProperty?.value instanceof HashNode) {
          Object.assign(properties, await this.extractHashProperties(propsProperty.value));
        }
      }

      return properties;
    }

    if (source === 'snowplow') {
      // Snowplow Ruby SDK: tracker.track_struct_event(category: '...', action: '...', ...)
      const properties = {};

      // Extract all struct event parameters except 'action' (which is used as the event name)
      for (const param of args[0]?.elements ?? []) {
        const key = param?.key?.unescaped?.value;
        if (key && key !== 'action') {
          properties[key] = { type: await this.getValueType(param.value) };
        }
      }

      return properties;
    }

    if (source === 'custom') {
      // Custom function format: customFunction('event_name', {properties})
      if (args.length > 1 && args[1] instanceof HashNode) {
        return await this.extractHashProperties(args[1]);
      }
    }

    return null;
  }

  /**
   * Convert a hash node into a map of key name to schema fragment.
   *
   * Only AssocNode entries with a literal key are included.
   *
   * @param {Object} hashNode - Hash node exposing `elements`.
   * @returns {Promise<Object>} Property schema map.
   */
  async extractHashProperties(hashNode) {
    const { AssocNode } = await TrackingVisitor.loadNodeTypes();
    const properties = {};

    for (const element of hashNode.elements) {
      if (!(element instanceof AssocNode)) continue;

      const key = element.key.unescaped?.value;
      if (key) {
        properties[key] = await this.describeValue(element.value);
      }
    }

    return properties;
  }

  /**
   * Describe the items of an array node, based on its first element only.
   *
   * @param {Object} arrayNode - Array node exposing `elements`.
   * @returns {Promise<Object>} `{type:'any'}` for an empty array,
   *   `{type:'object', properties}` when the first element is a hash,
   *   otherwise `{type}` from `getValueType`.
   */
  async extractArrayItemProperties(arrayNode) {
    const { HashNode } = await TrackingVisitor.loadNodeTypes();

    if (arrayNode.elements.length === 0) {
      return { type: 'any' };
    }

    const firstItem = arrayNode.elements[0];
    if (firstItem instanceof HashNode) {
      return {
        type: 'object',
        // Must be awaited: extractHashProperties is async, and storing the
        // pending Promise here would leave `properties` unusable as a schema.
        properties: await this.extractHashProperties(firstItem),
      };
    }

    return { type: await this.getValueType(firstItem) };
  }

  /**
   * Map a literal node to a schema type name.
   *
   * @param {Object} node - Value node.
   * @returns {Promise<string>} 'string', 'number', 'boolean', 'null' or 'any'.
   */
  async getValueType(node) {
    const {
      StringNode,
      IntegerNode,
      FloatNode,
      TrueNode,
      FalseNode,
      NilNode,
      SymbolNode,
    } = await TrackingVisitor.loadNodeTypes();

    if (node instanceof StringNode || node instanceof SymbolNode) return 'string';
    if (node instanceof IntegerNode || node instanceof FloatNode) return 'number';
    if (node instanceof TrueNode || node instanceof FalseNode) return 'boolean';
    if (node instanceof NilNode) return 'null';
    return 'any'; // Dynamic values (e.g. calls) and anything unrecognised
  }

  /**
   * Visit a node: record it if it is a tracking call, then descend into the
   * child nodes of the container node types handled below.
   *
   * NOTE(review): a CallNode's own children (arguments, attached block) are
   * not descended into, so the BlockNode/ArgumentsNode branches are only hit
   * if such a node is passed in directly — confirm whether that is intended.
   *
   * @param {?Object} node - Node to visit; falsy values are ignored.
   * @returns {Promise<void>}
   */
  async visit(node) {
    if (!node) return;

    const {
      CallNode,
      ProgramNode,
      StatementsNode,
      DefNode,
      IfNode,
      BlockNode,
      ArgumentsNode,
      HashNode,
      AssocNode,
      ClassNode,
      ModuleNode,
    } = await TrackingVisitor.loadNodeTypes();

    this.ancestors.push(node);

    // `finally` keeps the ancestor stack balanced on every exit path.
    try {
      // Check if this is a tracking call
      if (node instanceof CallNode) {
        try {
          const source = this.detectSource(node);
          const eventName = this.extractEventName(node, source);

          if (!source || !eventName) {
            return;
          }

          const line = this.getLineNumber(node.location);
          const functionName = await this.findWrappingFunction(node, this.ancestors);
          const properties = await this.extractProperties(node, source);

          this.events.push({
            eventName,
            source,
            properties,
            filePath: this.filePath,
            line,
            functionName,
          });
        } catch (nodeError) {
          console.error(`Error processing node in ${this.filePath}`);
        }
      }

      // Visit all child nodes
      if (node instanceof ProgramNode) {
        await this.visit(node.statements);
      } else if (node instanceof StatementsNode) {
        for (const child of node.body) {
          await this.visit(child);
        }
      } else if (
        node instanceof ClassNode ||
        node instanceof ModuleNode ||
        node instanceof DefNode ||
        node instanceof BlockNode
      ) {
        // visit() ignores a missing body.
        await this.visit(node.body);
      } else if (node instanceof IfNode) {
        await this.visit(node.statements);
        await this.visit(node.subsequent);
      } else if (node instanceof ArgumentsNode) {
        // The argument list is read as `arguments_`, matching how the
        // extract* methods access it elsewhere in this class.
        for (const arg of node.arguments_ ?? []) {
          await this.visit(arg);
        }
      } else if (node instanceof HashNode) {
        for (const element of node.elements) {
          await this.visit(element);
        }
      } else if (node instanceof AssocNode) {
        await this.visit(node.key);
        await this.visit(node.value);
      }
    } finally {
      this.ancestors.pop();
    }
  }
}
388
-
389
/**
 * Analyze a Ruby file for tracking calls.
 *
 * The Prism parser is created on the first call and kept in the module-level
 * `parse` variable for later calls.
 *
 * @param {string} filePath - Path of the Ruby file to analyze.
 * @param {string} [customFunction] - Name of a custom tracking method.
 * @returns {Promise<Array>} Events collected by the visitor, or an empty array
 *   when the file cannot be read, parsed or traversed (the failure is logged).
 */
async function analyzeRubyFile(filePath, customFunction) {
  // Lazy load the ruby prism parser
  if (!parse) {
    const { loadPrism: createParser } = await import('@ruby/prism');
    parse = await createParser();
  }

  let collected = [];

  try {
    const rubySource = fs.readFileSync(filePath, 'utf8');

    let parseResult;
    try {
      parseResult = await parse(rubySource);
    } catch (parseError) {
      console.error(`Error parsing file ${filePath}`);
      return []; // Return empty events array if parsing fails
    }

    // Walk the tree from the program node; events are only taken once the
    // whole traversal has completed.
    const tracker = new TrackingVisitor(rubySource, filePath, customFunction);
    await tracker.visit(parseResult.value);
    collected = tracker.events;
  } catch (fileError) {
    console.error(`Error reading or processing file ${filePath}`);
  }

  return collected;
}
418
-
419
- module.exports = { analyzeRubyFile };