@flisk/analyze-tracking 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/bin/cli.js +1 -1
  2. package/package.json +9 -7
  3. package/src/analyze/go/astTraversal.js +121 -0
  4. package/src/analyze/go/constants.js +20 -0
  5. package/src/analyze/go/eventDeduplicator.js +47 -0
  6. package/src/analyze/go/eventExtractor.js +156 -0
  7. package/src/analyze/go/goAstParser/constants.js +39 -0
  8. package/src/analyze/go/goAstParser/expressionParser.js +281 -0
  9. package/src/analyze/go/goAstParser/index.js +52 -0
  10. package/src/analyze/go/goAstParser/statementParser.js +387 -0
  11. package/src/analyze/go/goAstParser/tokenizer.js +196 -0
  12. package/src/analyze/go/goAstParser/typeParser.js +202 -0
  13. package/src/analyze/go/goAstParser/utils.js +99 -0
  14. package/src/analyze/go/index.js +55 -0
  15. package/src/analyze/go/propertyExtractor.js +670 -0
  16. package/src/analyze/go/trackingDetector.js +71 -0
  17. package/src/analyze/go/trackingExtractor.js +54 -0
  18. package/src/analyze/go/typeContext.js +88 -0
  19. package/src/analyze/go/utils.js +215 -0
  20. package/src/analyze/index.js +11 -6
  21. package/src/analyze/javascript/constants.js +115 -0
  22. package/src/analyze/javascript/detectors/analytics-source.js +119 -0
  23. package/src/analyze/javascript/detectors/index.js +10 -0
  24. package/src/analyze/javascript/extractors/event-extractor.js +179 -0
  25. package/src/analyze/javascript/extractors/index.js +13 -0
  26. package/src/analyze/javascript/extractors/property-extractor.js +172 -0
  27. package/src/analyze/javascript/index.js +38 -0
  28. package/src/analyze/javascript/parser.js +126 -0
  29. package/src/analyze/javascript/utils/function-finder.js +123 -0
  30. package/src/analyze/python/index.js +111 -0
  31. package/src/analyze/python/pythonTrackingAnalyzer.py +814 -0
  32. package/src/analyze/ruby/detectors.js +46 -0
  33. package/src/analyze/ruby/extractors.js +258 -0
  34. package/src/analyze/ruby/index.js +51 -0
  35. package/src/analyze/ruby/traversal.js +123 -0
  36. package/src/analyze/ruby/types.js +30 -0
  37. package/src/analyze/ruby/visitor.js +66 -0
  38. package/src/analyze/typescript/constants.js +109 -0
  39. package/src/analyze/typescript/detectors/analytics-source.js +120 -0
  40. package/src/analyze/typescript/detectors/index.js +10 -0
  41. package/src/analyze/typescript/extractors/event-extractor.js +269 -0
  42. package/src/analyze/typescript/extractors/index.js +14 -0
  43. package/src/analyze/typescript/extractors/property-extractor.js +395 -0
  44. package/src/analyze/typescript/index.js +48 -0
  45. package/src/analyze/typescript/parser.js +131 -0
  46. package/src/analyze/typescript/utils/function-finder.js +114 -0
  47. package/src/analyze/typescript/utils/type-resolver.js +193 -0
  48. package/src/generateDescriptions/index.js +81 -0
  49. package/src/generateDescriptions/llmUtils.js +33 -0
  50. package/src/generateDescriptions/promptUtils.js +62 -0
  51. package/src/generateDescriptions/schemaUtils.js +61 -0
  52. package/src/index.js +7 -2
  53. package/src/{fileProcessor.js → utils/fileProcessor.js} +5 -0
  54. package/src/{repoDetails.js → utils/repoDetails.js} +5 -0
  55. package/src/{yamlGenerator.js → utils/yamlGenerator.js} +5 -0
  56. package/src/analyze/analyzeGoFile.js +0 -1164
  57. package/src/analyze/analyzeJsFile.js +0 -87
  58. package/src/analyze/analyzePythonFile.js +0 -42
  59. package/src/analyze/analyzeRubyFile.js +0 -419
  60. package/src/analyze/analyzeTsFile.js +0 -192
  61. package/src/analyze/go2json.js +0 -1069
  62. package/src/analyze/helpers.js +0 -656
  63. package/src/analyze/pythonTrackingAnalyzer.py +0 -541
  64. package/src/generateDescriptions.js +0 -196
package/bin/cli.js CHANGED
@@ -31,7 +31,7 @@ const optionDefinitions = [
31
31
  name: 'model',
32
32
  alias: 'm',
33
33
  type: String,
34
- defaultValue: 'gpt-4o-mini',
34
+ defaultValue: 'gpt-4.1-nano',
35
35
  },
36
36
  {
37
37
  name: 'output',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flisk/analyze-tracking",
3
- "version": "0.7.2",
3
+ "version": "0.7.3",
4
4
  "description": "Analyzes tracking code in a project and generates data schemas",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -8,13 +8,15 @@
8
8
  },
9
9
  "scripts": {
10
10
  "test": "node tests",
11
- "test:js": "node --test tests/analyzeJsFile.test.js",
12
- "test:ts": "node --test tests/analyzeTsFile.test.js",
13
- "test:python": "node --experimental-vm-modules --test tests/analyzePythonFile.test.js",
14
- "test:ruby": "node --experimental-vm-modules --test tests/analyzeRubyFile.test.js",
15
- "test:go": "node --test tests/analyzeGoFile.test.js",
11
+ "test:js": "node --test tests/analyzeJavaScript.test.js",
12
+ "test:ts": "node --test tests/analyzeTypeScript.test.js",
13
+ "test:python": "node --experimental-vm-modules --test tests/analyzePython.test.js",
14
+ "test:ruby": "node --experimental-vm-modules --test tests/analyzeRuby.test.js",
15
+ "test:go": "node --test tests/analyzeGo.test.js",
16
16
  "test:cli": "node --test tests/cli.test.js",
17
- "test:schema": "node --test tests/schema.test.js"
17
+ "test:schema": "node --test tests/schema.test.js",
18
+ "test:generateDescriptions": "node --test tests/generateDescriptions.test.js",
19
+ "test:utils": "node --test tests/utils.test.js"
18
20
  },
19
21
  "files": [
20
22
  "bin",
package/src/analyze/go/astTraversal.js ADDED
@@ -0,0 +1,121 @@
1
+ /**
2
+ * @fileoverview AST traversal utilities for Go code analysis
3
+ * @module analyze/go/astTraversal
4
+ */
5
+
6
+ const { MAX_RECURSION_DEPTH } = require('./constants');
7
+ const { extractTrackingEvent } = require('./trackingExtractor');
8
+
9
+ /**
10
+ * Extract events from a body of statements
11
+ * @param {Array<Object>} body - Array of AST statement nodes to process
12
+ * @param {Array<Object>} events - Array to collect found tracking events (modified in place)
13
+ * @param {string} filePath - Path to the file being analyzed
14
+ * @param {string} functionName - Name of the current function being processed
15
+ * @param {string|null} customFunction - Name of custom tracking function to detect
16
+ * @param {Object} typeContext - Type information context for variable resolution
17
+ * @param {string} currentFunction - Current function context for type lookups
18
+ */
19
+ function extractEventsFromBody(body, events, filePath, functionName, customFunction, typeContext, currentFunction) {
20
+ for (const stmt of body) {
21
+ if (stmt.tag === 'exec' && stmt.expr) {
22
+ processExpression(stmt.expr, events, filePath, functionName, customFunction, typeContext, currentFunction);
23
+ } else if (stmt.tag === 'declare' && stmt.value) {
24
+ // Handle variable declarations with tracking calls
25
+ processExpression(stmt.value, events, filePath, functionName, customFunction, typeContext, currentFunction);
26
+ } else if (stmt.tag === 'assign' && stmt.rhs) {
27
+ // Handle assignments with tracking calls
28
+ processExpression(stmt.rhs, events, filePath, functionName, customFunction, typeContext, currentFunction);
29
+ } else if (stmt.tag === 'if' && stmt.body) {
30
+ extractEventsFromBody(stmt.body, events, filePath, functionName, customFunction, typeContext, currentFunction);
31
+ } else if (stmt.tag === 'elseif' && stmt.body) {
32
+ extractEventsFromBody(stmt.body, events, filePath, functionName, customFunction, typeContext, currentFunction);
33
+ } else if (stmt.tag === 'else' && stmt.body) {
34
+ extractEventsFromBody(stmt.body, events, filePath, functionName, customFunction, typeContext, currentFunction);
35
+ } else if (stmt.tag === 'for' && stmt.body) {
36
+ extractEventsFromBody(stmt.body, events, filePath, functionName, customFunction, typeContext, currentFunction);
37
+ } else if (stmt.tag === 'foreach' && stmt.body) {
38
+ extractEventsFromBody(stmt.body, events, filePath, functionName, customFunction, typeContext, currentFunction);
39
+ } else if (stmt.tag === 'switch' && stmt.cases) {
40
+ for (const caseNode of stmt.cases) {
41
+ if (caseNode.body) {
42
+ extractEventsFromBody(caseNode.body, events, filePath, functionName, customFunction, typeContext, currentFunction);
43
+ }
44
+ }
45
+ }
46
+ }
47
+ }
48
+
49
+ /**
50
+ * Process an expression to find tracking calls
51
+ * @param {Object} expr - AST expression node to process
52
+ * @param {Array<Object>} events - Array to collect found tracking events (modified in place)
53
+ * @param {string} filePath - Path to the file being analyzed
54
+ * @param {string} functionName - Name of the current function being processed
55
+ * @param {string|null} customFunction - Name of custom tracking function to detect
56
+ * @param {Object} typeContext - Type information context for variable resolution
57
+ * @param {string} currentFunction - Current function context for type lookups
58
+ * @param {number} [depth=0] - Current recursion depth (used to prevent infinite recursion)
59
+ */
60
+ function processExpression(expr, events, filePath, functionName, customFunction, typeContext, currentFunction, depth = 0) {
61
+ if (!expr || depth > MAX_RECURSION_DEPTH) return; // Prevent infinite recursion with depth limit
62
+
63
+ // Handle array of expressions
64
+ if (Array.isArray(expr)) {
65
+ for (const item of expr) {
66
+ processExpression(item, events, filePath, functionName, customFunction, typeContext, currentFunction, depth + 1);
67
+ }
68
+ return;
69
+ }
70
+
71
+ // Handle single expression with body
72
+ if (expr.body) {
73
+ for (const item of expr.body) {
74
+ processExpression(item, events, filePath, functionName, customFunction, typeContext, currentFunction, depth + 1);
75
+ }
76
+ return;
77
+ }
78
+
79
+ // Handle specific node types
80
+ if (expr.tag === 'call') {
81
+ const trackingCall = extractTrackingEvent(expr, filePath, functionName, customFunction, typeContext, currentFunction);
82
+ if (trackingCall) {
83
+ events.push(trackingCall);
84
+ }
85
+
86
+ // Also process call arguments
87
+ if (expr.args) {
88
+ processExpression(expr.args, events, filePath, functionName, customFunction, typeContext, currentFunction, depth + 1);
89
+ }
90
+ } else if (expr.tag === 'structlit') {
91
+ // Check if this struct literal is a tracking event
92
+ const trackingCall = extractTrackingEvent(expr, filePath, functionName, customFunction, typeContext, currentFunction);
93
+ if (trackingCall) {
94
+ events.push(trackingCall);
95
+ }
96
+
97
+ // Process fields (but don't recurse into field values for tracking structs)
98
+ if (!trackingCall && expr.fields) {
99
+ for (const field of expr.fields) {
100
+ if (field.value) {
101
+ processExpression(field.value, events, filePath, functionName, customFunction, typeContext, currentFunction, depth + 1);
102
+ }
103
+ }
104
+ }
105
+ }
106
+
107
+ // Process other common properties that might contain expressions
108
+ if (expr.value && expr.tag !== 'structlit') {
109
+ processExpression(expr.value, events, filePath, functionName, customFunction, typeContext, currentFunction, depth + 1);
110
+ }
111
+ if (expr.lhs) {
112
+ processExpression(expr.lhs, events, filePath, functionName, customFunction, typeContext, currentFunction, depth + 1);
113
+ }
114
+ if (expr.rhs) {
115
+ processExpression(expr.rhs, events, filePath, functionName, customFunction, typeContext, currentFunction, depth + 1);
116
+ }
117
+ }
118
+
119
+ module.exports = {
120
+ extractEventsFromBody
121
+ };
package/src/analyze/go/constants.js ADDED
@@ -0,0 +1,20 @@
1
+ /**
2
+ * @fileoverview Constants for Go analytics tracking analysis
3
+ * @module analyze/go/constants
4
+ */
5
+
6
+ const ANALYTICS_SOURCES = {
7
+ SEGMENT: 'segment',
8
+ POSTHOG: 'posthog',
9
+ AMPLITUDE: 'amplitude',
10
+ MIXPANEL: 'mixpanel',
11
+ SNOWPLOW: 'snowplow',
12
+ CUSTOM: 'custom'
13
+ };
14
+
15
+ const MAX_RECURSION_DEPTH = 20;
16
+
17
+ module.exports = {
18
+ ANALYTICS_SOURCES,
19
+ MAX_RECURSION_DEPTH
20
+ };
package/src/analyze/go/eventDeduplicator.js ADDED
@@ -0,0 +1,47 @@
1
+ /**
2
+ * @fileoverview Event deduplication utilities for Go analytics tracking
3
+ * @module analyze/go/eventDeduplicator
4
+ */
5
+
6
+ const { ANALYTICS_SOURCES } = require('./constants');
7
+
8
+ /**
9
+ * Deduplicate events based on eventName, source, and function
10
+ * For Amplitude, prefer struct literal line numbers over function call line numbers
11
+ * @param {Array<Object>} events - Array of tracking events to deduplicate
12
+ * @returns {Array<Object>} Array of unique tracking events
13
+ */
14
+ function deduplicateEvents(events) {
15
+ const uniqueEvents = [];
16
+ const seen = new Set();
17
+
18
+ for (const event of events) {
19
+ // For Amplitude, we want to keep the line number from the struct literal
20
+ // For other sources, we can use any line number since they don't have this issue
21
+ const key = `${event.eventName}:${event.source}:${event.functionName}`;
22
+ if (!seen.has(key)) {
23
+ seen.add(key);
24
+ uniqueEvents.push(event);
25
+ } else {
26
+ // If we've seen this event before and it's Amplitude, check if this is the struct literal version
27
+ const existingEvent = uniqueEvents.find(e =>
28
+ e.eventName === event.eventName &&
29
+ e.source === event.source &&
30
+ e.functionName === event.functionName
31
+ );
32
+
33
+ // If this is Amplitude and the existing event is from the function call (higher line number),
34
+ // replace it with this one (from the struct literal)
35
+ if (event.source === ANALYTICS_SOURCES.AMPLITUDE && existingEvent && existingEvent.line > event.line) {
36
+ const index = uniqueEvents.indexOf(existingEvent);
37
+ uniqueEvents[index] = event;
38
+ }
39
+ }
40
+ }
41
+
42
+ return uniqueEvents;
43
+ }
44
+
45
+ module.exports = {
46
+ deduplicateEvents
47
+ };
package/src/analyze/go/eventExtractor.js ADDED
@@ -0,0 +1,156 @@
1
+ /**
2
+ * @fileoverview Event extraction logic for Go analytics tracking
3
+ * @module analyze/go/eventExtractor
4
+ */
5
+
6
+ const { ANALYTICS_SOURCES } = require('./constants');
7
+ const { extractStringValue, findStructLiteral, findStructField, extractSnowplowValue } = require('./utils');
8
+
9
+ /**
10
+ * Extract event name from a tracking call based on the source
11
+ * @param {Object} callNode - AST node representing a function call or struct literal
12
+ * @param {string} source - Analytics source (e.g., 'segment', 'amplitude')
13
+ * @returns {string|null} Event name or null if not found
14
+ */
15
+ function extractEventName(callNode, source) {
16
+ if (!callNode.args || callNode.args.length === 0) {
17
+ // For struct literals, we need to check fields instead of args
18
+ if (!callNode.fields || callNode.fields.length === 0) {
19
+ return null;
20
+ }
21
+ }
22
+
23
+ switch (source) {
24
+ case ANALYTICS_SOURCES.MIXPANEL:
25
+ return extractMixpanelEventName(callNode);
26
+
27
+ case ANALYTICS_SOURCES.SEGMENT:
28
+ case ANALYTICS_SOURCES.POSTHOG:
29
+ return extractSegmentPosthogEventName(callNode);
30
+
31
+ case ANALYTICS_SOURCES.AMPLITUDE:
32
+ return extractAmplitudeEventName(callNode);
33
+
34
+ case ANALYTICS_SOURCES.SNOWPLOW:
35
+ return extractSnowplowEventName(callNode);
36
+
37
+ case ANALYTICS_SOURCES.CUSTOM:
38
+ return extractCustomEventName(callNode);
39
+ }
40
+
41
+ return null;
42
+ }
43
+
44
+ /**
45
+ * Extract Mixpanel event name
46
+ * Pattern: mp.Track(ctx, []*mixpanel.Event{mp.NewEvent("event_name", "", props)})
47
+ * @param {Object} callNode - AST node for Mixpanel tracking call
48
+ * @returns {string|null} Event name or null if not found
49
+ */
50
+ function extractMixpanelEventName(callNode) {
51
+ if (callNode.args && callNode.args.length > 1) {
52
+ const arrayArg = callNode.args[1];
53
+ if (arrayArg.tag === 'expr' && arrayArg.body) {
54
+ const arrayLit = arrayArg.body.find(item => item.tag === 'arraylit');
55
+ if (arrayLit && arrayLit.items && arrayLit.items.length > 0) {
56
+ // Each item is an array of tokens that needs to be parsed
57
+ const firstItem = arrayLit.items[0];
58
+ if (Array.isArray(firstItem)) {
59
+ // Look for pattern: mp.NewEvent("event_name", ...)
60
+ for (let i = 0; i < firstItem.length - 4; i++) {
61
+ if (firstItem[i].tag === 'ident' && firstItem[i].value === 'mp' &&
62
+ firstItem[i+1].tag === 'sigil' && firstItem[i+1].value === '.' &&
63
+ firstItem[i+2].tag === 'ident' && firstItem[i+2].value === 'NewEvent' &&
64
+ firstItem[i+3].tag === 'sigil' && firstItem[i+3].value === '(') {
65
+ // Found mp.NewEvent( - next token should be the event name
66
+ if (firstItem[i+4] && firstItem[i+4].tag === 'string') {
67
+ return firstItem[i+4].value.slice(1, -1); // Remove quotes
68
+ }
69
+ }
70
+ }
71
+ }
72
+ }
73
+ }
74
+ }
75
+ return null;
76
+ }
77
+
78
+ /**
79
+ * Extract Segment/PostHog event name
80
+ * Pattern: analytics.Track{Event: "event_name", ...} or posthog.Capture{Event: "event_name", ...}
81
+ * @param {Object} callNode - AST node for Segment/PostHog struct literal
82
+ * @returns {string|null} Event name or null if not found
83
+ */
84
+ function extractSegmentPosthogEventName(callNode) {
85
+ if (callNode.fields) {
86
+ const eventField = findStructField(callNode, 'Event');
87
+ if (eventField) {
88
+ return extractStringValue(eventField.value);
89
+ }
90
+ }
91
+ return null;
92
+ }
93
+
94
+ /**
95
+ * Extract Amplitude event name
96
+ * Pattern: amplitude.Event{EventType: "event_name", ...} or client.Track(amplitude.Event{EventType: "event_name", ...})
97
+ * @param {Object} callNode - AST node for Amplitude tracking call
98
+ * @returns {string|null} Event name or null if not found
99
+ */
100
+ function extractAmplitudeEventName(callNode) {
101
+ // For struct literals: amplitude.Event{EventType: "event_name", ...}
102
+ if (callNode.tag === 'structlit' && callNode.fields) {
103
+ const eventTypeField = findStructField(callNode, 'EventType');
104
+ if (eventTypeField) {
105
+ return extractStringValue(eventTypeField.value);
106
+ }
107
+ }
108
+ // For function calls: client.Track(amplitude.Event{EventType: "event_name", ...})
109
+ else if (callNode.args && callNode.args.length > 0) {
110
+ const eventStruct = findStructLiteral(callNode.args[0]);
111
+ if (eventStruct && eventStruct.fields) {
112
+ const eventTypeField = findStructField(eventStruct, 'EventType');
113
+ if (eventTypeField) {
114
+ return extractStringValue(eventTypeField.value);
115
+ }
116
+ }
117
+ }
118
+ return null;
119
+ }
120
+
121
+ /**
122
+ * Extract Snowplow event name
123
+ * Pattern: tracker.TrackStructEvent(sp.StructuredEvent{Action: sphelp.NewString("event_name"), ...})
124
+ * @param {Object} callNode - AST node for Snowplow tracking call
125
+ * @returns {string|null} Event name or null if not found
126
+ */
127
+ function extractSnowplowEventName(callNode) {
128
+ if (callNode.args && callNode.args.length > 0) {
129
+ const structEvent = findStructLiteral(callNode.args[0]);
130
+ if (structEvent && structEvent.fields) {
131
+ const actionField = findStructField(structEvent, 'Action');
132
+ if (actionField) {
133
+ // Snowplow uses sphelp.NewString("value")
134
+ return extractSnowplowValue(actionField.value);
135
+ }
136
+ }
137
+ }
138
+ return null;
139
+ }
140
+
141
+ /**
142
+ * Extract custom event name
143
+ * Pattern: customFunction("event_name", props)
144
+ * @param {Object} callNode - AST node for custom tracking function call
145
+ * @returns {string|null} Event name or null if not found
146
+ */
147
+ function extractCustomEventName(callNode) {
148
+ if (callNode.args && callNode.args.length > 0) {
149
+ return extractStringValue(callNode.args[0]);
150
+ }
151
+ return null;
152
+ }
153
+
154
+ module.exports = {
155
+ extractEventName
156
+ };
package/src/analyze/go/goAstParser/constants.js ADDED
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Constants used by the Go AST parser
3
+ */
4
+
5
+ // Token sigils (operators and delimiters)
6
+ const SIGIL = [
7
+ /*TRIPLE*/ "<<=", ">>=",
8
+ /*DOUBLE*/ "+=", "-=", "*=", "/=", "%=", "++", "--", ":=", "==", "&&", "||", ">=", "<=", "<<", ">>", "&=", "^=", "|=", "!=", "<-",
9
+ /*SINGLE*/ "=", "+", "-", "*", "/", "%", "{", "}", "[", "]", "(", ")", ",", "&", "|", "!", "<", ">", "^", ";", ":"
10
+ ];
11
+
12
+ // Operators (sigils excluding delimiters)
13
+ const OPERATOR = SIGIL.filter(x => !["{", "}", "[", "]", ";", ":=", "="].includes(x));
14
+
15
+ // Character classes
16
+ const DOT = ".";
17
+ const WHITESPACE = " \t";
18
+ const NEWLINE = "\n\r";
19
+ const NUMBER = "01234567890";
20
+ const QUOTE = "\"'`";
21
+
22
+ // Primitive Go types
23
+ const PRIMTYPES = [
24
+ "int", "byte", "bool", "float32", "float64",
25
+ "int8", "int32", "int16", "int64",
26
+ "uint8", "uint32", "uint16", "uint64",
27
+ "rune", "string"
28
+ ];
29
+
30
+ module.exports = {
31
+ SIGIL,
32
+ OPERATOR,
33
+ DOT,
34
+ WHITESPACE,
35
+ NEWLINE,
36
+ NUMBER,
37
+ QUOTE,
38
+ PRIMTYPES
39
+ };