@flisk/analyze-tracking 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -61
- package/bin/cli.js +1 -1
- package/package.json +18 -3
- package/src/analyze/go/astTraversal.js +121 -0
- package/src/analyze/go/constants.js +20 -0
- package/src/analyze/go/eventDeduplicator.js +47 -0
- package/src/analyze/go/eventExtractor.js +156 -0
- package/src/analyze/go/goAstParser/constants.js +39 -0
- package/src/analyze/go/goAstParser/expressionParser.js +281 -0
- package/src/analyze/go/goAstParser/index.js +52 -0
- package/src/analyze/go/goAstParser/statementParser.js +387 -0
- package/src/analyze/go/goAstParser/tokenizer.js +196 -0
- package/src/analyze/go/goAstParser/typeParser.js +202 -0
- package/src/analyze/go/goAstParser/utils.js +99 -0
- package/src/analyze/go/index.js +55 -0
- package/src/analyze/go/propertyExtractor.js +670 -0
- package/src/analyze/go/trackingDetector.js +71 -0
- package/src/analyze/go/trackingExtractor.js +54 -0
- package/src/analyze/go/typeContext.js +88 -0
- package/src/analyze/go/utils.js +215 -0
- package/src/analyze/index.js +11 -7
- package/src/analyze/javascript/constants.js +115 -0
- package/src/analyze/javascript/detectors/analytics-source.js +119 -0
- package/src/analyze/javascript/detectors/index.js +10 -0
- package/src/analyze/javascript/extractors/event-extractor.js +179 -0
- package/src/analyze/javascript/extractors/index.js +13 -0
- package/src/analyze/javascript/extractors/property-extractor.js +172 -0
- package/src/analyze/javascript/index.js +38 -0
- package/src/analyze/javascript/parser.js +126 -0
- package/src/analyze/javascript/utils/function-finder.js +123 -0
- package/src/analyze/python/index.js +111 -0
- package/src/analyze/python/pythonTrackingAnalyzer.py +814 -0
- package/src/analyze/ruby/detectors.js +46 -0
- package/src/analyze/ruby/extractors.js +258 -0
- package/src/analyze/ruby/index.js +51 -0
- package/src/analyze/ruby/traversal.js +123 -0
- package/src/analyze/ruby/types.js +30 -0
- package/src/analyze/ruby/visitor.js +66 -0
- package/src/analyze/typescript/constants.js +109 -0
- package/src/analyze/typescript/detectors/analytics-source.js +120 -0
- package/src/analyze/typescript/detectors/index.js +10 -0
- package/src/analyze/typescript/extractors/event-extractor.js +269 -0
- package/src/analyze/typescript/extractors/index.js +14 -0
- package/src/analyze/typescript/extractors/property-extractor.js +395 -0
- package/src/analyze/typescript/index.js +48 -0
- package/src/analyze/typescript/parser.js +131 -0
- package/src/analyze/typescript/utils/function-finder.js +114 -0
- package/src/analyze/typescript/utils/type-resolver.js +193 -0
- package/src/generateDescriptions/index.js +81 -0
- package/src/generateDescriptions/llmUtils.js +33 -0
- package/src/generateDescriptions/promptUtils.js +62 -0
- package/src/generateDescriptions/schemaUtils.js +61 -0
- package/src/index.js +7 -2
- package/src/{fileProcessor.js → utils/fileProcessor.js} +5 -0
- package/src/{repoDetails.js → utils/repoDetails.js} +5 -0
- package/src/{yamlGenerator.js → utils/yamlGenerator.js} +5 -0
- package/.github/workflows/npm-publish.yml +0 -33
- package/.github/workflows/pr-check.yml +0 -17
- package/jest.config.js +0 -7
- package/src/analyze/analyzeGoFile.js +0 -1164
- package/src/analyze/analyzeJsFile.js +0 -72
- package/src/analyze/analyzePythonFile.js +0 -41
- package/src/analyze/analyzeRubyFile.js +0 -409
- package/src/analyze/analyzeTsFile.js +0 -69
- package/src/analyze/go2json.js +0 -1069
- package/src/analyze/helpers.js +0 -217
- package/src/analyze/pythonTrackingAnalyzer.py +0 -439
- package/src/generateDescriptions.js +0 -196
- package/tests/detectSource.test.js +0 -20
- package/tests/extractProperties.test.js +0 -109
- package/tests/findWrappingFunction.test.js +0 -30
package/src/analyze/helpers.js
DELETED
|
@@ -1,217 +0,0 @@
|
|
|
1
|
-
const ts = require('typescript');
|
|
2
|
-
|
|
3
|
-
/**
 * Classify an ESTree-style call node by the analytics provider it targets.
 *
 * Bare identifier callees are matched against `gtag`, `snowplow` and the
 * caller-supplied custom function name; member callees are matched against a
 * fixed list of `object.method` pairs.
 *
 * @param {object} node - Call node; only `node.callee` is inspected.
 * @param {string} [customFunction] - Name of a user-defined tracking function.
 * @returns {string} Provider identifier, `'custom'`, or `'unknown'`.
 */
function detectSourceJs(node, customFunction) {
  // [object name, method name, provider] for `object.method(...)` calls.
  const memberCallSources = [
    ['analytics', 'track', 'segment'],
    ['mixpanel', 'track', 'mixpanel'],
    ['amplitude', 'logEvent', 'amplitude'],
    ['rudderanalytics', 'track', 'rudderstack'],
    ['mParticle', 'logEvent', 'mparticle'],
    ['posthog', 'capture', 'posthog'],
    ['pendo', 'track', 'pendo'],
    ['heap', 'track', 'heap'],
  ];

  const { callee } = node;
  if (!callee) {
    return 'unknown';
  }

  if (callee.type === 'MemberExpression') {
    const receiver = callee.object.name;
    const method = callee.property.name;
    const hit = memberCallSources.find(
      ([objectName, methodName]) => objectName === receiver && methodName === method,
    );
    return hit ? hit[2] : 'unknown';
  }

  if (callee.type !== 'Identifier') {
    return 'unknown';
  }

  // Built-in names take precedence over the custom function name.
  switch (callee.name) {
    case 'gtag':
      return 'googleanalytics';
    case 'snowplow':
      return 'snowplow';
    default:
      return callee.name === customFunction ? 'custom' : 'unknown';
  }
}
|
|
34
|
-
|
|
35
|
-
/**
 * Classify a TypeScript AST call node by the analytics provider it targets.
 *
 * Mirrors the JavaScript detector: bare identifiers are checked against
 * `gtag`, `snowplow` and the custom function name; property accesses are
 * checked against a fixed list of `object.method` pairs.
 *
 * @param {object} node - Node whose `expression` is the callee being invoked.
 * @param {string} [customFunction] - Name of a user-defined tracking function.
 * @returns {string} Provider identifier, `'custom'`, or `'unknown'`.
 */
function detectSourceTs(node, customFunction) {
  // [object name, method name, provider] for `object.method(...)` calls.
  const memberCallSources = [
    ['analytics', 'track', 'segment'],
    ['mixpanel', 'track', 'mixpanel'],
    ['amplitude', 'logEvent', 'amplitude'],
    ['rudderanalytics', 'track', 'rudderstack'],
    ['mParticle', 'logEvent', 'mparticle'],
    ['posthog', 'capture', 'posthog'],
    ['pendo', 'track', 'pendo'],
    ['heap', 'track', 'heap'],
  ];

  const callee = node.expression;
  if (!callee) {
    return 'unknown';
  }

  if (ts.isPropertyAccessExpression(callee)) {
    const receiver = callee.expression.escapedText;
    const method = callee.name.escapedText;
    const hit = memberCallSources.find(
      ([objectName, methodName]) => objectName === receiver && methodName === method,
    );
    return hit ? hit[2] : 'unknown';
  }

  if (!ts.isIdentifier(callee)) {
    return 'unknown';
  }

  // Built-in names take precedence over the custom function name.
  switch (callee.escapedText) {
    case 'gtag':
      return 'googleanalytics';
    case 'snowplow':
      return 'snowplow';
    default:
      return callee.escapedText === customFunction ? 'custom' : 'unknown';
  }
}
|
|
66
|
-
|
|
67
|
-
/**
 * Walk up the `parent` chain from a TypeScript AST node and report the name of
 * the nearest enclosing function declaration, method declaration or arrow
 * function.
 *
 * @param {object} node - Starting node (it is itself tested first).
 * @returns {string} The enclosing function's name, `'anonymous'` when that
 *   function has no `name`, or `'global'` when no enclosing function exists.
 */
function findWrappingFunctionTs(node) {
  for (let cursor = node; cursor; cursor = cursor.parent) {
    const isFunctionLike =
      ts.isFunctionDeclaration(cursor) ||
      ts.isMethodDeclaration(cursor) ||
      ts.isArrowFunction(cursor);

    if (isFunctionLike) {
      return cursor.name ? cursor.name.escapedText : 'anonymous';
    }
  }
  return 'global';
}
|
|
77
|
-
|
|
78
|
-
/**
 * Find the name of the function (or function-valued binding) that encloses an
 * ESTree node, scanning its ancestor list from innermost to outermost.
 *
 * Recognized wrappers, in the order they are tested for each ancestor:
 *  - a variable declarator whose initializer is `node` itself or a
 *    function/arrow expression,
 *  - a function declaration,
 *  - an `export` of a variable whose first declarator holds a function/arrow
 *    expression,
 *  - an object-literal property whose value is `node`.
 *
 * Declarators without an initializer (`let x;`, for-in/for-of bindings, or
 * `export let a, b = ...`) are skipped instead of raising a TypeError.
 *
 * @param {object} node - The node whose wrapper is being looked up.
 * @param {object[]} ancestors - Ancestor nodes, outermost first.
 * @returns {string} The wrapper's name, `'anonymous'` for an unnamed function
 *   declaration, or `'global'` when nothing matches.
 */
function findWrappingFunctionJs(node, ancestors) {
  // `init` is null for declarators without an initializer, hence the `?.`.
  const isFunctionValue = (init) =>
    init?.type === 'ArrowFunctionExpression' || init?.type === 'FunctionExpression';

  for (let i = ancestors.length - 1; i >= 0; i--) {
    const current = ancestors[i];

    // Direct assignment (const x = <node>) or a function stored in a variable.
    if (
      current.type === 'VariableDeclarator' &&
      (current.init === node || isFunctionValue(current.init))
    ) {
      return current.id.name;
    }

    // Named function declarations.
    if (current.type === 'FunctionDeclaration') {
      return current.id ? current.id.name : 'anonymous';
    }

    // Exported variable holding a function (export const myFunc = () => {}).
    if (current.type === 'ExportNamedDeclaration' && current.declaration) {
      const declaration = current.declaration.declarations?.[0];
      if (declaration && isFunctionValue(declaration.init)) {
        return declaration.id.name;
      }
    }

    // Methods within object literals.
    if (current.type === 'Property' && current.value === node) {
      return current.key.name || current.key.value;
    }
  }
  return 'global';
}
|
|
112
|
-
|
|
113
|
-
/**
 * Build a schema-like description of an ESTree object expression.
 *
 * Each keyed property maps to `{ type }`, where `type` is the JavaScript
 * `typeof` of the property's literal value. Values with no literal (`typeof`
 * is `'undefined'`) or with an object-typed literal are reported as `'any'`.
 * Nested object expressions are described recursively.
 *
 * @param {object} node - Object expression; `node.properties` is iterated.
 * @returns {Object<string, object>} Map from property name to its description.
 */
function extractJsProperties(node) {
  const schema = {};

  for (const entry of node.properties) {
    const name = entry.key?.name || entry.key?.value;
    if (!name) {
      // Entries without a usable key (e.g. spreads) are ignored.
      continue;
    }

    const literalType = typeof entry.value.value;

    if (entry.value.type === 'ObjectExpression') {
      schema[name] = {
        type: 'object',
        properties: extractJsProperties(entry.value),
      };
      continue;
    }

    const isOpaque = literalType === 'undefined' || literalType === 'object';
    schema[name] = { type: isOpaque ? 'any' : literalType };
  }

  return schema;
}
|
|
138
|
-
|
|
139
|
-
/**
 * Build a schema-like description of a TypeScript object literal (or similar
 * node exposing `properties`).
 *
 * For each keyed member:
 *  - shorthand assignments (`{ foo }`) use the checker's type of the symbol's
 *    value declaration; members whose symbol cannot be resolved are omitted,
 *  - nested object literals are described recursively,
 *  - array literals report the type of their first element (`'any'` when the
 *    array is empty),
 *  - string / numeric / boolean literals map to their primitive type name,
 *  - any other initializer falls back to the checker's type string,
 *  - members with only a type annotation use that annotation's type string.
 *
 * @param {object} checker - TypeScript type checker.
 * @param {object} node - Node whose `properties` are inspected.
 * @returns {Object<string, object>} Map from property name to its description.
 */
function extractTsProperties(checker, node) {
  // Primitive type name for a hard-coded literal, or undefined when the
  // initializer is not a simple literal.
  const literalTypeOf = (kind) => {
    switch (kind) {
      case ts.SyntaxKind.StringLiteral:
        return 'string';
      case ts.SyntaxKind.NumericLiteral:
        return 'number';
      case ts.SyntaxKind.TrueKeyword:
      case ts.SyntaxKind.FalseKeyword:
        return 'boolean';
      default:
        return undefined;
    }
  };

  const properties = {};

  for (const prop of node.properties) {
    const key = prop.name
      ? prop.name.text
      : (prop.key ? (prop.key.text || prop.key.value) : undefined);
    if (!key) continue;

    if (ts.isShorthandPropertyAssignment(prop)) {
      const symbol = checker.getSymbolAtLocation(prop.name);
      if (symbol) {
        properties[key] = { type: getTypeOfNode(checker, symbol.valueDeclaration) };
      }
      continue;
    }

    const { initializer } = prop;
    if (initializer) {
      if (ts.isObjectLiteralExpression(initializer)) {
        properties[key] = {
          type: 'object',
          properties: extractTsProperties(checker, initializer),
        };
      } else if (ts.isArrayLiteralExpression(initializer)) {
        // An empty literal has no element to ask the checker about.
        const firstElement = initializer.elements[0];
        properties[key] = {
          type: 'array',
          items: {
            type: firstElement ? (getTypeOfNode(checker, firstElement) || 'any') : 'any',
          },
        };
      } else {
        // Object and array literals were handled above, so only primitive
        // literals need a dedicated mapping before deferring to the checker.
        const valueType =
          literalTypeOf(initializer.kind) ?? (getTypeOfNode(checker, initializer) || 'any');
        properties[key] = { type: valueType };
      }
      continue;
    }

    if (prop.type) {
      const valueType = checker.typeToString(checker.getTypeFromTypeNode(prop.type)) || 'any';
      properties[key] = { type: valueType };
    }
  }

  return properties;
}
|
|
203
|
-
|
|
204
|
-
/**
 * Render the checker's type for a node as a string.
 *
 * @param {object} checker - Object providing `getTypeAtLocation` and `typeToString`.
 * @param {object} node - Node to look up.
 * @returns {string} Whatever `checker.typeToString` returns for the node's type.
 */
function getTypeOfNode(checker, node) {
  return checker.typeToString(checker.getTypeAtLocation(node));
}
|
|
208
|
-
|
|
209
|
-
module.exports = {
|
|
210
|
-
detectSourceJs,
|
|
211
|
-
detectSourceTs,
|
|
212
|
-
findWrappingFunctionTs,
|
|
213
|
-
findWrappingFunctionJs,
|
|
214
|
-
extractJsProperties,
|
|
215
|
-
extractTsProperties,
|
|
216
|
-
getTypeOfNode,
|
|
217
|
-
};
|
|
@@ -1,439 +0,0 @@
|
|
|
1
|
-
import ast
|
|
2
|
-
import json
|
|
3
|
-
|
|
4
|
-
class TrackingVisitor(ast.NodeVisitor):
    """Collect analytics tracking calls found while walking a Python AST.

    Every recognized call is appended to ``self.events`` as a dict with the
    keys ``eventName``, ``source``, ``properties``, ``filePath``, ``line`` and
    ``functionName``. Variable types seen in annotations and literal
    assignments are remembered per function/class scope so that properties
    passed by name can be typed.
    """

    # (receiver name, method name) -> source for ``receiver.method(...)`` calls.
    _METHOD_SOURCES = {
        ('analytics', 'track'): 'segment',
        ('mixpanel', 'track'): 'mixpanel',
        ('amplitude', 'track'): 'amplitude',
        ('rudder_analytics', 'track'): 'rudderstack',
        ('mParticle', 'logEvent'): 'mparticle',
        ('posthog', 'capture'): 'posthog',
        ('pendo', 'track'): 'pendo',
        ('heap', 'track'): 'heap',
    }

    # Bare function names treated as Snowplow struct-event calls.
    _SNOWPLOW_FUNCTIONS = ('trackStructEvent', 'buildStructEvent')

    # Sources called as ``fn('event_name', {properties})``.
    _NAME_THEN_PROPS_SOURCES = (
        'segment', 'mixpanel', 'amplitude', 'rudderstack',
        'mparticle', 'pendo', 'heap', 'custom',
    )

    def __init__(self, filepath, custom_function=None):
        """Create a visitor for one file.

        Args:
            filepath: Path recorded in each event's ``filePath`` field.
            custom_function: Optional name of a user-defined tracking function.
        """
        self.events = []
        self.filepath = filepath
        self.current_function = 'global'
        self.function_stack = []
        # Variable types known in the current scope.
        self.var_types = {}
        # Saved variable-type scopes of the enclosing functions/classes.
        self.var_types_stack = []
        self.custom_function = custom_function

    def visit_FunctionDef(self, node):
        """Enter a function scope, visit its body, then restore the outer scope."""
        self.function_stack.append(self.current_function)
        self.var_types_stack.append(self.var_types)

        self.var_types = {}
        self.current_function = node.name

        # Seed the new scope with annotated parameter types.
        for arg in node.args.args:
            if arg.annotation:
                self.var_types[arg.arg] = self.extract_type_annotation(arg.annotation)

        self.generic_visit(node)

        self.current_function = self.function_stack.pop()
        self.var_types = self.var_types_stack.pop()

    # ``async def`` nodes expose the same ``name``/``args`` fields, so they are
    # scoped exactly like regular functions.
    visit_AsyncFunctionDef = visit_FunctionDef

    def extract_type_annotation(self, annotation):
        """Map a type-annotation node to a schema type.

        Returns a type name string, or for ``List[X]``-style annotations with
        a simple element type a dict ``{'type': 'array', 'items': {...}}``.
        Anything unrecognized yields ``'any'``.
        """
        if isinstance(annotation, ast.Name):
            type_name = annotation.id
            if type_name in ('int', 'float'):
                return 'number'
            if type_name == 'str':
                return 'string'
            if type_name == 'bool':
                return 'boolean'
            if type_name in ('None', 'NoneType'):
                return 'null'
        elif isinstance(annotation, ast.Subscript):
            # Generic containers such as List[int] or Dict[str, int].
            if hasattr(annotation.value, 'id'):
                container_type = annotation.value.id
                if container_type in ('List', 'Tuple', 'Set', 'list', 'tuple', 'set'):
                    if isinstance(annotation.slice, ast.Name):
                        element_type = self.extract_type_annotation(annotation.slice)
                        return {
                            'type': 'array',
                            'items': {'type': element_type}
                        }
                    return 'array'
                if container_type in ('Dict', 'dict'):
                    return 'object'
        # Unknown or complex annotations.
        return 'any'

    def visit_AnnAssign(self, node):
        """Record the declared type of an annotated variable assignment."""
        if isinstance(node.target, ast.Name) and node.annotation:
            self.var_types[node.target.id] = self.extract_type_annotation(node.annotation)
        self.generic_visit(node)

    def visit_Assign(self, node):
        """Infer a variable's type when a single name is assigned a literal."""
        if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
            if isinstance(node.value, ast.Constant):
                self.var_types[node.targets[0].id] = self.get_value_type(node.value.value)
        self.generic_visit(node)

    def visit_ClassDef(self, node):
        """Enter a class scope, visit its body, then restore the outer scope."""
        self.function_stack.append(self.current_function)
        self.var_types_stack.append(self.var_types)

        self.var_types = {}
        self.current_function = node.name

        self.generic_visit(node)

        self.current_function = self.function_stack.pop()
        self.var_types = self.var_types_stack.pop()

    def visit_Call(self, node):
        """Record the call as an event when it is a recognized tracking call."""
        source = self.detect_source(node)
        if source:
            event_name = self.extract_event_name(node, source)
            if event_name:
                self.events.append({
                    "eventName": event_name,
                    "source": source,
                    "properties": self.extract_properties(node, source),
                    "filePath": self.filepath,
                    "line": node.lineno,
                    "functionName": self.current_function
                })

        # Tracking calls may be nested inside this call's arguments.
        self.generic_visit(node)

    def detect_source(self, node):
        """Return the analytics source a call targets, or ``None``."""
        func = node.func

        if isinstance(func, ast.Attribute) and hasattr(func.value, 'id'):
            source = self._METHOD_SOURCES.get((func.value.id, func.attr))
            if source:
                return source

        if isinstance(func, ast.Name):
            if func.id in self._SNOWPLOW_FUNCTIONS:
                return 'snowplow'

            # snowplow('trackStructEvent', {...}) pattern
            if (func.id == 'snowplow'
                    and len(node.args) >= 1
                    and isinstance(node.args[0], ast.Constant)
                    and node.args[0].value == 'trackStructEvent'):
                return 'snowplow'

            if self.custom_function and func.id == self.custom_function:
                return 'custom'

        return None

    def _snowplow_payload(self, node):
        """Return the argument node holding a Snowplow call's fields, or ``None``."""
        if not isinstance(node.func, ast.Name):
            return None
        if node.func.id in self._SNOWPLOW_FUNCTIONS:
            return node.args[0] if len(node.args) >= 1 else None
        if node.func.id == 'snowplow':
            return node.args[1] if len(node.args) >= 2 else None
        return None

    def extract_event_name(self, node, source):
        """Return the literal event name of a tracking call, or ``None``."""
        try:
            if source in self._NAME_THEN_PROPS_SOURCES:
                # library.track('event_name', {...}) / customFunction('event_name', {...})
                if len(node.args) >= 1 and isinstance(node.args[0], ast.Constant):
                    return node.args[0].value

            elif source == 'posthog':
                # posthog.capture('distinct_id', event='event_name', properties={...})
                for keyword in node.keywords:
                    if keyword.arg == 'event' and isinstance(keyword.value, ast.Constant):
                        return keyword.value.value

                # posthog.capture('distinct_id', 'event_name', {...})
                if len(node.args) >= 2 and isinstance(node.args[1], ast.Constant):
                    return node.args[1].value

            elif source == 'snowplow':
                # Snowplow struct events use 'action' as the event name.
                payload = self._snowplow_payload(node)
                if isinstance(payload, ast.Dict):
                    for key_node, value_node in zip(payload.keys, payload.values):
                        if isinstance(key_node, ast.Constant) and key_node.value == 'action':
                            if isinstance(value_node, ast.Constant):
                                return value_node.value
        except Exception:
            # Best effort: an unexpected node shape means "no event name".
            pass
        return None

    def extract_properties(self, node, source):
        """Return a ``{name: schema}`` dict describing a call's event properties.

        Extraction is best effort: if an unexpected node shape raises, the
        properties gathered up to that point are returned.
        """
        properties = {}
        try:
            props_node = None

            if source in self._NAME_THEN_PROPS_SOURCES:
                if len(node.args) > 1:
                    props_node = node.args[1]

            elif source == 'posthog':
                is_anonymous = False

                # Named ``properties={...}`` takes precedence over positional args.
                for keyword in node.keywords:
                    if keyword.arg == 'properties' and isinstance(keyword.value, ast.Dict):
                        props_node = keyword.value

                        # '$process_person_profile': False marks an anonymous event.
                        for key_node, value_node in zip(props_node.keys, props_node.values):
                            if (isinstance(key_node, ast.Constant)
                                    and key_node.value == '$process_person_profile'
                                    and isinstance(value_node, ast.Constant)
                                    and value_node.value is False):
                                is_anonymous = True

                if props_node is None and len(node.args) > 2:
                    props_node = node.args[2]

                # A literal, non-empty distinct_id is reported as a property
                # unless the event is anonymous.
                if (not is_anonymous
                        and len(node.args) > 0
                        and isinstance(node.args[0], ast.Constant)
                        and node.args[0].value):
                    properties["distinct_id"] = {"type": "string"}

            elif source == 'snowplow':
                props_node = self._snowplow_payload(node)

            if isinstance(props_node, ast.Dict):
                for key_node, value_node in zip(props_node.keys, props_node.values):
                    if not isinstance(key_node, ast.Constant):
                        continue
                    key = key_node.value

                    if source == 'posthog' and key in ('$set', '$set_once'):
                        # Flatten person-property updates to "$set.<name>" keys.
                        if isinstance(value_node, ast.Dict):
                            for nested_key, nested_value in self.extract_nested_dict(value_node).items():
                                properties[f"{key}.{nested_key}"] = nested_value
                        continue

                    # PostHog-internal flag, not an event property.
                    if source == 'posthog' and key == '$process_person_profile':
                        continue

                    schema = self._schema_for_value(value_node)
                    if schema is not None:
                        properties[key] = schema
        except Exception:
            # Best effort: keep whatever was extracted before the failure.
            pass
        return properties

    def _schema_for_value(self, value_node):
        """Describe a property value node, or return ``None`` if unsupported."""
        if isinstance(value_node, ast.Constant):
            return {"type": self.get_value_type(value_node.value)}
        if isinstance(value_node, ast.Name):
            if value_node.id not in self.var_types:
                return {"type": "any"}
            var_type = self.var_types[value_node.id]
            # Structured types are already complete schema dicts.
            return var_type if isinstance(var_type, dict) else {"type": var_type}
        if isinstance(value_node, ast.Dict):
            return {
                "type": "object",
                "properties": self.extract_nested_dict(value_node)
            }
        if isinstance(value_node, (ast.List, ast.Tuple)):
            return {
                "type": "array",
                "items": self.infer_sequence_item_type(value_node)
            }
        return None

    def infer_sequence_item_type(self, seq_node):
        """Return ``{"type": ...}`` describing the items of a list/tuple node."""
        if not hasattr(seq_node, 'elts') or not seq_node.elts:
            return {"type": "any"}

        element_types = []
        for element in seq_node.elts:
            if isinstance(element, ast.Constant):
                element_types.append(self.get_value_type(element.value))
            elif isinstance(element, ast.Name):
                known = self.var_types.get(element.id, "any")
                # Structured types are dicts, which cannot go into a set;
                # reduce them to their top-level type name.
                if isinstance(known, dict):
                    known = known.get("type", "any")
                element_types.append(known)
            elif isinstance(element, ast.Dict):
                element_types.append("object")
            elif isinstance(element, (ast.List, ast.Tuple)):
                element_types.append("array")
            else:
                element_types.append("any")

        distinct_types = set(element_types)

        if len(distinct_types) == 1:
            return {"type": element_types[0]}

        # Numbers mixed with strings are reported as strings.
        if distinct_types <= {"number", "string"}:
            return {"type": "string"}

        # Numbers mixed with booleans are reported as numbers.
        if distinct_types <= {"number", "boolean"}:
            return {"type": "number"}

        return {"type": "any"}

    def extract_nested_dict(self, dict_node):
        """Return a ``{name: schema}`` dict for the literal keys of a dict node."""
        nested_props = {}
        for key_node, value_node in zip(dict_node.keys, dict_node.values):
            if not isinstance(key_node, ast.Constant):
                continue
            schema = self._schema_for_value(value_node)
            if schema is not None:
                nested_props[key_node.value] = schema
        return nested_props

    def get_value_type(self, value):
        """Map a Python literal value to a schema type name."""
        # bool must be tested before int/float because bool subclasses int.
        if isinstance(value, bool):
            return "boolean"
        if isinstance(value, str):
            return "string"
        if isinstance(value, (int, float)):
            return "number"
        if value is None:
            return "null"
        return "any"
|
|
410
|
-
|
|
411
|
-
def analyze_python_code(code, filepath, custom_function=None):
    """Parse Python source and return its tracking events as a JSON string.

    Args:
        code: Python source to analyze.
        filepath: Path recorded on every event.
        custom_function: Optional name of a user-defined tracking function.

    Returns:
        JSON-encoded list of the events collected by ``TrackingVisitor``.
    """
    module_ast = ast.parse(code)

    collector = TrackingVisitor(filepath, custom_function)
    collector.visit(module_ast)

    return json.dumps(collector.events)
|
|
419
|
-
|
|
420
|
-
# Command-line entry point: analyze one file and print its events as JSON.
if __name__ == "__main__":
    import sys
    import argparse
    import tokenize

    parser = argparse.ArgumentParser(description='Analyze Python code for tracking calls')
    parser.add_argument('file', help='Python file to analyze')
    parser.add_argument('-c', '--custom-function', help='Name of custom tracking function')
    args = parser.parse_args()

    try:
        # tokenize.open decodes the file the way Python itself would (BOM or
        # coding declaration, UTF-8 otherwise) instead of relying on the
        # platform's locale encoding.
        with tokenize.open(args.file) as f:
            code = f.read()
        result = analyze_python_code(code, args.file, args.custom_function)
        print(result)
    except FileNotFoundError:
        print(f"Error: File '{args.file}' not found")
        sys.exit(1)
    except Exception as e:
        # Any other failure (syntax error, undecodable file, ...) is reported
        # and signalled through a non-zero exit status.
        print(f"Error analyzing file: {str(e)}")
        sys.exit(1)
|