@flisk/analyze-tracking 0.7.5 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -21
- package/bin/cli.js +1 -0
- package/package.json +1 -1
- package/src/analyze/go/astTraversal.js +22 -22
- package/src/analyze/go/eventExtractor.js +10 -7
- package/src/analyze/go/index.js +39 -19
- package/src/analyze/go/propertyExtractor.js +25 -5
- package/src/analyze/go/trackingExtractor.js +5 -5
- package/src/analyze/index.js +9 -6
- package/src/analyze/javascript/detectors/analytics-source.js +55 -2
- package/src/analyze/javascript/extractors/event-extractor.js +69 -2
- package/src/analyze/javascript/index.js +14 -8
- package/src/analyze/javascript/parser.js +87 -14
- package/src/analyze/python/index.js +32 -26
- package/src/analyze/python/pythonTrackingAnalyzer.py +113 -39
- package/src/analyze/ruby/extractors.js +46 -10
- package/src/analyze/ruby/index.js +14 -7
- package/src/analyze/ruby/visitor.js +24 -7
- package/src/analyze/typescript/detectors/analytics-source.js +4 -1
- package/src/analyze/typescript/extractors/event-extractor.js +186 -8
- package/src/analyze/typescript/extractors/property-extractor.js +53 -1
- package/src/analyze/typescript/index.js +16 -10
- package/src/analyze/typescript/parser.js +37 -14
- package/src/analyze/typescript/utils/function-finder.js +11 -0
- package/src/analyze/typescript/utils/type-resolver.js +1 -1
- package/src/analyze/utils/customFunctionParser.js +55 -0
- package/src/index.js +2 -2
|
@@ -11,16 +11,22 @@ const { parseFile, findTrackingEvents, FileReadError, ParseError } = require('./
|
|
|
11
11
|
* @param {string} [customFunction] - Optional custom function name to detect
|
|
12
12
|
* @returns {Array<Object>} Array of tracking events found in the file
|
|
13
13
|
*/
|
|
14
|
-
function analyzeJsFile(filePath,
|
|
15
|
-
const events = [];
|
|
16
|
-
|
|
14
|
+
function analyzeJsFile(filePath, customFunctionSignatures = null) {
|
|
17
15
|
try {
|
|
18
|
-
// Parse the file into an AST
|
|
16
|
+
// Parse the file into an AST once
|
|
19
17
|
const ast = parseFile(filePath);
|
|
20
18
|
|
|
21
|
-
//
|
|
22
|
-
const
|
|
23
|
-
|
|
19
|
+
// Single pass extraction covering built-in + all custom configs
|
|
20
|
+
const events = findTrackingEvents(ast, filePath, customFunctionSignatures || []);
|
|
21
|
+
|
|
22
|
+
// Deduplicate events (by source | eventName | line | functionName)
|
|
23
|
+
const unique = new Map();
|
|
24
|
+
for (const evt of events) {
|
|
25
|
+
const key = `${evt.source}|${evt.eventName}|${evt.line}|${evt.functionName}`;
|
|
26
|
+
if (!unique.has(key)) unique.set(key, evt);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
return Array.from(unique.values());
|
|
24
30
|
|
|
25
31
|
} catch (error) {
|
|
26
32
|
if (error instanceof FileReadError) {
|
|
@@ -32,7 +38,7 @@ function analyzeJsFile(filePath, customFunction) {
|
|
|
32
38
|
}
|
|
33
39
|
}
|
|
34
40
|
|
|
35
|
-
return
|
|
41
|
+
return [];
|
|
36
42
|
}
|
|
37
43
|
|
|
38
44
|
module.exports = { analyzeJsFile };
|
|
@@ -66,22 +66,95 @@ function parseFile(filePath) {
|
|
|
66
66
|
}
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
+
// ---------------------------------------------
|
|
70
|
+
// Helper – custom function matcher
|
|
71
|
+
// ---------------------------------------------
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* Determines whether a CallExpression node matches the provided custom function name.
|
|
75
|
+
* Supports both simple identifiers (e.g. myTrack) and dot-separated members (e.g. Custom.track).
|
|
76
|
+
* The logic mirrors isCustomFunction from detectors/analytics-source.js but is kept local to avoid
|
|
77
|
+
* circular dependencies.
|
|
78
|
+
* @param {Object} node – CallExpression AST node
|
|
79
|
+
* @param {string} fnName – Custom function name (could include dots)
|
|
80
|
+
* @returns {boolean}
|
|
81
|
+
*/
|
|
82
|
+
function nodeMatchesCustomFunction(node, fnName) {
|
|
83
|
+
if (!fnName || !node.callee) return false;
|
|
84
|
+
|
|
85
|
+
const parts = fnName.split('.');
|
|
86
|
+
|
|
87
|
+
// Simple identifier case
|
|
88
|
+
if (parts.length === 1) {
|
|
89
|
+
return node.callee.type === NODE_TYPES.IDENTIFIER && node.callee.name === fnName;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Member expression chain case
|
|
93
|
+
if (node.callee.type !== NODE_TYPES.MEMBER_EXPRESSION) {
|
|
94
|
+
return false;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Walk the chain from the right-most property to the leftmost object
|
|
98
|
+
let currentNode = node.callee;
|
|
99
|
+
let idx = parts.length - 1;
|
|
100
|
+
|
|
101
|
+
while (currentNode && idx >= 0) {
|
|
102
|
+
const expected = parts[idx];
|
|
103
|
+
|
|
104
|
+
if (currentNode.type === NODE_TYPES.MEMBER_EXPRESSION) {
|
|
105
|
+
if (
|
|
106
|
+
currentNode.property.type !== NODE_TYPES.IDENTIFIER ||
|
|
107
|
+
currentNode.property.name !== expected
|
|
108
|
+
) {
|
|
109
|
+
return false;
|
|
110
|
+
}
|
|
111
|
+
currentNode = currentNode.object;
|
|
112
|
+
idx -= 1;
|
|
113
|
+
} else if (currentNode.type === NODE_TYPES.IDENTIFIER) {
|
|
114
|
+
return idx === 0 && currentNode.name === expected;
|
|
115
|
+
} else {
|
|
116
|
+
return false;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
return false;
|
|
121
|
+
}
|
|
122
|
+
|
|
69
123
|
/**
|
|
70
|
-
*
|
|
71
|
-
*
|
|
72
|
-
*
|
|
73
|
-
* @param {
|
|
74
|
-
* @
|
|
124
|
+
* Walk the AST once and find tracking events for built-in providers plus any number of custom
|
|
125
|
+
* function configurations. This avoids the previous O(n * customConfigs) behaviour.
|
|
126
|
+
*
|
|
127
|
+
* @param {Object} ast – Parsed AST of the source file
|
|
128
|
+
* @param {string} filePath – Absolute/relative path to the source file
|
|
129
|
+
* @param {Object[]} [customConfigs=[]] – Array of parsed custom function configurations
|
|
130
|
+
* @returns {Array<Object>} – List of extracted tracking events
|
|
75
131
|
*/
|
|
76
|
-
function findTrackingEvents(ast, filePath,
|
|
132
|
+
function findTrackingEvents(ast, filePath, customConfigs = []) {
|
|
77
133
|
const events = [];
|
|
78
134
|
|
|
79
135
|
walk.ancestor(ast, {
|
|
80
136
|
[NODE_TYPES.CALL_EXPRESSION]: (node, ancestors) => {
|
|
81
137
|
try {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
138
|
+
let matchedCustomConfig = null;
|
|
139
|
+
|
|
140
|
+
// Attempt to match any custom function first to avoid mis-classifying built-in providers
|
|
141
|
+
if (Array.isArray(customConfigs) && customConfigs.length > 0) {
|
|
142
|
+
for (const cfg of customConfigs) {
|
|
143
|
+
if (cfg && nodeMatchesCustomFunction(node, cfg.functionName)) {
|
|
144
|
+
matchedCustomConfig = cfg;
|
|
145
|
+
break;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
if (matchedCustomConfig) {
|
|
151
|
+
// Force source to 'custom' and use matched config
|
|
152
|
+
const event = extractTrackingEvent(node, ancestors, filePath, matchedCustomConfig);
|
|
153
|
+
if (event) events.push(event);
|
|
154
|
+
} else {
|
|
155
|
+
// Let built-in detector figure out source (pass undefined customFunction)
|
|
156
|
+
const event = extractTrackingEvent(node, ancestors, filePath, null);
|
|
157
|
+
if (event) events.push(event);
|
|
85
158
|
}
|
|
86
159
|
} catch (error) {
|
|
87
160
|
console.error(`Error processing node in ${filePath}:`, error.message);
|
|
@@ -97,25 +170,25 @@ function findTrackingEvents(ast, filePath, customFunction) {
|
|
|
97
170
|
* @param {Object} node - CallExpression node
|
|
98
171
|
* @param {Array<Object>} ancestors - Ancestor nodes
|
|
99
172
|
* @param {string} filePath - File path
|
|
100
|
-
* @param {
|
|
173
|
+
* @param {Object} [customConfig] - Custom function configuration object
|
|
101
174
|
* @returns {Object|null} Extracted event or null
|
|
102
175
|
*/
|
|
103
|
-
function extractTrackingEvent(node, ancestors, filePath,
|
|
176
|
+
function extractTrackingEvent(node, ancestors, filePath, customConfig) {
|
|
104
177
|
// Detect the analytics source
|
|
105
|
-
const source = detectAnalyticsSource(node,
|
|
178
|
+
const source = detectAnalyticsSource(node, customConfig?.functionName);
|
|
106
179
|
if (source === 'unknown') {
|
|
107
180
|
return null;
|
|
108
181
|
}
|
|
109
182
|
|
|
110
183
|
// Extract event data based on the source
|
|
111
|
-
const eventData = extractEventData(node, source);
|
|
184
|
+
const eventData = extractEventData(node, source, customConfig);
|
|
112
185
|
|
|
113
186
|
// Get location and context information
|
|
114
187
|
const line = node.loc.start.line;
|
|
115
188
|
const functionName = findWrappingFunction(node, ancestors);
|
|
116
189
|
|
|
117
190
|
// Process the event data into final format
|
|
118
|
-
return processEventData(eventData, source, filePath, line, functionName);
|
|
191
|
+
return processEventData(eventData, source, filePath, line, functionName, customConfig);
|
|
119
192
|
}
|
|
120
193
|
|
|
121
194
|
module.exports = {
|
|
@@ -40,7 +40,7 @@ async function initPyodide() {
|
|
|
40
40
|
* libraries, extracting event names, properties, and metadata.
|
|
41
41
|
*
|
|
42
42
|
* @param {string} filePath - Path to the Python file to analyze
|
|
43
|
-
* @param {string} [
|
|
43
|
+
* @param {string} [customFunctionSignature=null] - Signature of a custom tracking function to detect
|
|
44
44
|
* @returns {Promise<Array<Object>>} Array of tracking events found in the file
|
|
45
45
|
* @returns {Promise<Array>} Empty array if an error occurs
|
|
46
46
|
*
|
|
@@ -52,48 +52,54 @@ async function initPyodide() {
|
|
|
52
52
|
* // With custom tracking function
|
|
53
53
|
* const events = await analyzePythonFile('./app.py', 'track_event');
|
|
54
54
|
*/
|
|
55
|
-
async function analyzePythonFile(filePath,
|
|
55
|
+
async function analyzePythonFile(filePath, customFunctionSignatures = null) {
|
|
56
56
|
// Validate inputs
|
|
57
57
|
if (!filePath || typeof filePath !== 'string') {
|
|
58
58
|
console.error('Invalid file path provided');
|
|
59
59
|
return [];
|
|
60
60
|
}
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
}
|
|
62
|
+
// Check if file exists before reading
|
|
63
|
+
if (!fs.existsSync(filePath)) {
|
|
64
|
+
console.error(`File not found: ${filePath}`);
|
|
65
|
+
return [];
|
|
66
|
+
}
|
|
68
67
|
|
|
69
|
-
|
|
68
|
+
try {
|
|
69
|
+
// Read the Python file only once
|
|
70
70
|
const code = fs.readFileSync(filePath, 'utf8');
|
|
71
|
-
|
|
71
|
+
|
|
72
72
|
// Initialize Pyodide if not already done
|
|
73
73
|
const py = await initPyodide();
|
|
74
|
-
|
|
75
|
-
// Load the Python analyzer code
|
|
74
|
+
|
|
75
|
+
// Load the Python analyzer code (idempotent – redefining functions is fine)
|
|
76
76
|
const analyzerPath = path.join(__dirname, 'pythonTrackingAnalyzer.py');
|
|
77
77
|
if (!fs.existsSync(analyzerPath)) {
|
|
78
78
|
throw new Error(`Python analyzer not found at: ${analyzerPath}`);
|
|
79
79
|
}
|
|
80
|
-
|
|
81
80
|
const analyzerCode = fs.readFileSync(analyzerPath, 'utf8');
|
|
82
|
-
|
|
83
|
-
// Set up Python environment with necessary variables
|
|
84
|
-
py.globals.set('code', code);
|
|
85
|
-
py.globals.set('filepath', filePath);
|
|
86
|
-
py.globals.set('custom_function', customFunction);
|
|
87
|
-
// Set __name__ to null to prevent execution of main block
|
|
81
|
+
// Prevent the analyzer from executing any __main__ blocks that expect CLI usage
|
|
88
82
|
py.globals.set('__name__', null);
|
|
89
|
-
|
|
90
|
-
// Load and run the analyzer
|
|
91
83
|
py.runPython(analyzerCode);
|
|
92
|
-
|
|
93
|
-
//
|
|
94
|
-
const
|
|
95
|
-
|
|
96
|
-
|
|
84
|
+
|
|
85
|
+
// Helper to run analysis with a given custom config (can be null)
|
|
86
|
+
const runAnalysis = (customConfig) => {
|
|
87
|
+
py.globals.set('code', code);
|
|
88
|
+
py.globals.set('filepath', filePath);
|
|
89
|
+
py.globals.set('custom_config_json', customConfig ? JSON.stringify(customConfig) : null);
|
|
90
|
+
py.runPython('import json');
|
|
91
|
+
py.runPython('custom_config = None if custom_config_json == None else json.loads(custom_config_json)');
|
|
92
|
+
const result = py.runPython('analyze_python_code(code, filepath, custom_config)');
|
|
93
|
+
return JSON.parse(result);
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
// Prepare config argument (array or null)
|
|
97
|
+
const configArg = Array.isArray(customFunctionSignatures) && customFunctionSignatures.length > 0
|
|
98
|
+
? customFunctionSignatures
|
|
99
|
+
: null;
|
|
100
|
+
|
|
101
|
+
const events = runAnalysis(configArg);
|
|
102
|
+
|
|
97
103
|
return events;
|
|
98
104
|
} catch (error) {
|
|
99
105
|
// Log detailed error information for debugging
|
|
@@ -53,6 +53,14 @@ ARRAY_TYPES = {'List', 'Tuple', 'Set', 'list', 'tuple', 'set'}
|
|
|
53
53
|
# Container types that map to objects
|
|
54
54
|
OBJECT_TYPES = {'Dict', 'dict'}
|
|
55
55
|
|
|
56
|
+
def _safe_id(node: ast.AST) -> Optional[str]:
|
|
57
|
+
"""Return the .id attribute of a node if present."""
|
|
58
|
+
return getattr(node, 'id', None)
|
|
59
|
+
|
|
60
|
+
# -------------------------------------------
|
|
61
|
+
# Tracking Visitor
|
|
62
|
+
# -------------------------------------------
|
|
63
|
+
|
|
56
64
|
class TrackingVisitor(ast.NodeVisitor):
|
|
57
65
|
"""
|
|
58
66
|
AST visitor that identifies and extracts analytics tracking calls from Python code.
|
|
@@ -68,16 +76,16 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
68
76
|
function_stack: Stack of function contexts for nested functions
|
|
69
77
|
var_types: Dictionary of variable types in the current scope
|
|
70
78
|
var_types_stack: Stack of variable type scopes
|
|
71
|
-
|
|
79
|
+
custom_config: Optional custom configuration for custom tracking functions
|
|
72
80
|
"""
|
|
73
81
|
|
|
74
|
-
def __init__(self, filepath: str,
|
|
82
|
+
def __init__(self, filepath: str, custom_config: Optional[Dict[str, Any]] = None):
|
|
75
83
|
"""
|
|
76
84
|
Initialize the tracking visitor.
|
|
77
85
|
|
|
78
86
|
Args:
|
|
79
87
|
filepath: Path to the Python file being analyzed
|
|
80
|
-
|
|
88
|
+
custom_config: Optional custom configuration for custom tracking functions
|
|
81
89
|
"""
|
|
82
90
|
self.events: List[AnalyticsEvent] = []
|
|
83
91
|
self.filepath = filepath
|
|
@@ -85,7 +93,18 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
85
93
|
self.function_stack: List[str] = []
|
|
86
94
|
self.var_types: Dict[str, PropertyType] = {}
|
|
87
95
|
self.var_types_stack: List[Dict[str, PropertyType]] = []
|
|
88
|
-
self.
|
|
96
|
+
self.custom_config = custom_config or None
|
|
97
|
+
# Store convenience attributes if config provided
|
|
98
|
+
if self.custom_config:
|
|
99
|
+
self._custom_fn_name: str = self.custom_config.get('functionName', '')
|
|
100
|
+
self._event_idx: int = self.custom_config.get('eventIndex', 0)
|
|
101
|
+
self._props_idx: int = self.custom_config.get('propertiesIndex', 1)
|
|
102
|
+
self._extra_params = self.custom_config.get('extraParams', [])
|
|
103
|
+
else:
|
|
104
|
+
self._custom_fn_name = None
|
|
105
|
+
self._event_idx = 0
|
|
106
|
+
self._props_idx = 1
|
|
107
|
+
self._extra_params = []
|
|
89
108
|
|
|
90
109
|
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
|
|
91
110
|
"""
|
|
@@ -164,8 +183,11 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
164
183
|
|
|
165
184
|
elif isinstance(annotation, ast.Subscript):
|
|
166
185
|
# Handle generic types like List[int], Dict[str, int]
|
|
167
|
-
|
|
168
|
-
|
|
186
|
+
container_type = getattr(annotation.value, 'id', None)
|
|
187
|
+
if container_type is None and isinstance(annotation.value, ast.Attribute):
|
|
188
|
+
container_type = getattr(annotation.value, 'attr', None)
|
|
189
|
+
|
|
190
|
+
if container_type:
|
|
169
191
|
|
|
170
192
|
if container_type in ARRAY_TYPES:
|
|
171
193
|
# Try to get the type parameter for arrays
|
|
@@ -272,11 +294,12 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
272
294
|
|
|
273
295
|
def _detect_method_call_source(self, node: ast.Call) -> Optional[str]:
|
|
274
296
|
"""Helper method to detect analytics source from method calls."""
|
|
275
|
-
|
|
297
|
+
obj_val = getattr(node.func, 'value', None)
|
|
298
|
+
if obj_val is None:
|
|
276
299
|
return None
|
|
277
300
|
|
|
278
|
-
obj_id =
|
|
279
|
-
method_name = node.func
|
|
301
|
+
obj_id = _safe_id(obj_val) or ''
|
|
302
|
+
method_name = getattr(node.func, 'attr', '')
|
|
280
303
|
|
|
281
304
|
# Check standard analytics libraries
|
|
282
305
|
for source, config in ANALYTICS_SOURCES.items():
|
|
@@ -292,11 +315,17 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
292
315
|
if method_name == 'track' and self._is_snowplow_tracker_call(node):
|
|
293
316
|
return 'snowplow'
|
|
294
317
|
|
|
318
|
+
# Handle dot-separated custom function names like CustomModule.track
|
|
319
|
+
if self._custom_fn_name and '.' in self._custom_fn_name:
|
|
320
|
+
full_name = f"{obj_id}.{method_name}"
|
|
321
|
+
if full_name == self._custom_fn_name:
|
|
322
|
+
return 'custom'
|
|
323
|
+
|
|
295
324
|
return None
|
|
296
325
|
|
|
297
326
|
def _detect_function_call_source(self, node: ast.Call) -> Optional[str]:
|
|
298
327
|
"""Helper method to detect analytics source from direct function calls."""
|
|
299
|
-
func_name = node.func
|
|
328
|
+
func_name = _safe_id(node.func) or ''
|
|
300
329
|
|
|
301
330
|
# Check for Snowplow direct functions
|
|
302
331
|
if func_name in ['trackStructEvent', 'buildStructEvent']:
|
|
@@ -307,7 +336,7 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
307
336
|
return 'snowplow'
|
|
308
337
|
|
|
309
338
|
# Check for custom tracking function
|
|
310
|
-
if self.
|
|
339
|
+
if self._custom_fn_name and func_name == self._custom_fn_name:
|
|
311
340
|
return 'custom'
|
|
312
341
|
|
|
313
342
|
return None
|
|
@@ -319,7 +348,7 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
319
348
|
|
|
320
349
|
first_arg = node.args[0]
|
|
321
350
|
if isinstance(first_arg, ast.Call) and isinstance(first_arg.func, ast.Name):
|
|
322
|
-
return first_arg.func
|
|
351
|
+
return _safe_id(first_arg.func) == 'BaseEvent'
|
|
323
352
|
return False
|
|
324
353
|
|
|
325
354
|
def _is_snowplow_tracker_call(self, node: ast.Call) -> bool:
|
|
@@ -330,7 +359,7 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
330
359
|
first_arg = node.args[0]
|
|
331
360
|
# Check if first argument is StructuredEvent
|
|
332
361
|
if isinstance(first_arg, ast.Call) and isinstance(first_arg.func, ast.Name):
|
|
333
|
-
return first_arg.func
|
|
362
|
+
return _safe_id(first_arg.func) == 'StructuredEvent'
|
|
334
363
|
|
|
335
364
|
# Also check if it might be a variable (simple heuristic)
|
|
336
365
|
if isinstance(first_arg, ast.Name) and hasattr(node.func, 'value'):
|
|
@@ -418,7 +447,7 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
418
447
|
if len(node.args) >= 1:
|
|
419
448
|
first_arg = node.args[0]
|
|
420
449
|
if isinstance(first_arg, ast.Call) and isinstance(first_arg.func, ast.Name):
|
|
421
|
-
if first_arg.func
|
|
450
|
+
if _safe_id(first_arg.func) == 'StructuredEvent':
|
|
422
451
|
# Look for action in keyword arguments
|
|
423
452
|
for keyword in first_arg.keywords:
|
|
424
453
|
if keyword.arg == 'action' and isinstance(keyword.value, ast.Constant):
|
|
@@ -430,9 +459,15 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
430
459
|
|
|
431
460
|
def _extract_custom_event_name(self, node: ast.Call) -> Optional[str]:
|
|
432
461
|
"""Extract event name for custom tracking function."""
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
462
|
+
args = node.args
|
|
463
|
+
|
|
464
|
+
# Use configured index if available
|
|
465
|
+
if len(args) > self._event_idx and isinstance(args[self._event_idx], ast.Constant):
|
|
466
|
+
return args[self._event_idx].value
|
|
467
|
+
|
|
468
|
+
# Fallback heuristics
|
|
469
|
+
if len(args) >= 1 and isinstance(args[0], ast.Constant):
|
|
470
|
+
return args[0].value
|
|
436
471
|
return None
|
|
437
472
|
|
|
438
473
|
def extract_properties(self, node: ast.Call, source: str) -> EventProperties:
|
|
@@ -502,6 +537,19 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
502
537
|
# Check if event is not anonymous and extract distinct_id
|
|
503
538
|
user_id_props.update(self._extract_posthog_user_id(node))
|
|
504
539
|
|
|
540
|
+
elif source == 'custom':
|
|
541
|
+
# Populate extra params defined in custom config as properties
|
|
542
|
+
if self._extra_params:
|
|
543
|
+
for extra in self._extra_params:
|
|
544
|
+
idx = extra.get('idx')
|
|
545
|
+
name = extra.get('name')
|
|
546
|
+
if idx is None or name is None:
|
|
547
|
+
continue
|
|
548
|
+
if idx < len(node.args):
|
|
549
|
+
prop_type = self._extract_property_type(node.args[idx])
|
|
550
|
+
if prop_type:
|
|
551
|
+
user_id_props[name] = prop_type
|
|
552
|
+
|
|
505
553
|
return user_id_props
|
|
506
554
|
|
|
507
555
|
def _is_non_null_value(self, node: ast.AST) -> bool:
|
|
@@ -559,10 +607,13 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
559
607
|
return keyword.value
|
|
560
608
|
|
|
561
609
|
elif source == 'custom':
|
|
562
|
-
#
|
|
563
|
-
if len(node.args) >
|
|
610
|
+
# Use configured indices where possible
|
|
611
|
+
if len(node.args) > self._props_idx and isinstance(node.args[self._props_idx], ast.Dict):
|
|
612
|
+
return node.args[self._props_idx]
|
|
613
|
+
# Fallbacks (legacy)
|
|
614
|
+
if len(node.args) >= 2 and isinstance(node.args[1], ast.Dict):
|
|
564
615
|
return node.args[1]
|
|
565
|
-
|
|
616
|
+
|
|
566
617
|
elif source == 'posthog':
|
|
567
618
|
# Check named parameters first, then positional
|
|
568
619
|
for keyword in node.keywords:
|
|
@@ -576,7 +627,7 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
576
627
|
if len(node.args) >= 1:
|
|
577
628
|
first_arg = node.args[0]
|
|
578
629
|
if isinstance(first_arg, ast.Call) and isinstance(first_arg.func, ast.Name):
|
|
579
|
-
if first_arg.func
|
|
630
|
+
if _safe_id(first_arg.func) == 'StructuredEvent':
|
|
580
631
|
# Return None as properties are handled differently for Snowplow
|
|
581
632
|
return None
|
|
582
633
|
|
|
@@ -618,7 +669,7 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
618
669
|
if len(node.args) >= 1:
|
|
619
670
|
first_arg = node.args[0]
|
|
620
671
|
if isinstance(first_arg, ast.Call) and isinstance(first_arg.func, ast.Name):
|
|
621
|
-
if first_arg.func
|
|
672
|
+
if _safe_id(first_arg.func) == 'StructuredEvent':
|
|
622
673
|
# Extract all keyword arguments except 'action'
|
|
623
674
|
for keyword in first_arg.keywords:
|
|
624
675
|
if keyword.arg and keyword.arg != 'action':
|
|
@@ -755,33 +806,56 @@ class TrackingVisitor(ast.NodeVisitor):
|
|
|
755
806
|
return "null"
|
|
756
807
|
return "any"
|
|
757
808
|
|
|
758
|
-
def analyze_python_code(code: str, filepath: str,
|
|
809
|
+
def analyze_python_code(code: str, filepath: str, custom_config: Optional[any] = None) -> str:
|
|
759
810
|
"""
|
|
760
811
|
Analyze Python code for analytics tracking calls.
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
812
|
+
|
|
813
|
+
The function supports either a single custom configuration object or a list
|
|
814
|
+
of such objects, allowing detection of multiple custom tracking functions
|
|
815
|
+
without parsing the source code multiple times.
|
|
816
|
+
|
|
765
817
|
Args:
|
|
766
818
|
code: The Python source code to analyze
|
|
767
819
|
filepath: Path to the file being analyzed
|
|
768
|
-
|
|
769
|
-
|
|
820
|
+
custom_config: None, a single custom config dict, or a list of configs
|
|
821
|
+
|
|
770
822
|
Returns:
|
|
771
823
|
JSON string containing array of tracking events
|
|
772
824
|
"""
|
|
773
825
|
try:
|
|
774
|
-
# Parse the Python code
|
|
826
|
+
# Parse the Python code only once
|
|
775
827
|
tree = ast.parse(code)
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
#
|
|
828
|
+
|
|
829
|
+
events: List[AnalyticsEvent] = []
|
|
830
|
+
|
|
831
|
+
def run_visitor(cfg: Optional[dict]) -> None:
|
|
832
|
+
vis = TrackingVisitor(filepath, cfg)
|
|
833
|
+
vis.visit(tree)
|
|
834
|
+
events.extend(vis.events)
|
|
835
|
+
|
|
836
|
+
# Built-in providers pass (no custom config)
|
|
837
|
+
run_visitor(None)
|
|
838
|
+
|
|
839
|
+
# Handle list or single custom configuration
|
|
840
|
+
if custom_config:
|
|
841
|
+
if isinstance(custom_config, list):
|
|
842
|
+
for cfg in custom_config:
|
|
843
|
+
if cfg:
|
|
844
|
+
run_visitor(cfg)
|
|
845
|
+
else:
|
|
846
|
+
run_visitor(custom_config) # single config for backward compat
|
|
847
|
+
|
|
848
|
+
# Deduplicate events (source|eventName|line|functionName)
|
|
849
|
+
unique: Dict[str, AnalyticsEvent] = {}
|
|
850
|
+
for evt in events:
|
|
851
|
+
key = f"{evt['source']}|{evt['eventName']}|{evt['line']}|{evt['functionName']}"
|
|
852
|
+
if key not in unique:
|
|
853
|
+
unique[key] = evt
|
|
854
|
+
|
|
855
|
+
return json.dumps(list(unique.values()))
|
|
856
|
+
|
|
857
|
+
except Exception:
|
|
858
|
+
# Return empty array on failure
|
|
785
859
|
return json.dumps([])
|
|
786
860
|
|
|
787
861
|
# Command-line interface
|
|
@@ -9,9 +9,10 @@ const { getValueType } = require('./types');
|
|
|
9
9
|
* Extracts the event name from a tracking call based on the source
|
|
10
10
|
* @param {Object} node - The AST CallNode
|
|
11
11
|
* @param {string} source - The detected analytics source
|
|
12
|
+
* @param {Object} customConfig - Custom configuration for custom functions
|
|
12
13
|
* @returns {string|null} - The extracted event name or null
|
|
13
14
|
*/
|
|
14
|
-
function extractEventName(node, source) {
|
|
15
|
+
function extractEventName(node, source, customConfig = null) {
|
|
15
16
|
if (source === 'segment' || source === 'rudderstack') {
|
|
16
17
|
// Both Segment and Rudderstack use the same format
|
|
17
18
|
const params = node.arguments_?.arguments_?.[0]?.elements;
|
|
@@ -50,11 +51,21 @@ function extractEventName(node, source) {
|
|
|
50
51
|
}
|
|
51
52
|
|
|
52
53
|
if (source === 'custom') {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
if (
|
|
56
|
-
|
|
54
|
+
const args = node.arguments_?.arguments_ || [];
|
|
55
|
+
|
|
56
|
+
if (!customConfig) {
|
|
57
|
+
// Fallback: first argument string literal event name
|
|
58
|
+
if (args[0]?.unescaped?.value) {
|
|
59
|
+
return args[0].unescaped.value;
|
|
60
|
+
}
|
|
61
|
+
return null;
|
|
57
62
|
}
|
|
63
|
+
|
|
64
|
+
const eventArg = args[customConfig.eventIndex];
|
|
65
|
+
if (eventArg?.unescaped?.value) {
|
|
66
|
+
return eventArg.unescaped.value;
|
|
67
|
+
}
|
|
68
|
+
return null;
|
|
58
69
|
}
|
|
59
70
|
|
|
60
71
|
return null;
|
|
@@ -64,9 +75,10 @@ function extractEventName(node, source) {
|
|
|
64
75
|
* Extracts properties from a tracking call based on the source
|
|
65
76
|
* @param {Object} node - The AST CallNode
|
|
66
77
|
* @param {string} source - The detected analytics source
|
|
78
|
+
* @param {Object} customConfig - Custom configuration for custom functions
|
|
67
79
|
* @returns {Object|null} - The extracted properties or null
|
|
68
80
|
*/
|
|
69
|
-
async function extractProperties(node, source) {
|
|
81
|
+
async function extractProperties(node, source, customConfig = null) {
|
|
70
82
|
const { HashNode, ArrayNode } = await import('@ruby/prism');
|
|
71
83
|
|
|
72
84
|
if (source === 'segment' || source === 'rudderstack') {
|
|
@@ -183,11 +195,35 @@ async function extractProperties(node, source) {
|
|
|
183
195
|
}
|
|
184
196
|
|
|
185
197
|
if (source === 'custom') {
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
if (
|
|
189
|
-
|
|
198
|
+
const args = node.arguments_?.arguments_ || [];
|
|
199
|
+
|
|
200
|
+
if (!customConfig) {
|
|
201
|
+
// Legacy fallback behavior
|
|
202
|
+
if (args.length > 1 && args[1] instanceof HashNode) {
|
|
203
|
+
return await extractHashProperties(args[1]);
|
|
204
|
+
}
|
|
205
|
+
return null;
|
|
190
206
|
}
|
|
207
|
+
|
|
208
|
+
const properties = {};
|
|
209
|
+
|
|
210
|
+
// Handle extra params first
|
|
211
|
+
for (const extra of customConfig.extraParams) {
|
|
212
|
+
const argNode = args[extra.idx];
|
|
213
|
+
if (!argNode) continue;
|
|
214
|
+
properties[extra.name] = {
|
|
215
|
+
type: await getValueType(argNode)
|
|
216
|
+
};
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
// Handle properties object
|
|
220
|
+
const propsArg = args[customConfig.propertiesIndex];
|
|
221
|
+
if (propsArg instanceof HashNode) {
|
|
222
|
+
const hashProps = await extractHashProperties(propsArg);
|
|
223
|
+
Object.assign(properties, hashProps);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
return Object.keys(properties).length > 0 ? properties : null;
|
|
191
227
|
}
|
|
192
228
|
|
|
193
229
|
return null;
|