@flisk/analyze-tracking 0.7.6 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,6 +53,14 @@ ARRAY_TYPES = {'List', 'Tuple', 'Set', 'list', 'tuple', 'set'}
53
53
  # Container types that map to objects
54
54
  OBJECT_TYPES = {'Dict', 'dict'}
55
55
 
56
+ def _safe_id(node: ast.AST) -> Optional[str]:
57
+ """Return the .id attribute of a node if present."""
58
+ return getattr(node, 'id', None)
59
+
60
+ # -------------------------------------------
61
+ # Tracking Visitor
62
+ # -------------------------------------------
63
+
56
64
  class TrackingVisitor(ast.NodeVisitor):
57
65
  """
58
66
  AST visitor that identifies and extracts analytics tracking calls from Python code.
@@ -68,16 +76,16 @@ class TrackingVisitor(ast.NodeVisitor):
68
76
  function_stack: Stack of function contexts for nested functions
69
77
  var_types: Dictionary of variable types in the current scope
70
78
  var_types_stack: Stack of variable type scopes
71
- custom_function: Optional name of a custom tracking function
79
+ custom_config: Optional custom configuration for custom tracking functions
72
80
  """
73
81
 
74
- def __init__(self, filepath: str, custom_function: Optional[str] = None):
82
+ def __init__(self, filepath: str, custom_config: Optional[Dict[str, Any]] = None):
75
83
  """
76
84
  Initialize the tracking visitor.
77
85
 
78
86
  Args:
79
87
  filepath: Path to the Python file being analyzed
80
- custom_function: Optional name of a custom tracking function to detect
88
+ custom_config: Optional custom configuration for custom tracking functions
81
89
  """
82
90
  self.events: List[AnalyticsEvent] = []
83
91
  self.filepath = filepath
@@ -85,7 +93,18 @@ class TrackingVisitor(ast.NodeVisitor):
85
93
  self.function_stack: List[str] = []
86
94
  self.var_types: Dict[str, PropertyType] = {}
87
95
  self.var_types_stack: List[Dict[str, PropertyType]] = []
88
- self.custom_function = custom_function
96
+ self.custom_config = custom_config or None
97
+ # Store convenience attributes if config provided
98
+ if self.custom_config:
99
+ self._custom_fn_name: str = self.custom_config.get('functionName', '')
100
+ self._event_idx: int = self.custom_config.get('eventIndex', 0)
101
+ self._props_idx: int = self.custom_config.get('propertiesIndex', 1)
102
+ self._extra_params = self.custom_config.get('extraParams', [])
103
+ else:
104
+ self._custom_fn_name = None
105
+ self._event_idx = 0
106
+ self._props_idx = 1
107
+ self._extra_params = []
89
108
 
90
109
  def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
91
110
  """
@@ -164,8 +183,11 @@ class TrackingVisitor(ast.NodeVisitor):
164
183
 
165
184
  elif isinstance(annotation, ast.Subscript):
166
185
  # Handle generic types like List[int], Dict[str, int]
167
- if hasattr(annotation.value, 'id'):
168
- container_type = annotation.value.id
186
+ container_type = getattr(annotation.value, 'id', None)
187
+ if container_type is None and isinstance(annotation.value, ast.Attribute):
188
+ container_type = getattr(annotation.value, 'attr', None)
189
+
190
+ if container_type:
169
191
 
170
192
  if container_type in ARRAY_TYPES:
171
193
  # Try to get the type parameter for arrays
@@ -272,11 +294,12 @@ class TrackingVisitor(ast.NodeVisitor):
272
294
 
273
295
  def _detect_method_call_source(self, node: ast.Call) -> Optional[str]:
274
296
  """Helper method to detect analytics source from method calls."""
275
- if not hasattr(node.func.value, 'id'):
297
+ obj_val = getattr(node.func, 'value', None)
298
+ if obj_val is None:
276
299
  return None
277
300
 
278
- obj_id = node.func.value.id
279
- method_name = node.func.attr
301
+ obj_id = _safe_id(obj_val) or ''
302
+ method_name = getattr(node.func, 'attr', '')
280
303
 
281
304
  # Check standard analytics libraries
282
305
  for source, config in ANALYTICS_SOURCES.items():
@@ -292,11 +315,17 @@ class TrackingVisitor(ast.NodeVisitor):
292
315
  if method_name == 'track' and self._is_snowplow_tracker_call(node):
293
316
  return 'snowplow'
294
317
 
318
+ # Handle dot-separated custom function names like CustomModule.track
319
+ if self._custom_fn_name and '.' in self._custom_fn_name:
320
+ full_name = f"{obj_id}.{method_name}"
321
+ if full_name == self._custom_fn_name:
322
+ return 'custom'
323
+
295
324
  return None
296
325
 
297
326
  def _detect_function_call_source(self, node: ast.Call) -> Optional[str]:
298
327
  """Helper method to detect analytics source from direct function calls."""
299
- func_name = node.func.id
328
+ func_name = _safe_id(node.func) or ''
300
329
 
301
330
  # Check for Snowplow direct functions
302
331
  if func_name in ['trackStructEvent', 'buildStructEvent']:
@@ -307,7 +336,7 @@ class TrackingVisitor(ast.NodeVisitor):
307
336
  return 'snowplow'
308
337
 
309
338
  # Check for custom tracking function
310
- if self.custom_function and func_name == self.custom_function:
339
+ if self._custom_fn_name and func_name == self._custom_fn_name:
311
340
  return 'custom'
312
341
 
313
342
  return None
@@ -319,7 +348,7 @@ class TrackingVisitor(ast.NodeVisitor):
319
348
 
320
349
  first_arg = node.args[0]
321
350
  if isinstance(first_arg, ast.Call) and isinstance(first_arg.func, ast.Name):
322
- return first_arg.func.id == 'BaseEvent'
351
+ return _safe_id(first_arg.func) == 'BaseEvent'
323
352
  return False
324
353
 
325
354
  def _is_snowplow_tracker_call(self, node: ast.Call) -> bool:
@@ -330,7 +359,7 @@ class TrackingVisitor(ast.NodeVisitor):
330
359
  first_arg = node.args[0]
331
360
  # Check if first argument is StructuredEvent
332
361
  if isinstance(first_arg, ast.Call) and isinstance(first_arg.func, ast.Name):
333
- return first_arg.func.id == 'StructuredEvent'
362
+ return _safe_id(first_arg.func) == 'StructuredEvent'
334
363
 
335
364
  # Also check if it might be a variable (simple heuristic)
336
365
  if isinstance(first_arg, ast.Name) and hasattr(node.func, 'value'):
@@ -418,7 +447,7 @@ class TrackingVisitor(ast.NodeVisitor):
418
447
  if len(node.args) >= 1:
419
448
  first_arg = node.args[0]
420
449
  if isinstance(first_arg, ast.Call) and isinstance(first_arg.func, ast.Name):
421
- if first_arg.func.id == 'StructuredEvent':
450
+ if _safe_id(first_arg.func) == 'StructuredEvent':
422
451
  # Look for action in keyword arguments
423
452
  for keyword in first_arg.keywords:
424
453
  if keyword.arg == 'action' and isinstance(keyword.value, ast.Constant):
@@ -430,9 +459,15 @@ class TrackingVisitor(ast.NodeVisitor):
430
459
 
431
460
  def _extract_custom_event_name(self, node: ast.Call) -> Optional[str]:
432
461
  """Extract event name for custom tracking function."""
433
- # Standard format: customFunction('event_name', {...})
434
- if len(node.args) >= 1 and isinstance(node.args[0], ast.Constant):
435
- return node.args[0].value
462
+ args = node.args
463
+
464
+ # Use configured index if available
465
+ if len(args) > self._event_idx and isinstance(args[self._event_idx], ast.Constant):
466
+ return args[self._event_idx].value
467
+
468
+ # Fallback heuristics
469
+ if len(args) >= 1 and isinstance(args[0], ast.Constant):
470
+ return args[0].value
436
471
  return None
437
472
 
438
473
  def extract_properties(self, node: ast.Call, source: str) -> EventProperties:
@@ -502,6 +537,19 @@ class TrackingVisitor(ast.NodeVisitor):
502
537
  # Check if event is not anonymous and extract distinct_id
503
538
  user_id_props.update(self._extract_posthog_user_id(node))
504
539
 
540
+ elif source == 'custom':
541
+ # Populate extra params defined in custom config as properties
542
+ if self._extra_params:
543
+ for extra in self._extra_params:
544
+ idx = extra.get('idx')
545
+ name = extra.get('name')
546
+ if idx is None or name is None:
547
+ continue
548
+ if idx < len(node.args):
549
+ prop_type = self._extract_property_type(node.args[idx])
550
+ if prop_type:
551
+ user_id_props[name] = prop_type
552
+
505
553
  return user_id_props
506
554
 
507
555
  def _is_non_null_value(self, node: ast.AST) -> bool:
@@ -559,10 +607,13 @@ class TrackingVisitor(ast.NodeVisitor):
559
607
  return keyword.value
560
608
 
561
609
  elif source == 'custom':
562
- # Properties are in the second argument
563
- if len(node.args) > 1:
610
+ # Use configured indices where possible
611
+ if len(node.args) > self._props_idx and isinstance(node.args[self._props_idx], ast.Dict):
612
+ return node.args[self._props_idx]
613
+ # Fallbacks (legacy)
614
+ if len(node.args) >= 2 and isinstance(node.args[1], ast.Dict):
564
615
  return node.args[1]
565
-
616
+
566
617
  elif source == 'posthog':
567
618
  # Check named parameters first, then positional
568
619
  for keyword in node.keywords:
@@ -576,7 +627,7 @@ class TrackingVisitor(ast.NodeVisitor):
576
627
  if len(node.args) >= 1:
577
628
  first_arg = node.args[0]
578
629
  if isinstance(first_arg, ast.Call) and isinstance(first_arg.func, ast.Name):
579
- if first_arg.func.id == 'StructuredEvent':
630
+ if _safe_id(first_arg.func) == 'StructuredEvent':
580
631
  # Return None as properties are handled differently for Snowplow
581
632
  return None
582
633
 
@@ -618,7 +669,7 @@ class TrackingVisitor(ast.NodeVisitor):
618
669
  if len(node.args) >= 1:
619
670
  first_arg = node.args[0]
620
671
  if isinstance(first_arg, ast.Call) and isinstance(first_arg.func, ast.Name):
621
- if first_arg.func.id == 'StructuredEvent':
672
+ if _safe_id(first_arg.func) == 'StructuredEvent':
622
673
  # Extract all keyword arguments except 'action'
623
674
  for keyword in first_arg.keywords:
624
675
  if keyword.arg and keyword.arg != 'action':
@@ -755,33 +806,56 @@ class TrackingVisitor(ast.NodeVisitor):
755
806
  return "null"
756
807
  return "any"
757
808
 
758
- def analyze_python_code(code: str, filepath: str, custom_function: Optional[str] = None) -> str:
809
+ def analyze_python_code(code: str, filepath: str, custom_config: Optional[any] = None) -> str:
759
810
  """
760
811
  Analyze Python code for analytics tracking calls.
761
-
762
- This function parses Python code and identifies analytics tracking calls,
763
- extracting event names, properties, and metadata.
764
-
812
+
813
+ The function supports either a single custom configuration object or a list
814
+ of such objects, allowing detection of multiple custom tracking functions
815
+ without parsing the source code multiple times.
816
+
765
817
  Args:
766
818
  code: The Python source code to analyze
767
819
  filepath: Path to the file being analyzed
768
- custom_function: Optional name of a custom tracking function
769
-
820
+ custom_config: None, a single custom config dict, or a list of configs
821
+
770
822
  Returns:
771
823
  JSON string containing array of tracking events
772
824
  """
773
825
  try:
774
- # Parse the Python code
826
+ # Parse the Python code only once
775
827
  tree = ast.parse(code)
776
-
777
- # Create visitor and analyze
778
- visitor = TrackingVisitor(filepath, custom_function)
779
- visitor.visit(tree)
780
-
781
- # Return events as JSON
782
- return json.dumps(visitor.events)
783
- except Exception as e:
784
- # Return empty array on parse errors
828
+
829
+ events: List[AnalyticsEvent] = []
830
+
831
+ def run_visitor(cfg: Optional[dict]) -> None:
832
+ vis = TrackingVisitor(filepath, cfg)
833
+ vis.visit(tree)
834
+ events.extend(vis.events)
835
+
836
+ # Built-in providers pass (no custom config)
837
+ run_visitor(None)
838
+
839
+ # Handle list or single custom configuration
840
+ if custom_config:
841
+ if isinstance(custom_config, list):
842
+ for cfg in custom_config:
843
+ if cfg:
844
+ run_visitor(cfg)
845
+ else:
846
+ run_visitor(custom_config) # single config for backward compat
847
+
848
+ # Deduplicate events (source|eventName|line|functionName)
849
+ unique: Dict[str, AnalyticsEvent] = {}
850
+ for evt in events:
851
+ key = f"{evt['source']}|{evt['eventName']}|{evt['line']}|{evt['functionName']}"
852
+ if key not in unique:
853
+ unique[key] = evt
854
+
855
+ return json.dumps(list(unique.values()))
856
+
857
+ except Exception:
858
+ # Return empty array on failure
785
859
  return json.dumps([])
786
860
 
787
861
  # Command-line interface
@@ -9,9 +9,10 @@ const { getValueType } = require('./types');
9
9
  * Extracts the event name from a tracking call based on the source
10
10
  * @param {Object} node - The AST CallNode
11
11
  * @param {string} source - The detected analytics source
12
+ * @param {Object} customConfig - Custom configuration for custom functions
12
13
  * @returns {string|null} - The extracted event name or null
13
14
  */
14
- function extractEventName(node, source) {
15
+ function extractEventName(node, source, customConfig = null) {
15
16
  if (source === 'segment' || source === 'rudderstack') {
16
17
  // Both Segment and Rudderstack use the same format
17
18
  const params = node.arguments_?.arguments_?.[0]?.elements;
@@ -50,11 +51,21 @@ function extractEventName(node, source) {
50
51
  }
51
52
 
52
53
  if (source === 'custom') {
53
- // Custom function format: customFunction('event_name', {...})
54
- const args = node.arguments_?.arguments_;
55
- if (args && args.length > 0 && args[0]?.unescaped?.value) {
56
- return args[0].unescaped.value;
54
+ const args = node.arguments_?.arguments_ || [];
55
+
56
+ if (!customConfig) {
57
+ // Fallback: first argument string literal event name
58
+ if (args[0]?.unescaped?.value) {
59
+ return args[0].unescaped.value;
60
+ }
61
+ return null;
57
62
  }
63
+
64
+ const eventArg = args[customConfig.eventIndex];
65
+ if (eventArg?.unescaped?.value) {
66
+ return eventArg.unescaped.value;
67
+ }
68
+ return null;
58
69
  }
59
70
 
60
71
  return null;
@@ -64,9 +75,10 @@ function extractEventName(node, source) {
64
75
  * Extracts properties from a tracking call based on the source
65
76
  * @param {Object} node - The AST CallNode
66
77
  * @param {string} source - The detected analytics source
78
+ * @param {Object} customConfig - Custom configuration for custom functions
67
79
  * @returns {Object|null} - The extracted properties or null
68
80
  */
69
- async function extractProperties(node, source) {
81
+ async function extractProperties(node, source, customConfig = null) {
70
82
  const { HashNode, ArrayNode } = await import('@ruby/prism');
71
83
 
72
84
  if (source === 'segment' || source === 'rudderstack') {
@@ -183,11 +195,35 @@ async function extractProperties(node, source) {
183
195
  }
184
196
 
185
197
  if (source === 'custom') {
186
- // Custom function format: customFunction('event_name', {properties})
187
- const args = node.arguments_?.arguments_;
188
- if (args && args.length > 1 && args[1] instanceof HashNode) {
189
- return await extractHashProperties(args[1]);
198
+ const args = node.arguments_?.arguments_ || [];
199
+
200
+ if (!customConfig) {
201
+ // Legacy fallback behavior
202
+ if (args.length > 1 && args[1] instanceof HashNode) {
203
+ return await extractHashProperties(args[1]);
204
+ }
205
+ return null;
190
206
  }
207
+
208
+ const properties = {};
209
+
210
+ // Handle extra params first
211
+ for (const extra of customConfig.extraParams) {
212
+ const argNode = args[extra.idx];
213
+ if (!argNode) continue;
214
+ properties[extra.name] = {
215
+ type: await getValueType(argNode)
216
+ };
217
+ }
218
+
219
+ // Handle properties object
220
+ const propsArg = args[customConfig.propertiesIndex];
221
+ if (propsArg instanceof HashNode) {
222
+ const hashProps = await extractHashProperties(propsArg);
223
+ Object.assign(properties, hashProps);
224
+ }
225
+
226
+ return Object.keys(properties).length > 0 ? properties : null;
191
227
  }
192
228
 
193
229
  return null;
@@ -16,7 +16,7 @@ let parse = null;
16
16
  * @returns {Promise<Array>} Array of tracking events found in the file
17
17
  * @throws {Error} If the file cannot be read or parsed
18
18
  */
19
- async function analyzeRubyFile(filePath, customFunction) {
19
+ async function analyzeRubyFile(filePath, customFunctionSignatures = null) {
20
20
  // Lazy load the Ruby Prism parser
21
21
  if (!parse) {
22
22
  const { loadPrism } = await import('@ruby/prism');
@@ -26,21 +26,28 @@ async function analyzeRubyFile(filePath, customFunction) {
26
26
  try {
27
27
  // Read the file content
28
28
  const code = fs.readFileSync(filePath, 'utf8');
29
-
30
- // Parse the Ruby code into an AST
29
+
30
+ // Parse the Ruby code into an AST once
31
31
  let ast;
32
32
  try {
33
33
  ast = await parse(code);
34
34
  } catch (parseError) {
35
35
  console.error(`Error parsing file ${filePath}:`, parseError.message);
36
- return []; // Return empty events array if parsing fails
36
+ return [];
37
37
  }
38
38
 
39
- // Create a visitor and analyze the AST
40
- const visitor = new TrackingVisitor(code, filePath, customFunction);
39
+ // Single visitor pass covering all custom configs
40
+ const visitor = new TrackingVisitor(code, filePath, customFunctionSignatures || []);
41
41
  const events = await visitor.analyze(ast);
42
42
 
43
- return events;
43
+ // Deduplicate events
44
+ const unique = new Map();
45
+ for (const evt of events) {
46
+ const key = `${evt.source}|${evt.eventName}|${evt.line}|${evt.functionName}`;
47
+ if (!unique.has(key)) unique.set(key, evt);
48
+ }
49
+
50
+ return Array.from(unique.values());
44
51
 
45
52
  } catch (fileError) {
46
53
  console.error(`Error reading or processing file ${filePath}:`, fileError.message);
@@ -8,10 +8,10 @@ const { extractEventName, extractProperties } = require('./extractors');
8
8
  const { findWrappingFunction, traverseNode, getLineNumber } = require('./traversal');
9
9
 
10
10
  class TrackingVisitor {
11
- constructor(code, filePath, customFunction = null) {
11
+ constructor(code, filePath, customConfigs = []) {
12
12
  this.code = code;
13
13
  this.filePath = filePath;
14
- this.customFunction = customFunction;
14
+ this.customConfigs = Array.isArray(customConfigs) ? customConfigs : [];
15
15
  this.events = [];
16
16
  }
17
17
 
@@ -22,10 +22,27 @@ class TrackingVisitor {
22
22
  */
23
23
  async processCallNode(node, ancestors) {
24
24
  try {
25
- const source = detectSource(node, this.customFunction);
25
+ let matchedConfig = null;
26
+ let source = null;
27
+
28
+ // Try to match any custom config first
29
+ for (const cfg of this.customConfigs) {
30
+ if (!cfg) continue;
31
+ if (detectSource(node, cfg.functionName) === 'custom') {
32
+ matchedConfig = cfg;
33
+ source = 'custom';
34
+ break;
35
+ }
36
+ }
37
+
38
+ // If no custom match, attempt built-in providers
39
+ if (!source) {
40
+ source = detectSource(node, null);
41
+ }
42
+
26
43
  if (!source) return;
27
44
 
28
- const eventName = extractEventName(node, source);
45
+ const eventName = extractEventName(node, source, matchedConfig);
29
46
  if (!eventName) return;
30
47
 
31
48
  const line = getLineNumber(this.code, node.location);
@@ -33,13 +50,13 @@ class TrackingVisitor {
33
50
  // For module-scoped custom functions, use the custom function name as the functionName
34
51
  // For simple custom functions, use the wrapping function name
35
52
  let functionName;
36
- if (source === 'custom' && this.customFunction && this.customFunction.includes('.')) {
37
- functionName = this.customFunction;
53
+ if (source === 'custom' && matchedConfig && matchedConfig.functionName.includes('.')) {
54
+ functionName = matchedConfig.functionName;
38
55
  } else {
39
56
  functionName = await findWrappingFunction(node, ancestors);
40
57
  }
41
58
 
42
- const properties = await extractProperties(node, source);
59
+ const properties = await extractProperties(node, source, matchedConfig);
43
60
 
44
61
  this.events.push({
45
62
  eventName,