@flisk/analyze-tracking 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +1 -1
- package/package.json +9 -7
- package/src/analyze/go/astTraversal.js +121 -0
- package/src/analyze/go/constants.js +20 -0
- package/src/analyze/go/eventDeduplicator.js +47 -0
- package/src/analyze/go/eventExtractor.js +156 -0
- package/src/analyze/go/goAstParser/constants.js +39 -0
- package/src/analyze/go/goAstParser/expressionParser.js +281 -0
- package/src/analyze/go/goAstParser/index.js +52 -0
- package/src/analyze/go/goAstParser/statementParser.js +387 -0
- package/src/analyze/go/goAstParser/tokenizer.js +196 -0
- package/src/analyze/go/goAstParser/typeParser.js +202 -0
- package/src/analyze/go/goAstParser/utils.js +99 -0
- package/src/analyze/go/index.js +55 -0
- package/src/analyze/go/propertyExtractor.js +670 -0
- package/src/analyze/go/trackingDetector.js +71 -0
- package/src/analyze/go/trackingExtractor.js +54 -0
- package/src/analyze/go/typeContext.js +88 -0
- package/src/analyze/go/utils.js +215 -0
- package/src/analyze/index.js +11 -6
- package/src/analyze/javascript/constants.js +115 -0
- package/src/analyze/javascript/detectors/analytics-source.js +119 -0
- package/src/analyze/javascript/detectors/index.js +10 -0
- package/src/analyze/javascript/extractors/event-extractor.js +179 -0
- package/src/analyze/javascript/extractors/index.js +13 -0
- package/src/analyze/javascript/extractors/property-extractor.js +172 -0
- package/src/analyze/javascript/index.js +38 -0
- package/src/analyze/javascript/parser.js +126 -0
- package/src/analyze/javascript/utils/function-finder.js +123 -0
- package/src/analyze/python/index.js +111 -0
- package/src/analyze/python/pythonTrackingAnalyzer.py +814 -0
- package/src/analyze/ruby/detectors.js +46 -0
- package/src/analyze/ruby/extractors.js +258 -0
- package/src/analyze/ruby/index.js +51 -0
- package/src/analyze/ruby/traversal.js +123 -0
- package/src/analyze/ruby/types.js +30 -0
- package/src/analyze/ruby/visitor.js +66 -0
- package/src/analyze/typescript/constants.js +109 -0
- package/src/analyze/typescript/detectors/analytics-source.js +120 -0
- package/src/analyze/typescript/detectors/index.js +10 -0
- package/src/analyze/typescript/extractors/event-extractor.js +269 -0
- package/src/analyze/typescript/extractors/index.js +14 -0
- package/src/analyze/typescript/extractors/property-extractor.js +395 -0
- package/src/analyze/typescript/index.js +48 -0
- package/src/analyze/typescript/parser.js +131 -0
- package/src/analyze/typescript/utils/function-finder.js +114 -0
- package/src/analyze/typescript/utils/type-resolver.js +193 -0
- package/src/generateDescriptions/index.js +81 -0
- package/src/generateDescriptions/llmUtils.js +33 -0
- package/src/generateDescriptions/promptUtils.js +62 -0
- package/src/generateDescriptions/schemaUtils.js +61 -0
- package/src/index.js +7 -2
- package/src/{fileProcessor.js → utils/fileProcessor.js} +5 -0
- package/src/{repoDetails.js → utils/repoDetails.js} +5 -0
- package/src/{yamlGenerator.js → utils/yamlGenerator.js} +5 -0
- package/src/analyze/analyzeGoFile.js +0 -1164
- package/src/analyze/analyzeJsFile.js +0 -87
- package/src/analyze/analyzePythonFile.js +0 -42
- package/src/analyze/analyzeRubyFile.js +0 -419
- package/src/analyze/analyzeTsFile.js +0 -192
- package/src/analyze/go2json.js +0 -1069
- package/src/analyze/helpers.js +0 -656
- package/src/analyze/pythonTrackingAnalyzer.py +0 -541
- package/src/generateDescriptions.js +0 -196
|
@@ -1,541 +0,0 @@
|
|
|
1
|
-
import ast
|
|
2
|
-
import json
|
|
3
|
-
|
|
4
|
-
class TrackingVisitor(ast.NodeVisitor):
    """AST visitor that records analytics tracking calls found in a module.

    Every recognised call is appended to ``self.events`` as a dict with the
    keys ``eventName``, ``source``, ``properties``, ``filePath``, ``line``
    and ``functionName``.  Property schemas use the type names ``string``,
    ``number``, ``boolean``, ``null``, ``array``, ``object`` and ``any``.

    While walking the tree the visitor keeps a per-scope table of variable
    types (from annotations and literal assignments) so that variables used
    as property values can be given a type.
    """

    # Simple annotation names and the schema type they map to.
    _SIMPLE_ANNOTATIONS = {
        'int': 'number',
        'float': 'number',
        'str': 'string',
        'bool': 'boolean',
        'None': 'null',
        'NoneType': 'null',
    }
    # Generic containers reported as arrays / objects.
    _SEQUENCE_ANNOTATIONS = ('List', 'Tuple', 'Set', 'list', 'tuple', 'set')
    _MAPPING_ANNOTATIONS = ('Dict', 'dict')
    # (receiver name, method name) pairs that identify a library directly.
    _OBJECT_TRACKERS = {
        ('analytics', 'track'): 'segment',
        ('mp', 'track'): 'mixpanel',
        ('rudder_analytics', 'track'): 'rudderstack',
        ('posthog', 'capture'): 'posthog',
    }
    _SNOWPLOW_STRUCT_FUNCTIONS = ('trackStructEvent', 'buildStructEvent')

    def __init__(self, filepath, custom_function=None):
        """Create a visitor.

        Args:
            filepath: Path stored in the ``filePath`` field of every event.
            custom_function: Optional name of a project-specific tracking
                function called as ``custom_function('event', {...})``.
        """
        self.events = []
        self.filepath = filepath
        self.current_function = 'global'
        self.function_stack = []
        # Variable name -> type (a type-name string or a schema dict) for
        # the scope currently being visited.
        self.var_types = {}
        # Saved ``var_types`` tables of the enclosing scopes.
        self.var_types_stack = []
        self.custom_function = custom_function

    # ------------------------------------------------------------------
    # Scope handling
    # ------------------------------------------------------------------

    def _enter_scope(self, name):
        """Save the current scope and start a fresh one called ``name``."""
        self.function_stack.append(self.current_function)
        self.var_types_stack.append(self.var_types)
        self.var_types = {}
        self.current_function = name

    def _leave_scope(self):
        """Restore the scope saved by the matching ``_enter_scope`` call."""
        self.current_function = self.function_stack.pop()
        self.var_types = self.var_types_stack.pop()

    def visit_FunctionDef(self, node):
        """Visit a function body in its own scope, seeding parameter types."""
        self._enter_scope(node.name)

        arguments = node.args
        # Positional-only and keyword-only parameters carry annotations too;
        # ``posonlyargs`` is absent on older ast versions, hence getattr.
        parameters = (
            list(getattr(arguments, 'posonlyargs', []))
            + list(arguments.args)
            + list(arguments.kwonlyargs)
        )
        for parameter in parameters:
            if parameter.annotation:
                self.var_types[parameter.arg] = self.extract_type_annotation(
                    parameter.annotation
                )

        self.generic_visit(node)
        self._leave_scope()

    # ``async def`` has the same fields as ``def``; without this alias calls
    # inside coroutines would be attributed to the enclosing scope.
    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_ClassDef(self, node):
        """Visit a class body in its own scope named after the class."""
        self._enter_scope(node.name)
        self.generic_visit(node)
        self._leave_scope()

    # ------------------------------------------------------------------
    # Variable type tracking
    # ------------------------------------------------------------------

    def extract_type_annotation(self, annotation):
        """Map an annotation node to a schema type.

        Returns a type-name string, or for ``List[T]``-style annotations with
        a simple ``T`` a dict ``{'type': 'array', 'items': {'type': ...}}``.
        Anything unrecognised yields ``'any'``.
        """
        # ``None`` is parsed as a constant, not as a name.
        if isinstance(annotation, ast.Constant) and annotation.value is None:
            return 'null'

        if isinstance(annotation, ast.Name):
            return self._SIMPLE_ANNOTATIONS.get(annotation.id, 'any')

        if isinstance(annotation, ast.Subscript) and hasattr(annotation.value, 'id'):
            container = annotation.value.id
            if container in self._SEQUENCE_ANNOTATIONS:
                if isinstance(annotation.slice, ast.Name):
                    return {
                        'type': 'array',
                        'items': {'type': self.extract_type_annotation(annotation.slice)},
                    }
                return 'array'
            if container in self._MAPPING_ANNOTATIONS:
                return 'object'

        return 'any'

    def visit_AnnAssign(self, node):
        """Record the annotated type of ``name: T`` / ``name: T = value``."""
        if isinstance(node.target, ast.Name) and node.annotation:
            self.var_types[node.target.id] = self.extract_type_annotation(node.annotation)
        self.generic_visit(node)

    def visit_Assign(self, node):
        """Record the type of ``name = <literal>`` assignments."""
        if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
            if isinstance(node.value, ast.Constant):
                self.var_types[node.targets[0].id] = self.get_value_type(node.value.value)
        self.generic_visit(node)

    # ------------------------------------------------------------------
    # Call detection
    # ------------------------------------------------------------------

    def visit_Call(self, node):
        """Record the call as an event when it is a recognised tracking call."""
        source = self.detect_source(node)
        if source:
            event_name = self.extract_event_name(node, source)
            if event_name:
                self.events.append({
                    "eventName": event_name,
                    "source": source,
                    "properties": self.extract_properties(node, source),
                    "filePath": self.filepath,
                    "line": node.lineno,
                    "functionName": self.current_function,
                })

        # Tracking calls may be nested inside the arguments of other calls.
        self.generic_visit(node)

    def detect_source(self, node):
        """Return the analytics library a call belongs to, or ``None``.

        Recognised forms:
          * ``analytics.track`` (segment), ``mp.track`` (mixpanel),
            ``rudder_analytics.track`` (rudderstack), ``posthog.capture``
          * ``<name>.track(BaseEvent(...))`` (amplitude)
          * ``<name>.track(StructuredEvent(...))`` and
            ``tracker.track(<variable>)`` (snowplow)
          * ``trackStructEvent(...)``, ``buildStructEvent(...)`` and
            ``snowplow('trackStructEvent', ...)`` (snowplow)
          * the configured custom function (custom)
        """
        func = node.func

        if isinstance(func, ast.Attribute) and hasattr(func.value, 'id'):
            receiver = func.value.id
            method = func.attr

            known = self._OBJECT_TRACKERS.get((receiver, method))
            if known:
                return known

            # Amplitude and Snowplow are identified by what is passed to
            # ``track`` rather than by the receiver's name.
            if method == 'track' and node.args:
                first_arg = node.args[0]
                if isinstance(first_arg, ast.Call) and isinstance(first_arg.func, ast.Name):
                    if first_arg.func.id == 'BaseEvent':
                        return 'amplitude'
                    if first_arg.func.id == 'StructuredEvent':
                        return 'snowplow'
                elif isinstance(first_arg, ast.Name) and receiver == 'tracker':
                    # The variable's origin is not resolved; a bare variable
                    # passed to ``tracker.track`` is assumed to be Snowplow.
                    return 'snowplow'

        if isinstance(func, ast.Name):
            if func.id in self._SNOWPLOW_STRUCT_FUNCTIONS:
                return 'snowplow'
            if (func.id == 'snowplow' and node.args
                    and isinstance(node.args[0], ast.Constant)
                    and node.args[0].value == 'trackStructEvent'):
                return 'snowplow'
            if self.custom_function and func.id == self.custom_function:
                return 'custom'

        return None

    def extract_event_name(self, node, source):
        """Return the literal event name of a tracking call, or ``None``.

        Only constant event names are resolved.  For Snowplow only the
        ``track(StructuredEvent(action='...'))`` form yields a name.
        """
        try:
            args = node.args

            if source in ('segment', 'rudderstack', 'mixpanel'):
                # library.track(user_id / distinct_id, 'event_name', {...})
                if len(args) >= 2 and isinstance(args[1], ast.Constant):
                    return args[1].value

            elif source == 'amplitude':
                # client.track(BaseEvent(event_type='event_name', ...))
                if args and isinstance(args[0], ast.Call):
                    for keyword in args[0].keywords:
                        if keyword.arg == 'event_type' and isinstance(keyword.value, ast.Constant):
                            return keyword.value.value

            elif source == 'custom':
                # custom_function('event_name', {...})
                if args and isinstance(args[0], ast.Constant):
                    return args[0].value

            elif source == 'posthog':
                # posthog.capture('distinct_id', event='event_name', ...)
                for keyword in node.keywords:
                    if keyword.arg == 'event' and isinstance(keyword.value, ast.Constant):
                        return keyword.value.value
                # posthog.capture('distinct_id', 'event_name', {...})
                if len(args) >= 2 and isinstance(args[1], ast.Constant):
                    return args[1].value

            elif source == 'snowplow':
                # tracker.track(StructuredEvent(action='event_name', ...))
                if args:
                    first_arg = args[0]
                    if (isinstance(first_arg, ast.Call)
                            and isinstance(first_arg.func, ast.Name)
                            and first_arg.func.id == 'StructuredEvent'):
                        for keyword in first_arg.keywords:
                            if keyword.arg == 'action' and isinstance(keyword.value, ast.Constant):
                                return keyword.value.value
        except Exception:
            # Best effort: an unexpected node shape means "no event name".
            pass
        return None

    # ------------------------------------------------------------------
    # Property extraction
    # ------------------------------------------------------------------

    def extract_properties(self, node, source):
        """Return ``{property_name: schema}`` for a tracking call.

        Extraction is best effort: if an unexpected node shape raises, the
        properties collected up to that point are returned.
        """
        properties = {}
        try:
            props_node = self._collect_call_properties(node, source, properties)
            if isinstance(props_node, ast.Dict):
                self._collect_dict_properties(props_node, source, properties)
        except Exception:
            pass
        return properties

    def _add_identity(self, properties, name, id_node):
        """Add an id property when it is a non-None literal or a variable."""
        is_literal = isinstance(id_node, ast.Constant) and id_node.value is not None
        if is_literal or isinstance(id_node, ast.Name):
            properties[name] = {"type": "string"}

    def _collect_call_properties(self, node, source, properties):
        """Add call-level properties and return the properties node, if any."""
        args = node.args
        props_node = None

        if source in ('segment', 'rudderstack'):
            # analytics.track(user_id, 'event_name', {properties})
            if args:
                self._add_identity(properties, "user_id", args[0])
            if len(args) > 2:
                props_node = args[2]

        elif source == 'mixpanel':
            # mp.track(distinct_id, 'event_name', {properties})
            if args:
                self._add_identity(properties, "distinct_id", args[0])
            if len(args) > 2:
                props_node = args[2]

        elif source == 'amplitude':
            # client.track(BaseEvent(event_type=..., user_id=..., event_properties={...}))
            if args and isinstance(args[0], ast.Call):
                base_event = args[0]
                for keyword in base_event.keywords:
                    if keyword.arg == 'user_id':
                        self._add_identity(properties, "user_id", keyword.value)
                for keyword in base_event.keywords:
                    if keyword.arg == 'event_properties' and isinstance(keyword.value, ast.Dict):
                        props_node = keyword.value
                        break

        elif source == 'custom':
            # custom_function('event_name', {properties})
            if len(args) > 1:
                props_node = args[1]

        elif source == 'posthog':
            # ``properties={...}`` keyword wins over the positional third arg.
            for keyword in node.keywords:
                if keyword.arg == 'properties' and isinstance(keyword.value, ast.Dict):
                    props_node = keyword.value
            if props_node is None and len(args) > 2:
                props_node = args[2]

            # An event is anonymous when the properties dict, however it was
            # passed, sets ``$process_person_profile`` to False.
            is_anonymous = False
            if isinstance(props_node, ast.Dict):
                for key_node, value_node in zip(props_node.keys, props_node.values):
                    if (isinstance(key_node, ast.Constant)
                            and key_node.value == '$process_person_profile'
                            and isinstance(value_node, ast.Constant)
                            and value_node.value is False):
                        is_anonymous = True

            if not is_anonymous and args and isinstance(args[0], ast.Constant):
                if args[0].value:
                    properties["distinct_id"] = {"type": "string"}

        elif source == 'snowplow':
            func = node.func
            if isinstance(func, ast.Name) and func.id in self._SNOWPLOW_STRUCT_FUNCTIONS:
                if args:
                    props_node = args[0]
            elif isinstance(func, ast.Name) and func.id == 'snowplow':
                # snowplow('trackStructEvent', {...})
                if len(args) >= 2:
                    props_node = args[1]
            elif args:
                # tracker.track(StructuredEvent(...)): every keyword except
                # ``action`` (the event name) is a property.
                first_arg = args[0]
                if (isinstance(first_arg, ast.Call)
                        and isinstance(first_arg.func, ast.Name)
                        and first_arg.func.id == 'StructuredEvent'):
                    for keyword in first_arg.keywords:
                        if not keyword.arg or keyword.arg == 'action':
                            continue
                        # ``property_`` is reported as ``property``.
                        prop_name = 'property' if keyword.arg == 'property_' else keyword.arg
                        schema = self._schema_for_value(keyword.value)
                        if schema is not None:
                            properties[prop_name] = schema

        return props_node

    def _collect_dict_properties(self, dict_node, source, properties):
        """Add one property per constant key of a properties dict literal."""
        for key_node, value_node in zip(dict_node.keys, dict_node.values):
            if not isinstance(key_node, ast.Constant):
                continue
            key = key_node.value

            if source == 'posthog':
                # ``$set`` / ``$set_once`` are flattened to dotted names.
                if key in ('$set', '$set_once'):
                    if isinstance(value_node, ast.Dict):
                        for nested_key, nested_schema in self.extract_nested_dict(value_node).items():
                            properties[f"{key}.{nested_key}"] = nested_schema
                    continue
                # Internal flag, not an event property.
                if key == '$process_person_profile':
                    continue

            schema = self._schema_for_value(value_node)
            if schema is not None:
                properties[key] = schema

    def _schema_for_value(self, value_node):
        """Return the schema for a value node, or ``None`` if unsupported.

        Literals, variable names, dict literals and list/tuple literals are
        supported; any other expression yields ``None`` and the caller omits
        the property.
        """
        if isinstance(value_node, ast.Constant):
            return {"type": self.get_value_type(value_node.value)}

        if isinstance(value_node, ast.Name):
            known = self.var_types.get(value_node.id, "any")
            # Structured types are stored as complete schemas already.
            return known if isinstance(known, dict) else {"type": known}

        if isinstance(value_node, ast.Dict):
            return {"type": "object", "properties": self.extract_nested_dict(value_node)}

        if isinstance(value_node, (ast.List, ast.Tuple)):
            return {"type": "array", "items": self.infer_sequence_item_type(value_node)}

        return None

    def infer_sequence_item_type(self, seq_node):
        """Return the item schema of a list or tuple literal.

        A homogeneous sequence gets its element type; number/string mixes
        become ``string``; number/boolean mixes become ``number``; anything
        else (including an empty sequence) is ``any``.
        """
        if not hasattr(seq_node, 'elts') or not seq_node.elts:
            return {"type": "any"}

        element_types = []
        for element in seq_node.elts:
            if isinstance(element, ast.Constant):
                element_types.append(self.get_value_type(element.value))
            elif isinstance(element, ast.Name):
                element_types.append(self.var_types.get(element.id, "any"))
            elif isinstance(element, ast.Dict):
                element_types.append("object")
            elif isinstance(element, (ast.List, ast.Tuple)):
                element_types.append("array")
            else:
                element_types.append("any")

        # Variable types may be schema dicts, which are unhashable, so the
        # comparisons below must not put them in a set.
        first = element_types[0]
        if all(element_type == first for element_type in element_types):
            return first if isinstance(first, dict) else {"type": first}

        if any(isinstance(element_type, dict) for element_type in element_types):
            return {"type": "any"}

        distinct = set(element_types)
        if distinct <= {"number", "string"}:
            return {"type": "string"}
        if distinct <= {"number", "boolean"}:
            return {"type": "number"}
        return {"type": "any"}

    def extract_nested_dict(self, dict_node):
        """Return ``{key: schema}`` for the constant keys of a dict literal."""
        nested_props = {}
        for key_node, value_node in zip(dict_node.keys, dict_node.values):
            if not isinstance(key_node, ast.Constant):
                continue
            schema = self._schema_for_value(value_node)
            if schema is not None:
                nested_props[key_node.value] = schema
        return nested_props

    def get_value_type(self, value):
        """Return the schema type name for a Python literal value."""
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(value, bool):
            return "boolean"
        if isinstance(value, str):
            return "string"
        if isinstance(value, (int, float)):
            return "number"
        if value is None:
            return "null"
        return "any"
|
|
512
|
-
|
|
513
|
-
def analyze_python_code(code, filepath, custom_function=None):
    """Return the tracking events found in ``code`` as a JSON string.

    Args:
        code: Python source to parse with ``ast.parse``.
        filepath: Path recorded on every detected event.
        custom_function: Optional name of a custom tracking function.

    Returns:
        A JSON array (as a string) of the events collected by
        ``TrackingVisitor``.
    """
    tracking_visitor = TrackingVisitor(filepath, custom_function)
    tracking_visitor.visit(ast.parse(code))
    return json.dumps(tracking_visitor.events)
|
|
521
|
-
|
|
522
|
-
if __name__ == "__main__":
    # Command-line entry point: analyze one file and print the events as JSON.
    import sys
    import argparse
    import tokenize

    parser = argparse.ArgumentParser(description='Analyze Python code for tracking calls')
    parser.add_argument('file', help='Python file to analyze')
    parser.add_argument('-c', '--custom-function', help='Name of custom tracking function')
    args = parser.parse_args()

    try:
        # tokenize.open decodes the file the way the interpreter would
        # (UTF-8 by default, honouring a BOM or a PEP 263 coding cookie)
        # instead of relying on the platform's default text encoding.
        with tokenize.open(args.file) as f:
            code = f.read()
        result = analyze_python_code(code, args.file, args.custom_function)
        print(result)
    except FileNotFoundError:
        print(f"Error: File '{args.file}' not found")
        sys.exit(1)
    except Exception as e:
        # Any other failure (e.g. a syntax error in the analyzed file) is
        # reported as a message plus a non-zero exit status.
        print(f"Error analyzing file: {str(e)}")
        sys.exit(1)
|