@flisk/analyze-tracking 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +163 -26
- package/bin/help.js +3 -3
- package/package.json +5 -4
- package/src/analyze/analyzePythonFile.js +41 -0
- package/src/analyze/analyzeRubyFile.js +123 -4
- package/src/analyze/index.js +5 -1
- package/src/analyze/pythonTrackingAnalyzer.py +439 -0
- package/src/index.js +2 -2
package/README.md
CHANGED
@@ -6,9 +6,9 @@ Automatically document your analytics setup by analyzing tracking code and gener
 
 
 ## Why Use @flisk/analyze-tracking?
-📊 **Understand Your Tracking** – Effortlessly analyze your codebase for `track` calls so you can see all your analytics events, properties, and triggers in one place. No more guessing what
+📊 **Understand Your Tracking** – Effortlessly analyze your codebase for `track` calls so you can see all your analytics events, properties, and triggers in one place. No more guessing what's being tracked!
 
-🔍 **Auto-Document Events** – Generates a complete YAML schema that captures all events and properties, including where they
+🔍 **Auto-Document Events** – Generates a complete YAML schema that captures all events and properties, including where they're implemented in your codebase.
 
 🕵️♂️ **Track Changes Over Time** – Easily spot unintended changes or ensure your analytics setup remains consistent across updates.
 
@@ -26,7 +26,7 @@ npx @flisk/analyze-tracking /path/to/project [options]
 ### Key Options:
 - `-g, --generateDescription`: Generate descriptions of fields (default: `false`)
 - `-p, --provider <provider>`: Specify a provider (options: `openai`, `gemini`)
-- `-m, --model <model>`: Specify a model (ex: `gpt-4o-mini`, `gemini-2.0-flash-lite-001`)
+- `-m, --model <model>`: Specify a model (ex: `gpt-4.1-nano`, `gpt-4o-mini`, `gemini-2.0-flash-lite-001`)
 - `-o, --output <output_file>`: Name of the output file (default: `tracking-schema.yaml`)
 - `-c, --customFunction <function_name>`: Specify a custom tracking function
 
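For illustration, combining the options above, a run that generates field descriptions with OpenAI could look like `npx @flisk/analyze-tracking ./my-app -g -p openai -m gpt-4.1-nano -o tracking-schema.yaml` (the project path here is a placeholder, not part of the diff).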
@@ -46,9 +46,9 @@ npx @flisk/analyze-tracking /path/to/project [options]
 </details>
 
 
-## What
+## What's Generated?
 A clear YAML schema that shows where your events are tracked, their properties, and more.
-Here
+Here's an example:
 
 ```yaml
 version: 1
@@ -71,18 +71,36 @@ events:
       type: <property_type>
 ```
 
-Use this to understand where your events live in the code and how they
+Use this to understand where your events live in the code and how they're being tracked.
 
 Your LLM of choice is used for generating descriptions of events, properties, and implementations.
 
 See [schema.json](schema.json) for a JSON Schema of the output.
 
 
-## Supported tracking libraries
+## Supported tracking libraries & languages
+
+| Library | JavaScript/TypeScript | Python | Ruby |
+|---------|:---------------------:|:------:|:----:|
+| Google Analytics | ✅ | ❌ | ❌ |
+| Segment | ✅ | ✅ | ✅ |
+| Mixpanel | ✅ | ✅ | ✅ |
+| Amplitude | ✅ | ✅ | ❌ |
+| Rudderstack | ✅ | ✅ | ❌ |
+| mParticle | ✅ | ✅ | ❌ |
+| PostHog | ✅ | ✅ | ✅ |
+| Pendo | ✅ | ✅ | ❌ |
+| Heap | ✅ | ✅ | ❌ |
+| Snowplow | ✅ | ✅ | ✅ |
+| Custom Function | ✅ | ✅ | ✅ |
+
+
+## SDKs for supported libraries
 
 <details>
 <summary>Google Analytics</summary>
 
+**JavaScript/TypeScript**
 ```js
 gtag('event', '<event_name>', {
   <event_parameters>
@@ -93,126 +111,245 @@ See [schema.json](schema.json) for a JSON Schema of the output.
 <details>
 <summary>Segment</summary>
 
+**JavaScript/TypeScript**
 ```js
 analytics.track('<event_name>', {
   <event_parameters>
 });
 ```
+
+**Python**
+```python
+analytics.track('<event_name>', {
+  '<property_name>': '<property_value>'
+})
+```
+
+**Ruby**
+```ruby
+Analytics.track(
+  event: '<event_name>',
+  properties: {
+    <event_parameters>
+  }
+)
+```
 </details>
 
 <details>
 <summary>Mixpanel</summary>
 
+**JavaScript/TypeScript**
 ```js
 mixpanel.track('<event_name>', {
   <event_parameters>
 });
 ```
+
+**Python**
+```python
+mixpanel.track('<event_name>', {
+  '<property_name>': '<property_value>'
+})
+```
+
+**Ruby**
+```ruby
+tracker.track('distinct_id', '<event_name>', {
+  '<property_name>': '<property_value>'
+})
+```
 </details>
 
 <details>
 <summary>Amplitude</summary>
 
+**JavaScript/TypeScript**
 ```js
 amplitude.logEvent('<event_name>', {
   <event_parameters>
 });
 ```
+
+**Python**
+```python
+amplitude.track('<event_name>', {
+  '<property_name>': '<property_value>'
+})
+```
 </details>
 
 <details>
 <summary>Rudderstack</summary>
 
+**JavaScript/TypeScript**
 ```js
 rudderanalytics.track('<event_name>', {
   <event_parameters>
 });
 ```
+
+**Python**
+```python
+rudderanalytics.track('<event_name>', {
+  '<property_name>': '<property_value>'
+})
+```
 </details>
 
 <details>
 <summary>mParticle</summary>
 
+**JavaScript/TypeScript**
 ```js
 mParticle.logEvent('<event_name>', {
   <event_parameters>
 });
 ```
+
+**Python**
+```python
+mParticle.logEvent('<event_name>', {
+  '<property_name>': '<property_value>'
+})
+```
 </details>
 
 <details>
 <summary>PostHog</summary>
 
+**JavaScript/TypeScript**
 ```js
 posthog.capture('<event_name>', {
   <event_parameters>
 });
 ```
+
+**Python**
+```python
+posthog.capture(
+  'distinct_id',
+  '<event_name>',
+  {
+    '<property_name>': '<property_value>'
+  }
+)
+# Or
+posthog.capture(
+  'distinct_id',
+  event='<event_name>',
+  properties={
+    '<property_name>': '<property_value>'
+  }
+)
+```
+
+**Ruby**
+```ruby
+posthog.capture({
+  distinct_id: '<distinct_id>',
+  event: '<event_name>',
+  properties: {
+    '<property_name>': '<property_value>'
+  }
+})
+```
 </details>
 
 <details>
 <summary>Pendo</summary>
 
+**JavaScript/TypeScript**
 ```js
 pendo.track('<event_name>', {
   <event_parameters>
 });
 ```
+
+**Python**
+```python
+pendo.track('<event_name>', {
+  '<property_name>': '<property_value>'
+})
+```
 </details>
 
 <details>
 <summary>Heap</summary>
 
+**JavaScript/TypeScript**
 ```js
 heap.track('<event_name>', {
   <event_parameters>
 });
 ```
+
+**Python**
+```python
+heap.track('<event_name>', {
+  '<property_name>': '<property_value>'
+})
+```
 </details>
 
 <details>
 <summary>Snowplow (struct events)</summary>
 
+**JavaScript/TypeScript**
 ```js
 snowplow('trackStructEvent', {
   category: '<category>',
   action: '<action>',
   label: '<label>',
   property: '<property>',
-  value: '<value>
+  value: '<value>'
 });
 ```
 
-
+**Python**
+```python
+# Direct tracking
 trackStructEvent({
-  category: '<category>',
-  action: '<action>',
-  label: '<label>',
-  property: '<property>',
-  value: '<value>'
-})
+  'category': '<category>',
+  'action': '<action>',
+  'label': '<label>',
+  'property': '<property>',
+  'value': '<value>'
+})
+
+# Builder pattern
+buildStructEvent({
+  'category': '<category>',
+  'action': '<action>',
+  'label': '<label>',
+  'property': '<property>',
+  'value': '<value>'
+})
+
+# Function call pattern
+snowplow('trackStructEvent', {
+  'category': '<category>',
+  'action': '<action>',
+  'label': '<label>',
+  'property': '<property>',
+  'value': '<value>'
+})
 ```
 
-
-
+**Ruby**
+```ruby
+tracker.track_struct_event(
   category: '<category>',
   action: '<action>',
   label: '<label>',
   property: '<property>',
   value: '<value>'
-
+)
 ```
 
 _Note: Snowplow Self Describing Events are coming soon!_
 </details>
 
 
-## Supported languages
-
-- JavaScript
-- TypeScript
-- Ruby (Experimental - only supports Segment for now)
-
-
 ## Contribute
-We
+We're actively improving this package. Found a bug? Want to request a feature? Open an issue or contribute directly!
package/bin/help.js
CHANGED
@@ -57,10 +57,10 @@ const helpContent = [
   {
     name: 'model',
     alias: 'm',
-    description: 'Specify a model (ex: {italic gpt-4o-mini}, {italic gemini-2.0-flash-lite-001})',
+    description: 'Specify a model (ex: {italic gpt-4.1-nano, gpt-4o-mini}, {italic gemini-2.0-flash-lite-001})',
     type: String,
-    defaultValue: 'gpt-
-    typeLabel: '{underline gpt-
+    defaultValue: 'gpt-4.1-nano',
+    typeLabel: '{underline gpt-4.1-nano}'
   },
   {
     name: 'output',
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@flisk/analyze-tracking",
-  "version": "0.5.1",
+  "version": "0.6.0",
   "description": "Analyzes tracking code in a project and generates data schemas",
   "main": "src/index.js",
   "bin": {
@@ -20,9 +20,9 @@
   },
   "homepage": "https://github.com/fliskdata/analyze-tracking#readme",
   "dependencies": {
-    "@langchain/core": "^0.3.
-    "@langchain/google-vertexai": "^0.2.
-    "@langchain/openai": "^0.5.
+    "@langchain/core": "^0.3.56",
+    "@langchain/google-vertexai": "^0.2.9",
+    "@langchain/openai": "^0.5.10",
     "@ruby/prism": "^1.4.0",
     "@typescript-eslint/parser": "^8.1.0",
     "acorn": "^8.12.1",
@@ -33,6 +33,7 @@
     "command-line-usage": "^7.0.3",
     "isomorphic-git": "^1.27.1",
     "js-yaml": "^4.1.0",
+    "pyodide": "^0.27.6",
     "typescript": "^5.5.4",
     "zod": "^3.24.4"
   },
package/src/analyze/analyzePythonFile.js
ADDED
@@ -0,0 +1,41 @@
+const fs = require('fs');
+const path = require('path');
+
+let pyodide = null;
+
+async function initPyodide() {
+  if (!pyodide) {
+    const { loadPyodide } = await import('pyodide');
+    pyodide = await loadPyodide();
+    await pyodide.loadPackagesFromImports('import ast, json');
+  }
+  return pyodide;
+}
+
+async function analyzePythonFile(filePath, customFunction) {
+  try {
+    const code = fs.readFileSync(filePath, 'utf8');
+    const py = await initPyodide();
+
+    // Read the Python analyzer code
+    const analyzerPath = path.join(__dirname, 'pythonTrackingAnalyzer.py');
+    const analyzerCode = fs.readFileSync(analyzerPath, 'utf8');
+
+    // Add file content and analyzer code to Python environment
+    py.globals.set('code', code);
+    py.globals.set('filepath', filePath);
+    py.globals.set('custom_function', customFunction || null);
+
+    // Run the Python analyzer
+    py.runPython(analyzerCode);
+    const result = py.runPython('analyze_python_code(code, filepath, custom_function)');
+    const events = JSON.parse(result);
+
+    return events;
+  } catch (error) {
+    console.error(`Error analyzing Python file ${filePath}:`, error);
+    return [];
+  }
+}
+
+module.exports = { analyzePythonFile };
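To make the new analyzer concrete, here is a minimal usage sketch; the require path and the example Python file are placeholders, not part of the diff:

```js
// Hypothetical call site for the Pyodide-backed Python analyzer.
const { analyzePythonFile } = require('./src/analyze/analyzePythonFile');

(async () => {
  // e.g. a file containing: analytics.track('Signup Completed', {'plan': 'pro'})
  const events = await analyzePythonFile('examples/signup.py', null);
  // Each entry carries eventName, source, properties, filePath, line, functionName
  console.log(events);
})();
```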
package/src/analyze/analyzeRubyFile.js
CHANGED
@@ -4,12 +4,13 @@ let parse = null;
 
 // Create a visitor to traverse the AST
 class TrackingVisitor {
-  constructor(code, filePath) {
+  constructor(code, filePath, customFunction=null) {
    this.code = code;
    this.lines = code.split('\n');
    this.ancestors = [];
    this.events = [];
    this.filePath = filePath;
+    this.customFunction = customFunction;
  }

  getLineNumber(location) {
@@ -40,13 +41,27 @@ class TrackingVisitor {
  detectSource(node) {
    if (!node) return null;

-    // Check for
+    // Check for analytics libraries
    if (node.receiver) {
      const objectName = node.receiver.name;
      const methodName = node.name;

+      // Segment
      if (objectName === 'Analytics' && methodName === 'track') return 'segment';
+
+      // Mixpanel (Ruby SDK uses Mixpanel::Tracker instance)
+      if (methodName === 'track' && node.receiver.type === 'CallNode' &&
+          node.receiver.name === 'tracker') return 'mixpanel';
+
+      // PostHog
+      if (objectName === 'posthog' && methodName === 'capture') return 'posthog';
    }
+
+    // Snowplow (typically tracker.track_struct_event)
+    if (node.name === 'track_struct_event') return 'snowplow';
+
+    // Custom tracking function
+    if (this.customFunction && node.name === this.customFunction) return 'custom';

    return null;
  }
@@ -58,6 +73,38 @@ class TrackingVisitor {
      return eventProperty?.value?.unescaped?.value || null;
    }

+    if (source === 'mixpanel') {
+      // Mixpanel Ruby SDK format: tracker.track('distinct_id', 'event_name', {...})
+      const args = node.arguments_.arguments_;
+      if (args && args.length > 1 && args[1]?.unescaped?.value) {
+        return args[1].unescaped.value;
+      }
+    }
+
+    if (source === 'posthog') {
+      // PostHog Ruby SDK format: posthog.capture({distinct_id: '...', event: '...', properties: {...}})
+      const hashArg = node.arguments_.arguments_[0];
+      if (hashArg && hashArg.elements) {
+        const eventProperty = hashArg.elements.find(elem => elem?.key?.unescaped?.value === 'event');
+        return eventProperty?.value?.unescaped?.value || null;
+      }
+    }
+
+    if (source === 'snowplow') {
+      // Snowplow Ruby SDK: tracker.track_struct_event(category: '...', action: '...', ...)
+      const params = node.arguments_.arguments_[0].elements;
+      const actionProperty = params.find(param => param?.key?.unescaped?.value === 'action');
+      return actionProperty?.value?.unescaped?.value || null;
+    }
+
+    if (source === 'custom') {
+      // Custom function format: customFunction('event_name', {...})
+      const args = node.arguments_.arguments_;
+      if (args && args.length > 0 && args[0]?.unescaped?.value) {
+        return args[0].unescaped.value;
+      }
+    }
+
    return null;
  }

@@ -106,6 +153,78 @@ class TrackingVisitor {
      return properties;
    }

+    if (source === 'mixpanel') {
+      // Mixpanel Ruby SDK: tracker.track('distinct_id', 'event_name', {properties})
+      const args = node.arguments_.arguments_;
+      const properties = {};
+
+      // Add distinct_id as property
+      if (args && args.length > 0 && args[0]?.unescaped?.value) {
+        properties.distinct_id = {
+          type: 'string'
+        };
+      }
+
+      // Extract properties from third argument if it exists
+      if (args && args.length > 2 && args[2] instanceof HashNode) {
+        const propsHash = await this.extractHashProperties(args[2]);
+        Object.assign(properties, propsHash);
+      }
+
+      return properties;
+    }
+
+    if (source === 'posthog') {
+      // PostHog Ruby SDK: posthog.capture({distinct_id: '...', event: '...', properties: {...}})
+      const hashArg = node.arguments_.arguments_[0];
+      const properties = {};
+
+      if (hashArg && hashArg.elements) {
+        // Extract distinct_id if present
+        const distinctIdProperty = hashArg.elements.find(elem => elem?.key?.unescaped?.value === 'distinct_id');
+        if (distinctIdProperty?.value) {
+          properties.distinct_id = {
+            type: await this.getValueType(distinctIdProperty.value)
+          };
+        }
+
+        // Extract properties
+        const propsProperty = hashArg.elements.find(elem => elem?.key?.unescaped?.value === 'properties');
+        if (propsProperty?.value instanceof HashNode) {
+          const props = await this.extractHashProperties(propsProperty.value);
+          Object.assign(properties, props);
+        }
+      }
+
+      return properties;
+    }
+
+    if (source === 'snowplow') {
+      // Snowplow Ruby SDK: tracker.track_struct_event(category: '...', action: '...', ...)
+      const params = node.arguments_.arguments_[0].elements;
+      const properties = {};
+
+      // Extract all struct event parameters except 'action' (which is used as the event name)
+      for (const param of params) {
+        const key = param?.key?.unescaped?.value;
+        if (key && key !== 'action') {
+          properties[key] = {
+            type: await this.getValueType(param.value)
+          };
+        }
+      }
+
+      return properties;
+    }
+
+    if (source === 'custom') {
+      // Custom function format: customFunction('event_name', {properties})
+      const args = node.arguments_.arguments_;
+      if (args && args.length > 1 && args[1] instanceof HashNode) {
+        return await this.extractHashProperties(args[1]);
+      }
+    }
+
    return null;
  }

@@ -257,7 +376,7 @@ class TrackingVisitor {
  }
}

-async function analyzeRubyFile(filePath) {
+async function analyzeRubyFile(filePath, customFunction) {
  // Lazy load the ruby prism parser
  if (!parse) {
    const { loadPrism } = await import('@ruby/prism');
@@ -275,7 +394,7 @@ async function analyzeRubyFile(filePath) {
  }

  // Traverse the AST starting from the program node
-  const visitor = new TrackingVisitor(code, filePath);
+  const visitor = new TrackingVisitor(code, filePath, customFunction);
  await visitor.visit(ast.value);

  return visitor.events;
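A similarly hedged sketch of the updated Ruby analyzer entry point with the new custom-function argument; the file path and function name are examples only:

```js
// Hypothetical call site; 'my_track' stands in for a project-specific tracking helper.
const { analyzeRubyFile } = require('./src/analyze/analyzeRubyFile');

(async () => {
  // Detects Segment, Mixpanel, PostHog, and Snowplow calls, plus my_track('Event', { ... })
  const events = await analyzeRubyFile('app/services/signup.rb', 'my_track');
  console.log(events);
})();
```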
package/src/analyze/index.js
CHANGED
@@ -3,6 +3,7 @@ const ts = require('typescript');
 const { getAllFiles } = require('../fileProcessor');
 const { analyzeJsFile } = require('./analyzeJsFile');
 const { analyzeTsFile } = require('./analyzeTsFile');
+const { analyzePythonFile } = require('./analyzePythonFile');
 const { analyzeRubyFile } = require('./analyzeRubyFile');
 
 async function analyzeDirectory(dirPath, customFunction) {
@@ -20,14 +21,17 @@ async function analyzeDirectory(dirPath, customFunction) {
 
     const isJsFile = /\.(jsx?)$/.test(file);
     const isTsFile = /\.(tsx?)$/.test(file);
+    const isPythonFile = /\.(py)$/.test(file);
     const isRubyFile = /\.(rb)$/.test(file);
 
     if (isJsFile) {
       events = analyzeJsFile(file, customFunction);
     } else if (isTsFile) {
       events = analyzeTsFile(file, tsProgram, customFunction);
+    } else if (isPythonFile) {
+      events = await analyzePythonFile(file, customFunction);
     } else if (isRubyFile) {
-      events = await analyzeRubyFile(file);
+      events = await analyzeRubyFile(file, customFunction);
     } else {
       console.info(`Skipping file ${file} because it is not a supported file type`);
       continue;
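For context, a sketch of driving the dispatcher above, assuming this module exports `analyzeDirectory` (the directory and custom function name are placeholders):

```js
// Hypothetical usage of the directory-level analyzer.
const { analyzeDirectory } = require('./src/analyze');

(async () => {
  // .js/.jsx, .ts/.tsx, .py, and .rb files are analyzed; other files are skipped with a console.info
  const events = await analyzeDirectory('./my-project', 'my_track');
  console.log(events);
})();
```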
package/src/analyze/pythonTrackingAnalyzer.py
ADDED
@@ -0,0 +1,439 @@
+import ast
+import json
+
+class TrackingVisitor(ast.NodeVisitor):
+    def __init__(self, filepath, custom_function=None):
+        self.events = []
+        self.filepath = filepath
+        self.current_function = 'global'
+        self.function_stack = []
+        # Track variable types in the current scope
+        self.var_types = {}
+        # Stack of variable type scopes
+        self.var_types_stack = []
+        # Custom tracking function name
+        self.custom_function = custom_function
+
+    def visit_FunctionDef(self, node):
+        # Save previous function context and variable types
+        self.function_stack.append(self.current_function)
+        self.var_types_stack.append(self.var_types)
+
+        # Create new scope for variable types
+        self.var_types = {}
+        self.current_function = node.name
+
+        # Extract parameter type annotations
+        for arg in node.args.args:
+            if arg.annotation:
+                # Store the type annotation for this parameter
+                self.var_types[arg.arg] = self.extract_type_annotation(arg.annotation)
+
+        # Visit children
+        self.generic_visit(node)
+
+        # Restore function context and variable types
+        self.current_function = self.function_stack.pop()
+        self.var_types = self.var_types_stack.pop()
+
+    def extract_type_annotation(self, annotation):
+        """Extract type information from a type annotation node"""
+        if isinstance(annotation, ast.Name):
+            # Simple types like int, str, bool
+            type_name = annotation.id
+            if type_name == 'int' or type_name == 'float':
+                return 'number'
+            elif type_name == 'str':
+                return 'string'
+            elif type_name == 'bool':
+                return 'boolean'
+            elif type_name == 'None' or type_name == 'NoneType':
+                return 'null'
+            # Could add more type mappings here
+        elif isinstance(annotation, ast.Subscript):
+            # Handle generic types like List[int], Dict[str, int]
+            if hasattr(annotation.value, 'id'):
+                container_type = annotation.value.id
+                if container_type in ('List', 'Tuple', 'Set', 'list', 'tuple', 'set'):
+                    # Try to get the type parameter
+                    if isinstance(annotation.slice, ast.Name):
+                        element_type = self.extract_type_annotation(annotation.slice)
+                        return {
+                            'type': 'array',
+                            'items': {'type': element_type}
+                        }
+                    return 'array'
+                elif container_type in ('Dict', 'dict'):
+                    return 'object'
+        # Default for unknown or complex types
+        return 'any'
+
+    def visit_AnnAssign(self, node):
+        """Visit variable assignments with type annotations"""
+        if isinstance(node.target, ast.Name) and node.annotation:
+            # Store the type annotation for this variable
+            self.var_types[node.target.id] = self.extract_type_annotation(node.annotation)
+        self.generic_visit(node)
+
+    def visit_Assign(self, node):
+        """Visit regular assignments to track simple type inferences"""
+        if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
+            var_name = node.targets[0].id
+            # Try to infer type from literal values
+            if isinstance(node.value, ast.Constant):
+                self.var_types[var_name] = self.get_value_type(node.value.value)
+        self.generic_visit(node)
+
+    def visit_ClassDef(self, node):
+        # Track class context for methods
+        class_name = node.name
+        self.function_stack.append(self.current_function)
+        self.var_types_stack.append(self.var_types)
+
+        # Create new scope for the class
+        self.var_types = {}
+        self.current_function = class_name
+
+        self.generic_visit(node)
+
+        # Restore context
+        self.current_function = self.function_stack.pop()
+        self.var_types = self.var_types_stack.pop()
+
+    def visit_Call(self, node):
+        # Check for analytics tracking calls
+        source = self.detect_source(node)
+        if source:
+            event_name = self.extract_event_name(node, source)
+            if event_name:
+                properties = self.extract_properties(node, source)
+                self.events.append({
+                    "eventName": event_name,
+                    "source": source,
+                    "properties": properties,
+                    "filePath": self.filepath,
+                    "line": node.lineno,
+                    "functionName": self.current_function
+                })
+
+        # Continue visiting child nodes
+        self.generic_visit(node)
+
+    def detect_source(self, node):
+        # Check for analytics tracking libraries
+        if isinstance(node.func, ast.Attribute):
+            if hasattr(node.func.value, 'id'):
+                obj_id = node.func.value.id
+                method_name = node.func.attr
+
+                # Segment analytics
+                if obj_id == 'analytics' and method_name == 'track':
+                    return 'segment'
+                # Mixpanel
+                if obj_id == 'mixpanel' and method_name == 'track':
+                    return 'mixpanel'
+                # Amplitude
+                if obj_id == 'amplitude' and method_name == 'track':
+                    return 'amplitude'
+                # Rudderstack
+                if obj_id == 'rudderanalytics' and method_name == 'track':
+                    return 'rudderstack'
+                # mParticle
+                if obj_id == 'mParticle' and method_name == 'logEvent':
+                    return 'mparticle'
+                # PostHog
+                if obj_id == 'posthog' and method_name == 'capture':
+                    return 'posthog'
+                # Pendo
+                if obj_id == 'pendo' and method_name == 'track':
+                    return 'pendo'
+                # Heap
+                if obj_id == 'heap' and method_name == 'track':
+                    return 'heap'
+
+        # Check for Snowplow struct event patterns
+        if isinstance(node.func, ast.Name) and node.func.id in ['trackStructEvent', 'buildStructEvent']:
+            return 'snowplow'
+
+        # Check for Snowplow's snowplow('trackStructEvent', {...}) pattern
+        if isinstance(node.func, ast.Name) and node.func.id == 'snowplow':
+            if len(node.args) >= 1 and isinstance(node.args[0], ast.Constant):
+                if node.args[0].value == 'trackStructEvent':
+                    return 'snowplow'
+
+        # Check for custom tracking function
+        if self.custom_function and isinstance(node.func, ast.Name) and node.func.id == self.custom_function:
+            return 'custom'
+
+        return None
+
+    def extract_event_name(self, node, source):
+        try:
+            if source in ['segment', 'mixpanel', 'amplitude', 'rudderstack', 'pendo', 'heap', 'custom']:
+                # Standard format: library.track('event_name', {...})
+                # Custom function follows same format: customFunction('event_name', {...})
+                if len(node.args) >= 1 and isinstance(node.args[0], ast.Constant):
+                    return node.args[0].value
+
+            elif source == 'mparticle':
+                # mParticle: mParticle.logEvent('event_name', {...})
+                if len(node.args) >= 1 and isinstance(node.args[0], ast.Constant):
+                    return node.args[0].value
+
+            elif source == 'posthog':
+                # PostHog has multiple formats:
+                # 1. posthog.capture('distinct_id', 'event_name', {...})
+                # 2. posthog.capture('distinct_id', event='event_name', properties={...})
+
+                # Check for named parameters first (event='event_name')
+                for keyword in node.keywords:
+                    if keyword.arg == 'event' and isinstance(keyword.value, ast.Constant):
+                        return keyword.value.value
+
+                # If no named event parameter, check positional args (second arg is event name)
+                if len(node.args) >= 2 and isinstance(node.args[1], ast.Constant):
+                    return node.args[1].value
+
+            elif source == 'snowplow':
+                # Snowplow struct events use 'action' as the event name
+                if len(node.args) >= 1:
+                    # Handle different snowplow call patterns
+                    props_node = None
+
+                    # Direct trackStructEvent/buildStructEvent call
+                    if isinstance(node.func, ast.Name) and node.func.id in ['trackStructEvent', 'buildStructEvent']:
+                        if len(node.args) >= 1:
+                            props_node = node.args[0]
+
+                    # snowplow('trackStructEvent', {...}) pattern
+                    elif isinstance(node.func, ast.Name) and node.func.id == 'snowplow':
+                        if len(node.args) >= 2:
+                            props_node = node.args[1]
+
+                    # Extract 'action' from properties
+                    if props_node and isinstance(props_node, ast.Dict):
+                        for i, key_node in enumerate(props_node.keys):
+                            if isinstance(key_node, ast.Constant) and key_node.value == 'action':
+                                value_node = props_node.values[i]
+                                if isinstance(value_node, ast.Constant):
+                                    return value_node.value
+        except:
+            pass
+        return None
+
+    def extract_properties(self, node, source):
+        properties = {}
+        try:
+            props_node = None
+
+            # Get the properties object based on source
+            if source in ['segment', 'mixpanel', 'amplitude', 'rudderstack', 'mparticle', 'pendo', 'heap', 'custom']:
+                # Standard format: library.track('event_name', {properties})
+                # Custom function follows same format: customFunction('event_name', {...})
+                if len(node.args) > 1:
+                    props_node = node.args[1]
+
+            elif source == 'posthog':
+                # PostHog has multiple formats
+                is_anonymous = False
+                distinct_id = None
+
+                # Check for properties in named parameters first
+                for keyword in node.keywords:
+                    if keyword.arg == 'properties' and isinstance(keyword.value, ast.Dict):
+                        props_node = keyword.value
+
+                        # Check if event is anonymous
+                        for i, key_node in enumerate(props_node.keys):
+                            if (isinstance(key_node, ast.Constant) and
+                                    key_node.value == '$process_person_profile'):
+                                value_node = props_node.values[i]
+                                if (isinstance(value_node, ast.Constant) and
+                                        value_node.value is False):
+                                    is_anonymous = True
+
+                # If no named properties, check positional args (third arg)
+                if props_node is None and len(node.args) > 2:
+                    props_node = node.args[2]
+
+                # Add distinct_id as property if it exists and event is not anonymous
+                if not is_anonymous and len(node.args) > 0 and isinstance(node.args[0], ast.Constant):
+                    distinct_id = node.args[0].value
+                    if distinct_id:
+                        properties["distinct_id"] = {"type": "string"}
+
+            elif source == 'snowplow':
+                # For Snowplow struct events
+                if isinstance(node.func, ast.Name) and node.func.id in ['trackStructEvent', 'buildStructEvent']:
+                    if len(node.args) >= 1:
+                        props_node = node.args[0]
+
+                # snowplow('trackStructEvent', {...}) pattern
+                elif isinstance(node.func, ast.Name) and node.func.id == 'snowplow':
+                    if len(node.args) >= 2:
+                        props_node = node.args[1]
+
+            # Extract properties from the dictionary
+            if props_node and isinstance(props_node, ast.Dict):
+                for i, key_node in enumerate(props_node.keys):
+                    if isinstance(key_node, ast.Constant) and hasattr(key_node, 'value'):
+                        key = key_node.value
+                        value_node = props_node.values[i]
+
+                        # Special handling for PostHog $set and $set_once
+                        if source == 'posthog' and key in ['$set', '$set_once']:
+                            if isinstance(value_node, ast.Dict):
+                                nested_props = self.extract_nested_dict(value_node)
+                                for nested_key, nested_value in nested_props.items():
+                                    properties[f"{key}.{nested_key}"] = nested_value
+                            continue
+
+                        # Skip PostHog internal properties
+                        if source == 'posthog' and key == '$process_person_profile':
+                            continue
+
+                        # Handle different value types
+                        if isinstance(value_node, ast.Constant):
+                            value_type = self.get_value_type(value_node.value)
+                            properties[key] = {"type": value_type}
+                        elif isinstance(value_node, ast.Name):
+                            # Check if we know the type of this variable
+                            var_name = value_node.id
+                            if var_name in self.var_types:
+                                # Get the type for this variable
+                                var_type = self.var_types[var_name]
+
+                                # Handle structured types (arrays or objects)
+                                if isinstance(var_type, dict):
+                                    properties[key] = var_type
+                                else:
+                                    properties[key] = {"type": var_type}
+                            else:
+                                properties[key] = {"type": "any"}
+                        elif isinstance(value_node, ast.Dict):
+                            # Nested dictionary
+                            nested_props = self.extract_nested_dict(value_node)
+                            properties[key] = {
+                                "type": "object",
+                                "properties": nested_props
+                            }
+                        elif isinstance(value_node, ast.List) or isinstance(value_node, ast.Tuple):
+                            # Array/list/tuple
+                            item_type = self.infer_sequence_item_type(value_node)
+                            properties[key] = {
+                                "type": "array",
+                                "items": item_type
+                            }
+        except:
+            pass
+        return properties
+
+    def infer_sequence_item_type(self, seq_node):
+        """Analyze a sequence (list or tuple) to determine the type of its items"""
+        if not hasattr(seq_node, 'elts') or not seq_node.elts:
+            return {"type": "any"}
+
+        # Get types of all elements
+        element_types = []
+        for element in seq_node.elts:
+            if isinstance(element, ast.Constant):
+                element_types.append(self.get_value_type(element.value))
+            elif isinstance(element, ast.Name):
+                if element.id in self.var_types:
+                    element_types.append(self.var_types[element.id])
+                else:
+                    element_types.append("any")
+            elif isinstance(element, ast.Dict):
+                element_types.append("object")
+            elif isinstance(element, ast.List) or isinstance(element, ast.Tuple):
+                element_types.append("array")
+            else:
+                element_types.append("any")
+
+        # Check if all elements are the same type
+        if len(set(element_types)) == 1:
+            return {"type": element_types[0]}
+
+        # Check if all types are either number or string (common mixed case)
+        if set(element_types) <= {"number", "string"}:
+            return {"type": "string"}
+
+        # Check if all types are either number or boolean
+        if set(element_types) <= {"number", "boolean"}:
+            return {"type": "number"}
+
+        # Otherwise, it's a mixed type array
+        return {"type": "any"}
+
+    def extract_nested_dict(self, dict_node):
+        nested_props = {}
+        for i, key_node in enumerate(dict_node.keys):
+            if isinstance(key_node, ast.Constant) and hasattr(key_node, 'value'):
+                key = key_node.value
+                value_node = dict_node.values[i]
+
+                if isinstance(value_node, ast.Constant):
+                    value_type = self.get_value_type(value_node.value)
+                    nested_props[key] = {"type": value_type}
+                elif isinstance(value_node, ast.Name):
+                    # Check if we know the type of this variable
+                    var_name = value_node.id
+                    if var_name in self.var_types:
+                        nested_props[key] = {"type": self.var_types[var_name]}
+                    else:
+                        nested_props[key] = {"type": "any"}
+                elif isinstance(value_node, ast.Dict):
+                    sub_props = self.extract_nested_dict(value_node)
+                    nested_props[key] = {
+                        "type": "object",
+                        "properties": sub_props
+                    }
+                elif isinstance(value_node, ast.List) or isinstance(value_node, ast.Tuple):
+                    # Array/list/tuple
+                    item_type = self.infer_sequence_item_type(value_node)
+                    nested_props[key] = {
+                        "type": "array",
+                        "items": item_type
+                    }
+        return nested_props
+
+    def get_value_type(self, value):
+        if isinstance(value, str):
+            return "string"
+        elif isinstance(value, (int, float)):
+            return "number"
+        elif isinstance(value, bool):
+            return "boolean"
+        elif value is None:
+            return "null"
+        return "any"
+
+def analyze_python_code(code, filepath, custom_function=None):
+    # Parse the Python code
+    tree = ast.parse(code)
+    visitor = TrackingVisitor(filepath, custom_function)
+    visitor.visit(tree)
+
+    # Return events as JSON
+    return json.dumps(visitor.events)
+
+if __name__ == "__main__":
+    import sys
+    import argparse
+
+    parser = argparse.ArgumentParser(description='Analyze Python code for tracking calls')
+    parser.add_argument('file', help='Python file to analyze')
+    parser.add_argument('-c', '--custom-function', help='Name of custom tracking function')
+    args = parser.parse_args()
+
+    try:
+        with open(args.file, 'r') as f:
+            code = f.read()
+        result = analyze_python_code(code, args.file, args.custom_function)
+        print(result)
+    except FileNotFoundError:
+        print(f"Error: File '{args.file}' not found")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Error analyzing file: {str(e)}")
+        sys.exit(1)
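To make the analyzer's output concrete: a Python call such as `analytics.track('Plan Upgraded', {'plan': 'pro', 'seats': 3})` inside a function `upgrade_plan` would yield an entry roughly like the following once the Node side parses the JSON (file path and line number are illustrative):

```js
// Illustrative shape of one parsed event from analyze_python_code.
const exampleEvent = {
  eventName: 'Plan Upgraded',
  source: 'segment',            // analytics.track(...) maps to 'segment'
  properties: {
    plan: { type: 'string' },   // str literal -> 'string'
    seats: { type: 'number' },  // int literal -> 'number'
  },
  filePath: 'examples/billing.py',
  line: 42,
  functionName: 'upgrade_plan',
};
```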
package/src/index.js
CHANGED
@@ -12,13 +12,13 @@ async function run(targetDir, outputPath, customFunction, customSourceDetails, g
   let llm;
   if (provider === 'openai') {
     llm = new ChatOpenAI({
-
+      model: model,
       temperature: 0,
     });
   }
   if (provider === 'gemini') {
     llm = new ChatVertexAI({
-
+      model: model,
       temperature: 0,
     });
   }
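Net effect of this last change: the `--model` value selected on the CLI is now passed straight through to `ChatOpenAI` or `ChatVertexAI` (both at temperature 0), matching the new `gpt-4.1-nano` default shown in `bin/help.js`.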