@flisk/analyze-tracking 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -205,5 +205,12 @@ See [schema.json](schema.json) for a JSON Schema of the output.
205
205
  </details>
206
206
 
207
207
 
208
+ ## Supported languages
209
+
210
+ - JavaScript
211
+ - TypeScript
212
+ - Ruby (Experimental - only supports Segment for now)
213
+
214
+
208
215
  ## Contribute
209
216
  We’re actively improving this package. Found a bug? Want to request a feature? Open an issue or contribute directly!
package/bin/cli.js CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env node
1
+ #!/usr/bin/env node --no-warnings=ExperimentalWarning
2
2
 
3
3
  const path = require('path');
4
4
  const commandLineArgs = require('command-line-args');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flisk/analyze-tracking",
3
- "version": "0.3.1",
3
+ "version": "0.4.0",
4
4
  "description": "Analyzes tracking code in a project and generates data schemas",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -20,6 +20,7 @@
20
20
  },
21
21
  "homepage": "https://github.com/fliskdata/analyze-tracking#readme",
22
22
  "dependencies": {
23
+ "@ruby/prism": "^1.4.0",
23
24
  "@typescript-eslint/parser": "^8.1.0",
24
25
  "acorn": "^8.12.1",
25
26
  "acorn-jsx": "^5.3.2",
@@ -0,0 +1,290 @@
1
+ const fs = require('fs');
2
+
3
+ let parse = null;
4
+
5
+ // Create a visitor to traverse the AST
6
/**
 * Walks a Ruby AST produced by `@ruby/prism` and records analytics tracking
 * calls found in it.
 *
 * Only Segment-style calls are recognised: a call named `track` whose receiver
 * is named `Analytics`, with the event name passed under the `event` key of
 * the first (hash) argument.
 *
 * Recorded events accumulate in `this.events`.
 */
class TrackingVisitor {
  /**
   * @param {string} code - Ruby source text the AST was parsed from.
   * @param {string} filePath - Path attached to every recorded event.
   */
  constructor(code, filePath) {
    this.code = code;
    this.lines = code.split('\n');
    this.ancestors = []; // Stack of nodes from the root down to the node being visited.
    this.events = [];
    this.filePath = filePath;
  }

  /**
   * Converts a node location into a 1-based line number.
   *
   * NOTE(review): this slices the JS string by `startOffset`; if the parser
   * reports byte offsets, files containing multi-byte characters may yield a
   * shifted line number - confirm against the parser's documentation.
   *
   * @param {{ startOffset: number }} location
   * @returns {number} Number of newlines before the offset, plus one.
   */
  getLineNumber(location) {
    const beforeStart = this.code.slice(0, location.startOffset);
    return beforeStart.split('\n').length;
  }

  /**
   * Names the innermost enclosing definition of a node.
   *
   * @param {object} node - The node being described (not inspected directly).
   * @param {object[]} ancestors - Ancestor stack, outermost first.
   * @returns {Promise<*>} The method name of the nearest `DefNode`, `'block'`
   *   when a block or lambda is nearer, or `'global'` when neither encloses it.
   */
  async findWrappingFunction(node, ancestors) {
    const { DefNode, BlockNode, LambdaNode } = await import('@ruby/prism');

    for (let i = ancestors.length - 1; i >= 0; i--) {
      const current = ancestors[i];

      if (current instanceof DefNode) {
        return current.name;
      }

      if (current instanceof BlockNode || current instanceof LambdaNode) {
        return 'block';
      }
    }

    return 'global';
  }

  /**
   * Identifies which analytics library a call node belongs to.
   *
   * @param {object|null|undefined} node - Candidate call node.
   * @returns {string|null} `'segment'` for `Analytics.track`, otherwise `null`.
   */
  detectSource(node) {
    if (!node) return null;

    if (node.receiver) {
      const objectName = node.receiver.name;
      const methodName = node.name;

      if (objectName === 'Analytics' && methodName === 'track') return 'segment';
    }

    return null;
  }

  /**
   * Returns the key/value elements of the first argument of a Segment call.
   *
   * Calls without arguments, or whose first argument has no `elements` list
   * (for example a plain variable), yield an empty array instead of throwing.
   *
   * @param {object} node - Call node.
   * @returns {object[]} Elements of the first argument, possibly empty.
   */
  getSegmentParams(node) {
    const elements = node?.arguments_?.arguments_?.[0]?.elements;
    return Array.isArray(elements) ? elements : [];
  }

  /**
   * Reads the event name from a tracking call.
   *
   * @param {object} node - Call node.
   * @param {string|null} source - Result of `detectSource`.
   * @returns {string|null} Value stored under the `event` key, or `null` when
   *   the source is not Segment or no such value is present.
   */
  extractEventName(node, source) {
    if (source !== 'segment') return null;

    const eventProperty = this.getSegmentParams(node)
      .find((param) => param?.key?.unescaped?.value === 'event');
    return eventProperty?.value?.unescaped?.value || null;
  }

  /**
   * Builds the schema entry for a single value node.
   *
   * @param {object} value - Value node of a hash entry.
   * @returns {Promise<object>} `{ type: 'object', properties }` for hashes,
   *   `{ type: 'array', items }` for arrays, `{ type }` for everything else.
   */
  async describeValue(value) {
    const { HashNode, ArrayNode } = await import('@ruby/prism');

    if (value instanceof HashNode) {
      return {
        type: 'object',
        properties: await this.extractHashProperties(value)
      };
    }

    if (value instanceof ArrayNode) {
      return {
        type: 'array',
        items: await this.extractArrayItemProperties(value)
      };
    }

    return { type: await this.getValueType(value) };
  }

  /**
   * Extracts the property schema of a tracking call.
   *
   * Every top-level key except `event` becomes a property. The contents of a
   * `properties` hash are merged into the top level rather than nested.
   *
   * @param {object} node - Call node.
   * @param {string|null} source - Result of `detectSource`.
   * @returns {Promise<object|null>} Property schema, or `null` when the source
   *   is not Segment.
   */
  async extractProperties(node, source) {
    if (source !== 'segment') return null;

    const { HashNode } = await import('@ruby/prism');
    const properties = {};

    for (const param of this.getSegmentParams(node)) {
      const key = param?.key?.unescaped?.value;
      if (!key || key === 'event') continue;

      const value = param?.value;

      if (key === 'properties' && value instanceof HashNode) {
        // Flatten the `properties` hash into the top level.
        Object.assign(properties, await this.extractHashProperties(value));
      } else {
        properties[key] = await this.describeValue(value);
      }
    }

    return properties;
  }

  /**
   * Extracts a property schema from a hash node, recursing into nested hashes
   * and arrays. Elements that are not `AssocNode`s, or whose key has no
   * unescaped string value, are ignored.
   *
   * @param {object} hashNode - Node exposing an `elements` list.
   * @returns {Promise<object>} Map of key name to schema entry.
   */
  async extractHashProperties(hashNode) {
    const { AssocNode } = await import('@ruby/prism');
    const properties = {};

    for (const element of hashNode.elements) {
      if (!(element instanceof AssocNode)) continue;

      const key = element.key.unescaped?.value;
      if (key) {
        properties[key] = await this.describeValue(element.value);
      }
    }

    return properties;
  }

  /**
   * Describes the items of an array node based on its first element.
   *
   * @param {object} arrayNode - Node exposing an `elements` list.
   * @returns {Promise<object>} `{ type: 'any' }` for an empty array, an object
   *   schema when the first element is a hash, otherwise `{ type }`.
   */
  async extractArrayItemProperties(arrayNode) {
    const { HashNode } = await import('@ruby/prism');

    if (arrayNode.elements.length === 0) {
      return { type: 'any' };
    }

    const firstItem = arrayNode.elements[0];
    if (firstItem instanceof HashNode) {
      return {
        type: 'object',
        // Must be awaited: extractHashProperties is async, and storing the
        // pending Promise here would lose the nested schema.
        properties: await this.extractHashProperties(firstItem)
      };
    }

    return { type: await this.getValueType(firstItem) };
  }

  /**
   * Maps a literal node to a schema type name.
   *
   * @param {object} node - Value node.
   * @returns {Promise<string>} `'string'`, `'number'`, `'boolean'`, `'null'`,
   *   or `'any'` for anything else (including method calls / dynamic values).
   */
  async getValueType(node) {
    const { StringNode, IntegerNode, FloatNode, TrueNode, FalseNode, NilNode, SymbolNode } = await import('@ruby/prism');

    if (node instanceof StringNode || node instanceof SymbolNode) return 'string';
    if (node instanceof IntegerNode || node instanceof FloatNode) return 'number';
    if (node instanceof TrueNode || node instanceof FalseNode) return 'boolean';
    if (node instanceof NilNode) return 'null';
    return 'any';
  }

  /**
   * Records a call node as an event when it is a recognised tracking call.
   * Failures while inspecting the node are logged and otherwise ignored so a
   * single odd call does not abort the whole file.
   *
   * @param {object} node - Call node currently on top of the ancestor stack.
   * @returns {Promise<void>}
   */
  async recordTrackingCall(node) {
    try {
      const source = this.detectSource(node);
      const eventName = this.extractEventName(node, source);
      if (!source || !eventName) return;

      const line = this.getLineNumber(node.location);
      const functionName = await this.findWrappingFunction(node, this.ancestors);
      const properties = await this.extractProperties(node, source);

      this.events.push({
        eventName,
        source,
        properties,
        filePath: this.filePath,
        line,
        functionName
      });
    } catch (nodeError) {
      console.error(`Error processing node in ${this.filePath}`);
    }
  }

  /**
   * Visits a node and the children of the node kinds this visitor knows how
   * to descend into (program, statements, class, def, if, block, arguments,
   * hash and assoc nodes). Call nodes are inspected but not descended into.
   *
   * @param {object|null|undefined} node - Node to visit; falsy values are skipped.
   * @returns {Promise<void>}
   */
  async visit(node) {
    if (!node) return;

    const { CallNode, ProgramNode, StatementsNode, DefNode, IfNode, BlockNode, ArgumentsNode, HashNode, AssocNode, ClassNode } = await import('@ruby/prism');

    this.ancestors.push(node);
    try {
      if (node instanceof CallNode) {
        await this.recordTrackingCall(node);
        return;
      }

      if (node instanceof ProgramNode) {
        await this.visit(node.statements);
      } else if (node instanceof StatementsNode) {
        for (const child of node.body) {
          await this.visit(child);
        }
      } else if (node instanceof ClassNode || node instanceof DefNode || node instanceof BlockNode) {
        await this.visit(node.body);
      } else if (node instanceof IfNode) {
        await this.visit(node.statements);
        await this.visit(node.subsequent);
      } else if (node instanceof ArgumentsNode) {
        // The argument list lives under `arguments_`, the same field the
        // extract* methods read from.
        for (const arg of node.arguments_) {
          await this.visit(arg);
        }
      } else if (node instanceof HashNode) {
        for (const element of node.elements) {
          await this.visit(element);
        }
      } else if (node instanceof AssocNode) {
        await this.visit(node.key);
        await this.visit(node.value);
      }
    } finally {
      // Keep the ancestor stack balanced even if a child visit throws.
      this.ancestors.pop();
    }
  }
}
259
+
260
/**
 * Analyzes one Ruby file and returns the tracking events found in it.
 *
 * The prism parser is loaded on first use and kept in the module-level
 * `parse` variable for later calls. Parse failures and read/processing
 * failures are logged and produce an empty result.
 *
 * @param {string} filePath - Path of the Ruby file to analyze.
 * @returns {Promise<object[]>} Events collected by the visitor, or `[]` on failure.
 */
async function analyzeRubyFile(filePath) {
  // Load the parser only once, the first time a Ruby file is seen.
  if (!parse) {
    const prism = await import('@ruby/prism');
    parse = await prism.loadPrism();
  }

  try {
    const source = fs.readFileSync(filePath, 'utf8');

    let tree;
    try {
      tree = await parse(source);
    } catch (parseError) {
      // An unparsable file contributes no events.
      console.error(`Error parsing file ${filePath}`);
      return [];
    }

    // Walk the AST from its root and hand back whatever was collected.
    const walker = new TrackingVisitor(source, filePath);
    await walker.visit(tree.value);
    return walker.events;
  } catch (fileError) {
    console.error(`Error reading or processing file ${filePath}`);
    return [];
  }
}
289
+
290
+ module.exports = { analyzeRubyFile };
@@ -1,10 +1,11 @@
1
+ const path = require('path');
2
+ const ts = require('typescript');
3
+ const { getAllFiles } = require('../fileProcessor');
1
4
  const { analyzeJsFile } = require('./analyzeJsFile');
2
5
  const { analyzeTsFile } = require('./analyzeTsFile');
3
- const { getAllFiles } = require('../fileProcessor');
4
- const ts = require('typescript');
5
- const path = require('path');
6
+ const { analyzeRubyFile } = require('./analyzeRubyFile');
6
7
 
7
- function analyzeDirectory(dirPath, customFunction) {
8
+ async function analyzeDirectory(dirPath, customFunction) {
8
9
  const files = getAllFiles(dirPath);
9
10
  const allEvents = {};
10
11
 
@@ -14,12 +15,26 @@ function analyzeDirectory(dirPath, customFunction) {
14
15
  module: ts.ModuleKind.CommonJS,
15
16
  });
16
17
 
17
- files.forEach((file) => {
18
+ for (const file of files) {
19
+ let events = [];
20
+
21
+ const isJsFile = /\.(jsx?)$/.test(file);
18
22
  const isTsFile = /\.(tsx?)$/.test(file);
19
- const events = isTsFile ? analyzeTsFile(file, program, customFunction) : analyzeJsFile(file, customFunction);
23
+ const isRubyFile = /\.(rb|ru|rake|gemspec)$/.test(file);
24
+
25
+ if (isJsFile) {
26
+ events = analyzeJsFile(file, customFunction);
27
+ } else if (isTsFile) {
28
+ events = analyzeTsFile(file, program, customFunction);
29
+ } else if (isRubyFile) {
30
+ events = await analyzeRubyFile(file);
31
+ } else {
32
+ console.info(`Skipping file ${file} because it is not a supported file type`);
33
+ continue;
34
+ }
20
35
 
21
36
  events.forEach((event) => {
22
- const relativeFilePath = path.relative(dirPath, event.filePath); // Calculate relative path
37
+ const relativeFilePath = path.relative(dirPath, event.filePath);
23
38
 
24
39
  if (!allEvents[event.eventName]) {
25
40
  allEvents[event.eventName] = {
@@ -45,7 +60,7 @@ function analyzeDirectory(dirPath, customFunction) {
45
60
  };
46
61
  }
47
62
  });
48
- });
63
+ }
49
64
 
50
65
  return allEvents;
51
66
  }
@@ -18,12 +18,18 @@ function getAllFiles(dirPath, arrayOfFiles = []) {
18
18
  }
19
19
  }
20
20
 
21
+ // Skip hidden files and directories
22
+ if (file.startsWith('.')) return
23
+
24
+ // Skip common directories we don't want to analyze
25
+ if (file === 'node_modules') return
26
+ if (file === 'coverage') return
27
+ if (file === 'temp') return
28
+ if (file === 'tmp') return
29
+
21
30
  if (stats.isDirectory()) {
22
- if (file === 'node_modules') {
23
- return; // Ignore the node_modules directory
24
- }
25
31
  arrayOfFiles = getAllFiles(fullPath, arrayOfFiles);
26
- } else if (/\.((j|t)sx?)$/.test(file)) {
32
+ } else {
27
33
  arrayOfFiles.push(fullPath);
28
34
  }
29
35
  });
@@ -5,7 +5,7 @@ const { z } = require('zod');
5
5
  const { zodResponseFormat } = require('openai/helpers/zod');
6
6
 
7
7
  const openai = new OpenAI({
8
- apiKey: process.env.OPENAI_API_KEY,
8
+ apiKey: process.env.OPENAI_API_KEY || 'undefined',
9
9
  });
10
10
  const model = 'gpt-4o-mini';
11
11
 
package/src/index.js CHANGED
@@ -4,7 +4,7 @@ const { generateYamlSchema } = require('./yamlGenerator');
4
4
  const { generateDescriptions } = require('./generateDescriptions');
5
5
 
6
6
  async function run(targetDir, outputPath, customFunction, customSourceDetails, generateDescription) {
7
- let events = analyzeDirectory(targetDir, customFunction);
7
+ let events = await analyzeDirectory(targetDir, customFunction);
8
8
  if (generateDescription) {
9
9
  events = await generateDescriptions(events, targetDir);
10
10
  }