@flisk/analyze-tracking 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -205,5 +205,12 @@ See [schema.json](schema.json) for a JSON Schema of the output.
205
205
  </details>
206
206
 
207
207
 
208
+ ## Supported languages
209
+
210
+ - JavaScript
211
+ - TypeScript
212
+ - Ruby (Experimental - only supports Segment for now)
213
+
214
+
208
215
  ## Contribute
209
216
  We’re actively improving this package. Found a bug? Want to request a feature? Open an issue or contribute directly!
package/bin/cli.js CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env node
1
+ #!/usr/bin/env node --no-warnings=ExperimentalWarning
2
2
 
3
3
  const path = require('path');
4
4
  const commandLineArgs = require('command-line-args');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flisk/analyze-tracking",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "Analyzes tracking code in a project and generates data schemas",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -20,6 +20,7 @@
20
20
  },
21
21
  "homepage": "https://github.com/fliskdata/analyze-tracking#readme",
22
22
  "dependencies": {
23
+ "@ruby/prism": "^1.4.0",
23
24
  "@typescript-eslint/parser": "^8.1.0",
24
25
  "acorn": "^8.12.1",
25
26
  "acorn-jsx": "^5.3.2",
package/schema.json CHANGED
@@ -71,6 +71,10 @@
71
71
  "unknown"
72
72
  ],
73
73
  "description": "Name of the platform where the event is sent"
74
+ },
75
+ "description": {
76
+ "type": "string",
77
+ "description": "Description of how the event is triggered"
74
78
  }
75
79
  },
76
80
  "required": [
@@ -89,6 +93,10 @@
89
93
  "$ref": "#/definitions/property"
90
94
  }
91
95
  }
96
+ },
97
+ "description": {
98
+ "type": "string",
99
+ "description": "Description of the event"
92
100
  }
93
101
  },
94
102
  "required": [
@@ -0,0 +1,290 @@
1
+ const fs = require('fs');
2
+
3
+ let parse = null;
4
+
5
// Visitor that walks a Prism (Ruby) AST and records analytics tracking calls.
class TrackingVisitor {
  /**
   * @param {string} code - Full Ruby source text of the file being analyzed.
   * @param {string} filePath - Path of the file; copied onto every recorded event.
   */
  constructor(code, filePath) {
    this.code = code;
    this.lines = code.split('\n');
    this.ancestors = [];
    this.events = [];
    this.filePath = filePath;
  }

  /**
   * Converts a node location into a 1-based line number.
   *
   * @param {{ startOffset: number }} location - Location carrying the node's start offset.
   * @returns {number} Number of '\n' characters before the offset, plus one.
   */
  getLineNumber(location) {
    // NOTE(review): assumes startOffset indexes this.code the same way
    // String#slice does — confirm for sources containing non-ASCII text.
    const beforeStart = this.code.slice(0, location.startOffset);
    return beforeStart.split('\n').length;
  }

  /**
   * Names the closest enclosing definition of a node.
   *
   * @param {object} node - The node being located (not inspected; kept for the call signature).
   * @param {object[]} ancestors - Path from the root to the node, outermost first.
   * @returns {Promise<string>} The method name for a DefNode, 'block' for a
   *   BlockNode or LambdaNode, or 'global' when none of those encloses the node.
   */
  async findWrappingFunction(node, ancestors) {
    const { DefNode, BlockNode, LambdaNode } = await import('@ruby/prism');

    // Walk from the innermost ancestor outwards so the nearest scope wins
    for (let i = ancestors.length - 1; i >= 0; i--) {
      const current = ancestors[i];

      // Handle method definitions
      if (current instanceof DefNode) {
        return current.name;
      }

      // Handle blocks and lambdas
      if (current instanceof BlockNode || current instanceof LambdaNode) {
        return 'block';
      }
    }
    return 'global';
  }

  /**
   * Identifies which analytics library a call belongs to.
   *
   * @param {object} node - A call node (may be null/undefined).
   * @returns {string|null} 'segment' for `Analytics.track`, otherwise null.
   */
  detectSource(node) {
    if (!node) return null;

    // Only calls with an explicit receiver can match a known library
    if (node.receiver) {
      const objectName = node.receiver.name;
      const methodName = node.name;

      if (objectName === 'Analytics' && methodName === 'track') return 'segment';
    }

    return null;
  }

  /**
   * Returns the elements of the first argument of a call, or null when the
   * call has no arguments or its first argument has no `elements` array.
   *
   * @param {object} node - A call node.
   * @returns {object[]|null}
   */
  getSegmentParams(node) {
    const elements = node?.arguments_?.arguments_?.[0]?.elements;
    return Array.isArray(elements) ? elements : null;
  }

  /**
   * Reads the event name from a tracking call.
   *
   * @param {object} node - A call node.
   * @param {string|null} source - Result of detectSource for this node.
   * @returns {string|null} Value of the `event` entry for Segment calls; null
   *   for other sources, or when the call has no usable arguments or no
   *   `event` entry.
   */
  extractEventName(node, source) {
    if (source !== 'segment') return null;

    // A malformed call is skipped instead of raising a TypeError
    const params = this.getSegmentParams(node);
    if (!params) return null;

    const eventProperty = params.find(param => param?.key?.unescaped?.value === 'event');
    return eventProperty?.value?.unescaped?.value || null;
  }

  /**
   * Builds the property schema for a tracking call.
   *
   * @param {object} node - A call node.
   * @param {string|null} source - Result of detectSource for this node.
   * @returns {Promise<object|null>} Map of property name to schema for Segment
   *   calls, null for any other source.
   */
  async extractProperties(node, source) {
    if (source !== 'segment') return null;

    const { HashNode } = await import('@ruby/prism');
    const params = this.getSegmentParams(node) ?? [];
    const properties = {};

    // Process all top-level fields except 'event'
    for (const param of params) {
      const key = param?.key?.unescaped?.value;
      if (!key || key === 'event') continue;

      const value = param?.value;
      if (key === 'properties' && value instanceof HashNode) {
        // Merge properties from the 'properties' hash into the top level
        Object.assign(properties, await this.extractHashProperties(value));
      } else {
        properties[key] = await this.describeValue(value);
      }
    }

    return properties;
  }

  /**
   * Builds a property schema from the entries of a hash node.
   *
   * @param {object} hashNode - Node exposing an `elements` array.
   * @returns {Promise<object>} Map of key to schema; elements that are not
   *   AssocNode, or whose key has no unescaped value, are skipped.
   */
  async extractHashProperties(hashNode) {
    const { AssocNode } = await import('@ruby/prism');
    const properties = {};

    for (const element of hashNode.elements) {
      if (!(element instanceof AssocNode)) continue;

      const key = element.key.unescaped?.value;
      if (!key) continue;

      properties[key] = await this.describeValue(element.value);
    }

    return properties;
  }

  /**
   * Describes a single value node as a schema entry.
   *
   * @param {object} value - Value node of a hash entry.
   * @returns {Promise<object>} `{ type: 'object', properties }` for hashes,
   *   `{ type: 'array', items }` for arrays, `{ type }` otherwise.
   */
  async describeValue(value) {
    const { HashNode, ArrayNode } = await import('@ruby/prism');

    if (value instanceof HashNode) {
      return {
        type: 'object',
        properties: await this.extractHashProperties(value)
      };
    }

    if (value instanceof ArrayNode) {
      return {
        type: 'array',
        items: await this.extractArrayItemProperties(value)
      };
    }

    return { type: await this.getValueType(value) };
  }

  /**
   * Describes the items of an array node using its first element only.
   *
   * @param {object} arrayNode - Node exposing an `elements` array.
   * @returns {Promise<object>} `{ type: 'any' }` for an empty array, an object
   *   schema when the first element is a hash, else `{ type }` of that element.
   */
  async extractArrayItemProperties(arrayNode) {
    const { HashNode } = await import('@ruby/prism');

    if (arrayNode.elements.length === 0) {
      return { type: 'any' };
    }

    const firstItem = arrayNode.elements[0];
    if (firstItem instanceof HashNode) {
      return {
        type: 'object',
        // Must be awaited: otherwise a pending Promise is stored instead of the schema
        properties: await this.extractHashProperties(firstItem)
      };
    }

    return { type: await this.getValueType(firstItem) };
  }

  /**
   * Maps a literal node to a schema type name.
   *
   * @param {object} node - Value node.
   * @returns {Promise<string>} 'string', 'number', 'boolean', 'null' or 'any'.
   */
  async getValueType(node) {
    const { StringNode, IntegerNode, FloatNode, TrueNode, FalseNode, NilNode, SymbolNode, CallNode } = await import('@ruby/prism');

    if (node instanceof StringNode) return 'string';
    if (node instanceof IntegerNode || node instanceof FloatNode) return 'number';
    if (node instanceof TrueNode || node instanceof FalseNode) return 'boolean';
    if (node instanceof NilNode) return 'null';
    if (node instanceof SymbolNode) return 'string';
    if (node instanceof CallNode) return 'any'; // Dynamic values
    return 'any'; // Default type
  }

  /**
   * Records an event for a call node when it is a recognised tracking call.
   * Errors are logged and swallowed so one bad node does not stop the walk.
   *
   * @param {object} node - A call node; this.ancestors must already include it.
   * @returns {Promise<void>}
   */
  async recordTrackingCall(node) {
    try {
      const source = this.detectSource(node);
      const eventName = this.extractEventName(node, source);
      if (!source || !eventName) return;

      const line = this.getLineNumber(node.location);
      const functionName = await this.findWrappingFunction(node, this.ancestors);
      const properties = await this.extractProperties(node, source);

      this.events.push({
        eventName,
        source,
        properties,
        filePath: this.filePath,
        line,
        functionName
      });
    } catch (nodeError) {
      console.error(`Error processing node in ${this.filePath}`, nodeError?.message ?? nodeError);
    }
  }

  /**
   * Visits a node and the children of the node kinds handled below, recording
   * tracking calls into this.events.
   *
   * @param {object|null|undefined} node - AST node; falsy values are ignored.
   * @returns {Promise<void>}
   */
  async visit(node) {
    const { CallNode, ProgramNode, StatementsNode, DefNode, IfNode, BlockNode, ArgumentsNode, HashNode, AssocNode, ClassNode } = await import('@ruby/prism');
    if (!node) return;

    this.ancestors.push(node);

    try {
      if (node instanceof CallNode) {
        // Check if this is a tracking call
        await this.recordTrackingCall(node);

        // Descend into the call itself so calls nested in receivers,
        // arguments and attached blocks are reached; missing children are
        // ignored by the falsy guard at the top of visit.
        await this.visit(node.receiver);
        await this.visit(node.arguments_);
        await this.visit(node.block);
      } else if (node instanceof ProgramNode) {
        await this.visit(node.statements);
      } else if (node instanceof StatementsNode) {
        for (const child of node.body) {
          await this.visit(child);
        }
      } else if (node instanceof ClassNode || node instanceof DefNode || node instanceof BlockNode) {
        await this.visit(node.body);
      } else if (node instanceof IfNode) {
        await this.visit(node.statements);
        await this.visit(node.subsequent);
      } else if (node instanceof ArgumentsNode) {
        // The argument list lives in `arguments_`, as read by getSegmentParams
        for (const arg of node.arguments_) {
          await this.visit(arg);
        }
      } else if (node instanceof HashNode) {
        for (const element of node.elements) {
          await this.visit(element);
        }
      } else if (node instanceof AssocNode) {
        await this.visit(node.key);
        await this.visit(node.value);
      }
    } finally {
      // Keep the ancestor stack balanced even if a child visit throws
      this.ancestors.pop();
    }
  }
}
259
+
260
/**
 * Parses one Ruby file and returns the tracking events found in it.
 *
 * Read, parse and traversal failures are logged (with the underlying error
 * message) and yield an empty array rather than rejecting.
 *
 * @param {string} filePath - Path of the Ruby file to analyze.
 * @returns {Promise<object[]>} Events collected by TrackingVisitor, or [] on failure.
 */
async function analyzeRubyFile(filePath) {
  // Lazy load the ruby prism parser; the module-level `parse` caches it across calls
  if (!parse) {
    const { loadPrism } = await import('@ruby/prism');
    parse = await loadPrism();
  }

  try {
    const code = fs.readFileSync(filePath, 'utf8');

    let ast;
    try {
      ast = await parse(code);
    } catch (parseError) {
      console.error(`Error parsing file ${filePath}`, parseError?.message ?? parseError);
      return []; // Return empty events array if parsing fails
    }

    // Traverse the AST starting from the program node
    const visitor = new TrackingVisitor(code, filePath);
    await visitor.visit(ast.value);

    return visitor.events;
  } catch (fileError) {
    console.error(`Error reading or processing file ${filePath}`, fileError?.message ?? fileError);
    return [];
  }
}
289
+
290
+ module.exports = { analyzeRubyFile };
@@ -1,10 +1,11 @@
1
+ const path = require('path');
2
+ const ts = require('typescript');
3
+ const { getAllFiles } = require('../fileProcessor');
1
4
  const { analyzeJsFile } = require('./analyzeJsFile');
2
5
  const { analyzeTsFile } = require('./analyzeTsFile');
3
- const { getAllFiles } = require('../fileProcessor');
4
- const ts = require('typescript');
5
- const path = require('path');
6
+ const { analyzeRubyFile } = require('./analyzeRubyFile');
6
7
 
7
- function analyzeDirectory(dirPath, customFunction) {
8
+ async function analyzeDirectory(dirPath, customFunction) {
8
9
  const files = getAllFiles(dirPath);
9
10
  const allEvents = {};
10
11
 
@@ -14,12 +15,26 @@ function analyzeDirectory(dirPath, customFunction) {
14
15
  module: ts.ModuleKind.CommonJS,
15
16
  });
16
17
 
17
- files.forEach((file) => {
18
+ for (const file of files) {
19
+ let events = [];
20
+
21
+ const isJsFile = /\.(jsx?)$/.test(file);
18
22
  const isTsFile = /\.(tsx?)$/.test(file);
19
- const events = isTsFile ? analyzeTsFile(file, program, customFunction) : analyzeJsFile(file, customFunction);
23
+ const isRubyFile = /\.(rb|ru|rake|gemspec)$/.test(file);
24
+
25
+ if (isJsFile) {
26
+ events = analyzeJsFile(file, customFunction);
27
+ } else if (isTsFile) {
28
+ events = analyzeTsFile(file, program, customFunction);
29
+ } else if (isRubyFile) {
30
+ events = await analyzeRubyFile(file);
31
+ } else {
32
+ console.info(`Skipping file ${file} because it is not a supported file type`);
33
+ continue;
34
+ }
20
35
 
21
36
  events.forEach((event) => {
22
- const relativeFilePath = path.relative(dirPath, event.filePath); // Calculate relative path
37
+ const relativeFilePath = path.relative(dirPath, event.filePath);
23
38
 
24
39
  if (!allEvents[event.eventName]) {
25
40
  allEvents[event.eventName] = {
@@ -45,7 +60,7 @@ function analyzeDirectory(dirPath, customFunction) {
45
60
  };
46
61
  }
47
62
  });
48
- });
63
+ }
49
64
 
50
65
  return allEvents;
51
66
  }
@@ -18,12 +18,18 @@ function getAllFiles(dirPath, arrayOfFiles = []) {
18
18
  }
19
19
  }
20
20
 
21
+ // Skip hidden files and directories
22
+ if (file.startsWith('.')) return
23
+
24
+ // Skip common directories we don't want to analyze
25
+ if (file === 'node_modules') return
26
+ if (file === 'coverage') return
27
+ if (file === 'temp') return
28
+ if (file === 'tmp') return
29
+
21
30
  if (stats.isDirectory()) {
22
- if (file === 'node_modules') {
23
- return; // Ignore the node_modules directory
24
- }
25
31
  arrayOfFiles = getAllFiles(fullPath, arrayOfFiles);
26
- } else if (/\.((j|t)sx?)$/.test(file)) {
32
+ } else {
27
33
  arrayOfFiles.push(fullPath);
28
34
  }
29
35
  });
@@ -5,23 +5,27 @@ const { z } = require('zod');
5
5
  const { zodResponseFormat } = require('openai/helpers/zod');
6
6
 
7
7
  const openai = new OpenAI({
8
- apiKey: process.env.OPENAI_API_KEY,
8
+ apiKey: process.env.OPENAI_API_KEY || 'undefined',
9
9
  });
10
+ const model = 'gpt-4o-mini';
10
11
 
11
12
  function createPrompt(eventName, properties, implementations, codebaseDir) {
12
- // Initialize the prompt
13
- let prompt = `You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.\n\n`;
14
-
15
- // Add event name
16
- prompt += `Event Name: "${eventName}"\n\n`;
17
-
18
- // Add properties
13
+ let prompt = `Event Name: "${eventName}"\n\n`;
19
14
  prompt += `Properties:\n`;
20
- for (const propName in properties) {
21
- const prop = properties[propName];
22
- prompt += `- "${propName}" (type: ${prop.type})\n`;
15
+
16
+ function appendPropertiesToPrompt(properties, indent = '') {
17
+ for (const propName in properties) {
18
+ const prop = properties[propName];
19
+ prompt += `${indent}- "${propName}" (type: ${prop.type})\n`;
20
+ if (prop.properties) {
21
+ prompt += `${indent} Sub-properties:\n`;
22
+ appendPropertiesToPrompt(prop.properties, indent + ' ');
23
+ }
24
+ }
23
25
  }
24
26
 
27
+ appendPropertiesToPrompt(properties);
28
+
25
29
  // Add implementations with code snippets
26
30
  prompt += `\nImplementations:\n`;
27
31
  for (const impl of implementations) {
@@ -53,18 +57,39 @@ function getCodeSnippet(filePath, lineNumber, contextLines = 5) {
53
57
  }
54
58
 
55
59
  function createEventDescriptionSchema(properties) {
60
+ function buildPropertySchema(prop) {
61
+ if (prop.properties) {
62
+ const subPropertiesSchema = {};
63
+ for (const subPropName in prop.properties) {
64
+ subPropertiesSchema[subPropName] = buildPropertySchema(prop.properties[subPropName]);
65
+ }
66
+ return z.object({
67
+ description: z
68
+ .string()
69
+ .describe('A maximum of 10 words describing the property and what it means'),
70
+ properties: z.object(subPropertiesSchema),
71
+ });
72
+ } else {
73
+ return z.object({
74
+ description: z
75
+ .string()
76
+ .describe('A maximum of 10 words describing the property and what it means'),
77
+ });
78
+ }
79
+ }
80
+
56
81
  // Define the schema for properties
57
82
  const propertiesSchema = {};
58
83
  for (const propName in properties) {
59
- propertiesSchema[propName] = z.object({
60
- description: z.string().describe('A maximum of 10 words describing the property and what it means'),
61
- });
84
+ propertiesSchema[propName] = buildPropertySchema(properties[propName]);
62
85
  }
63
86
 
64
87
  // Define the schema for implementations
65
88
  const implementationsSchema = z.array(
66
89
  z.object({
67
- description: z.string().describe('A maximum of 10 words describing when this event is triggered'),
90
+ description: z
91
+ .string()
92
+ .describe('A maximum of 10 words describing how this event is triggered without using the word "triggered"'),
68
93
  path: z.string(),
69
94
  line: z.number(),
70
95
  })
@@ -72,7 +97,9 @@ function createEventDescriptionSchema(properties) {
72
97
 
73
98
  // Construct the full schema
74
99
  const eventDescriptionSchema = z.object({
75
- eventDescription: z.string().describe('A maximum of 10 words describing the event and what it describes'),
100
+ eventDescription: z
101
+ .string()
102
+ .describe('A maximum of 10 words describing the event and what it tracks without using the word "tracks"'),
76
103
  properties: z.object(propertiesSchema),
77
104
  implementations: implementationsSchema,
78
105
  });
@@ -83,11 +110,11 @@ function createEventDescriptionSchema(properties) {
83
110
  async function sendPromptToLLM(prompt, schema) {
84
111
  try {
85
112
  const completion = await openai.beta.chat.completions.parse({
86
- model: 'gpt-4o-mini',
113
+ model,
87
114
  messages: [
88
115
  {
89
116
  role: 'system',
90
- content: 'You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations',
117
+ content: 'You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.',
91
118
  },
92
119
  {
93
120
  role: 'user',
@@ -97,7 +124,10 @@ async function sendPromptToLLM(prompt, schema) {
97
124
  response_format: zodResponseFormat(schema, 'event_description'),
98
125
  });
99
126
 
100
- return completion.choices[0].message.parsed;
127
+ return {
128
+ descriptions: completion.choices[0].message.parsed,
129
+ usage: completion.usage,
130
+ };
101
131
  } catch (error) {
102
132
  console.error('Error during LLM response parsing:', error);
103
133
  return null;
@@ -115,36 +145,58 @@ async function generateEventDescription(eventName, event, codebaseDir) {
115
145
  const eventDescriptionSchema = createEventDescriptionSchema(properties);
116
146
 
117
147
  // Send prompt to the LLM and get the structured response
118
- const descriptions = await sendPromptToLLM(prompt, eventDescriptionSchema);
148
+ const { descriptions, usage } = await sendPromptToLLM(prompt, eventDescriptionSchema);
119
149
 
120
- return { eventName, descriptions };
150
+ return { eventName, descriptions, usage };
121
151
  }
122
152
 
123
153
  async function generateDescriptions(events, codebaseDir) {
154
+ console.log(`Generating descriptions using ${model}`);
155
+
124
156
  const eventPromises = Object.entries(events).map(([eventName, event]) =>
125
157
  generateEventDescription(eventName, event, codebaseDir)
126
158
  );
127
159
 
160
+ console.log(`Running ${eventPromises.length} prompts in parallel...`);
161
+
128
162
  const results = await Promise.all(eventPromises);
129
163
 
164
+ let promptTokens = 0;
165
+ let completionTokens = 0;
166
+
130
167
  // Process results and update the events object
131
- results.forEach(({ eventName, descriptions }) => {
168
+ results.forEach(({ eventName, descriptions, usage }) => {
132
169
  if (descriptions) {
170
+ promptTokens += usage.prompt_tokens;
171
+ completionTokens += usage.completion_tokens;
172
+
133
173
  const event = events[eventName];
134
174
  event.description = descriptions.eventDescription;
135
175
 
136
- // Update property descriptions
137
- for (const propName in descriptions.properties) {
138
- if (event.properties[propName]) {
139
- event.properties[propName].description = descriptions.properties[propName].description;
176
+ // Update property descriptions recursively
177
+ function updatePropertyDescriptions(eventProperties, descriptionProperties) {
178
+ for (const propName in descriptionProperties) {
179
+ if (eventProperties[propName]) {
180
+ eventProperties[propName].description = descriptionProperties[propName].description;
181
+ if (eventProperties[propName].properties && descriptionProperties[propName].properties) {
182
+ updatePropertyDescriptions(
183
+ eventProperties[propName].properties,
184
+ descriptionProperties[propName].properties
185
+ );
186
+ }
187
+ }
140
188
  }
141
189
  }
142
190
 
191
+ updatePropertyDescriptions(event.properties, descriptions.properties);
192
+
143
193
  // Update implementations with descriptions
144
194
  for (let i = 0; i < descriptions.implementations.length; i++) {
145
195
  if (event.implementations[i]) {
146
- if (event.implementations[i].path === descriptions.implementations[i].path &&
147
- event.implementations[i].line === descriptions.implementations[i].line) {
196
+ if (
197
+ event.implementations[i].path === descriptions.implementations[i].path &&
198
+ event.implementations[i].line === descriptions.implementations[i].line
199
+ ) {
148
200
  event.implementations[i].description = descriptions.implementations[i].description;
149
201
  } else {
150
202
  console.error(`Returned implementation description does not match path or line for event: ${eventName}`);
@@ -156,6 +208,10 @@ async function generateDescriptions(events, codebaseDir) {
156
208
  }
157
209
  });
158
210
 
211
+ console.log(`Prompt tokens used: ${promptTokens}`);
212
+ console.log(`Completion tokens used: ${completionTokens}`);
213
+ console.log(`Total tokens used: ${promptTokens + completionTokens}`);
214
+
159
215
  return events;
160
216
  }
161
217
 
package/src/index.js CHANGED
@@ -4,7 +4,7 @@ const { generateYamlSchema } = require('./yamlGenerator');
4
4
  const { generateDescriptions } = require('./generateDescriptions');
5
5
 
6
6
  async function run(targetDir, outputPath, customFunction, customSourceDetails, generateDescription) {
7
- let events = analyzeDirectory(targetDir, customFunction);
7
+ let events = await analyzeDirectory(targetDir, customFunction);
8
8
  if (generateDescription) {
9
9
  events = await generateDescriptions(events, targetDir);
10
10
  }