@flisk/analyze-tracking 0.2.8 → 0.3.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
package/README.md CHANGED
@@ -24,9 +24,12 @@ npx @flisk/analyze-tracking /path/to/project [options]
24
24
  ```
25
25
 
26
26
  ### Key Options:
27
+ - `-g, --generateDescription`: Generate descriptions of fields (default: `false`)
27
28
  - `-o, --output <output_file>`: Name of the output file (default: `tracking-schema.yaml`)
28
29
  - `-c, --customFunction <function_name>`: Specify a custom tracking function
29
30
 
31
+ 🔑&nbsp; **Important:** you must set the `OPENAI_API_KEY` environment variable to use `generateDescription`
32
+
30
33
  <details>
31
34
  <summary>Note on Custom Functions 💡</summary>
32
35
 
@@ -53,18 +56,23 @@ source:
53
56
  timestamp: <commit_timestamp>
54
57
  events:
55
58
  <event_name>:
59
+ description: <ai_generated_description>
56
60
  implementations:
57
- - path: <path_to_file>
61
+ - description: <ai_generated_description>
62
+ path: <path_to_file>
58
63
  line: <line_number>
59
64
  function: <function_name>
60
65
  destination: <platform_name>
61
66
  properties:
62
67
  <property_name>:
68
+ description: <ai_generated_description>
63
69
  type: <property_type>
64
70
  ```
65
71
 
66
72
  Use this to understand where your events live in the code and how they’re being tracked.
67
73
 
74
+ [GPT-4o mini](https://platform.openai.com/docs/models/gpt-4o-mini) is used for generating descriptions of events, properties, and implementations.
75
+
68
76
  See [schema.json](schema.json) for a JSON Schema of the output.
69
77
 
70
78
 
package/bin/cli.js CHANGED
@@ -13,6 +13,12 @@ const optionDefinitions = [
13
13
  type: String,
14
14
  defaultOption: true,
15
15
  },
16
+ {
17
+ name: 'generateDescription',
18
+ alias: 'g',
19
+ type: Boolean,
20
+ defaultValue: false,
21
+ },
16
22
  {
17
23
  name: 'output',
18
24
  alias: 'o',
@@ -48,6 +54,7 @@ const optionDefinitions = [
48
54
  const options = commandLineArgs(optionDefinitions);
49
55
  const {
50
56
  targetDir,
57
+ generateDescription,
51
58
  output,
52
59
  customFunction,
53
60
  repositoryUrl,
@@ -73,4 +80,11 @@ if (!targetDir) {
73
80
  process.exit(1);
74
81
  }
75
82
 
76
- run(path.resolve(targetDir), output, customFunction, customSourceDetails);
83
+ if (generateDescription) {
84
+ if (!process.env.OPENAI_API_KEY) {
85
+ console.error('Please set the `OPENAI_API_KEY` environment variable to use `generateDescription`.');
86
+ process.exit(1);
87
+ }
88
+ }
89
+
90
+ run(path.resolve(targetDir), output, customFunction, customSourceDetails, generateDescription);
package/bin/help.js CHANGED
@@ -38,6 +38,14 @@ const helpContent = [
38
38
  description: 'Display this usage guide.',
39
39
  type: Boolean
40
40
  },
41
+ {
42
+ name: 'generateDescription',
43
+ alias: 'g',
44
+ description: 'Generate descriptions of fields.',
45
+ type: Boolean,
46
+ defaultValue: false,
47
+ typeLabel: '{underline false}'
48
+ },
41
49
  {
42
50
  name: 'output',
43
51
  alias: 'o',
@@ -51,7 +59,7 @@ const helpContent = [
51
59
  alias: 'c',
52
60
  description: 'Specify a custom tracking function.',
53
61
  type: String,
54
- typeLabel: '{underline yourCustomFunctionName}'
62
+ typeLabel: '{italic yourCustomFunctionName}'
55
63
  }
56
64
  ]
57
65
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flisk/analyze-tracking",
3
- "version": "0.2.8",
3
+ "version": "0.3.0",
4
4
  "description": "Analyzes tracking code in a project and generates data schemas",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -29,7 +29,9 @@
29
29
  "command-line-usage": "^7.0.3",
30
30
  "isomorphic-git": "^1.27.1",
31
31
  "js-yaml": "^4.1.0",
32
- "typescript": "^5.5.4"
32
+ "openai": "^4.67.1",
33
+ "typescript": "^5.5.4",
34
+ "zod": "^3.23.8"
33
35
  },
34
36
  "devDependencies": {
35
37
  "jest": "^29.7.0"
@@ -10,47 +10,61 @@ const parserOptions = { ecmaVersion: 'latest', sourceType: 'module', locations:
10
10
  extend(walk.base);
11
11
 
12
12
  function analyzeJsFile(filePath, customFunction) {
13
- const code = fs.readFileSync(filePath, 'utf8');
14
- const ast = parser.parse(code, parserOptions);
15
- const events = [];
16
-
17
- walk.ancestor(ast, {
18
- CallExpression(node, ancestors) {
19
- const source = detectSourceJs(node, customFunction);
20
- if (source === 'unknown') return;
21
-
22
- let eventName = null;
23
- let propertiesNode = null;
24
-
25
- if (source === 'googleanalytics' && node.arguments.length >= 3) {
26
- eventName = node.arguments[1]?.value || null;
27
- propertiesNode = node.arguments[2];
28
- } else if (source === 'snowplow' && node.arguments.length >= 2) {
29
- const actionProperty = node.arguments[1].properties.find(prop => prop.key.name === 'action');
30
- eventName = actionProperty ? actionProperty.value.value : null;
31
- propertiesNode = node.arguments[1];
32
- } else if (node.arguments.length >= 2) {
33
- eventName = node.arguments[0]?.value || null;
34
- propertiesNode = node.arguments[1];
35
- }
36
-
37
- const line = node.loc.start.line;
38
- const functionName = findWrappingFunctionJs(node, ancestors);
39
-
40
- if (eventName && propertiesNode && propertiesNode.type === 'ObjectExpression') {
41
- const properties = extractJsProperties(propertiesNode);
42
-
43
- events.push({
44
- eventName,
45
- source,
46
- properties,
47
- filePath,
48
- line,
49
- functionName
50
- });
51
- }
52
- },
53
- });
13
+ let events = [];
14
+ try {
15
+ const code = fs.readFileSync(filePath, 'utf8');
16
+ let ast;
17
+ try {
18
+ ast = parser.parse(code, parserOptions);
19
+ } catch (parseError) {
20
+ console.error(`Error parsing file ${filePath}`);
21
+ return events; // Return empty events array if parsing fails
22
+ }
23
+
24
+ walk.ancestor(ast, {
25
+ CallExpression(node, ancestors) {
26
+ try {
27
+ const source = detectSourceJs(node, customFunction);
28
+ if (source === 'unknown') return;
29
+
30
+ let eventName = null;
31
+ let propertiesNode = null;
32
+
33
+ if (source === 'googleanalytics' && node.arguments.length >= 3) {
34
+ eventName = node.arguments[1]?.value || null;
35
+ propertiesNode = node.arguments[2];
36
+ } else if (source === 'snowplow' && node.arguments.length >= 2) {
37
+ const actionProperty = node.arguments[1].properties.find(prop => prop.key.name === 'action');
38
+ eventName = actionProperty ? actionProperty.value.value : null;
39
+ propertiesNode = node.arguments[1];
40
+ } else if (node.arguments.length >= 2) {
41
+ eventName = node.arguments[0]?.value || null;
42
+ propertiesNode = node.arguments[1];
43
+ }
44
+
45
+ const line = node.loc.start.line;
46
+ const functionName = findWrappingFunctionJs(node, ancestors);
47
+
48
+ if (eventName && propertiesNode && propertiesNode.type === 'ObjectExpression') {
49
+ const properties = extractJsProperties(propertiesNode);
50
+
51
+ events.push({
52
+ eventName,
53
+ source,
54
+ properties,
55
+ filePath,
56
+ line,
57
+ functionName
58
+ });
59
+ }
60
+ } catch (nodeError) {
61
+ console.error(`Error processing node in ${filePath}`);
62
+ }
63
+ },
64
+ });
65
+ } catch (fileError) {
66
+ console.error(`Error reading or processing file ${filePath}`);
67
+ }
54
68
 
55
69
  return events;
56
70
  }
@@ -2,49 +2,66 @@ const ts = require('typescript');
2
2
  const { detectSourceTs, findWrappingFunctionTs, extractTsProperties } = require('./helpers');
3
3
 
4
4
  function analyzeTsFile(filePath, program, customFunction) {
5
- const sourceFile = program.getSourceFile(filePath);
6
- const checker = program.getTypeChecker();
7
- const events = [];
8
-
9
- function visit(node) {
10
- if (ts.isCallExpression(node)) {
11
- const source = detectSourceTs(node, customFunction);
12
- if (source === 'unknown') return;
13
-
14
- let eventName = null;
15
- let propertiesNode = null;
16
-
17
- if (source === 'googleanalytics' && node.arguments.length >= 3) {
18
- eventName = node.arguments[1]?.text || null;
19
- propertiesNode = node.arguments[2];
20
- } else if (source === 'snowplow' && node.arguments.length >= 2) {
21
- const actionProperty = node.arguments[1].properties.find(prop => prop.name.escapedText === 'action');
22
- eventName = actionProperty ? actionProperty.initializer.text : null;
23
- propertiesNode = node.arguments[1];
24
- } else if (node.arguments.length >= 2) {
25
- eventName = node.arguments[0]?.text || null;
26
- propertiesNode = node.arguments[1];
27
- }
5
+ let events = [];
6
+ try {
7
+ const sourceFile = program.getSourceFile(filePath);
8
+ if (!sourceFile) {
9
+ console.error(`Error: Unable to get source file for ${filePath}`);
10
+ return events;
11
+ }
12
+
13
+ const checker = program.getTypeChecker();
14
+
15
+ function visit(node) {
16
+ try {
17
+ if (ts.isCallExpression(node)) {
18
+ const source = detectSourceTs(node, customFunction);
19
+ if (source === 'unknown') return;
20
+
21
+ let eventName = null;
22
+ let propertiesNode = null;
28
23
 
29
- const line = sourceFile.getLineAndCharacterOfPosition(node.getStart()).line + 1;
30
- const functionName = findWrappingFunctionTs(node);
31
-
32
- if (eventName && propertiesNode && ts.isObjectLiteralExpression(propertiesNode)) {
33
- const properties = extractTsProperties(checker, propertiesNode);
34
- events.push({
35
- eventName,
36
- source,
37
- properties,
38
- filePath,
39
- line,
40
- functionName
41
- });
24
+ if (source === 'googleanalytics' && node.arguments.length >= 3) {
25
+ eventName = node.arguments[1]?.text || null;
26
+ propertiesNode = node.arguments[2];
27
+ } else if (source === 'snowplow' && node.arguments.length >= 2) {
28
+ const actionProperty = node.arguments[1].properties.find(prop => prop.name.escapedText === 'action');
29
+ eventName = actionProperty ? actionProperty.initializer.text : null;
30
+ propertiesNode = node.arguments[1];
31
+ } else if (node.arguments.length >= 2) {
32
+ eventName = node.arguments[0]?.text || null;
33
+ propertiesNode = node.arguments[1];
34
+ }
35
+
36
+ const line = sourceFile.getLineAndCharacterOfPosition(node.getStart()).line + 1;
37
+ const functionName = findWrappingFunctionTs(node);
38
+
39
+ if (eventName && propertiesNode && ts.isObjectLiteralExpression(propertiesNode)) {
40
+ try {
41
+ const properties = extractTsProperties(checker, propertiesNode);
42
+ events.push({
43
+ eventName,
44
+ source,
45
+ properties,
46
+ filePath,
47
+ line,
48
+ functionName
49
+ });
50
+ } catch (propertyError) {
51
+ console.error(`Error extracting properties in ${filePath} at line ${line}`);
52
+ }
53
+ }
54
+ }
55
+ ts.forEachChild(node, visit);
56
+ } catch (nodeError) {
57
+ console.error(`Error processing node in ${filePath}`);
42
58
  }
43
59
  }
44
- ts.forEachChild(node, visit);
45
- }
46
60
 
47
- ts.forEachChild(sourceFile, visit);
61
+ ts.forEachChild(sourceFile, visit);
62
+ } catch (fileError) {
63
+ console.error(`Error analyzing TypeScript file ${filePath}`);
64
+ }
48
65
 
49
66
  return events;
50
67
  }
@@ -0,0 +1,162 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const OpenAI = require('openai');
4
+ const { z } = require('zod');
5
+ const { zodResponseFormat } = require('openai/helpers/zod');
6
+
7
+ const openai = new OpenAI({
8
+ apiKey: process.env.OPENAI_API_KEY,
9
+ });
10
+
11
+ function createPrompt(eventName, properties, implementations, codebaseDir) {
12
+ // Initialize the prompt
13
+ let prompt = `You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.\n\n`;
14
+
15
+ // Add event name
16
+ prompt += `Event Name: "${eventName}"\n\n`;
17
+
18
+ // Add properties
19
+ prompt += `Properties:\n`;
20
+ for (const propName in properties) {
21
+ const prop = properties[propName];
22
+ prompt += `- "${propName}" (type: ${prop.type})\n`;
23
+ }
24
+
25
+ // Add implementations with code snippets
26
+ prompt += `\nImplementations:\n`;
27
+ for (const impl of implementations) {
28
+ const codeSnippet = getCodeSnippet(path.join(codebaseDir, impl.path), impl.line);
29
+ prompt += `- Path: "${impl.path}", Line: ${impl.line}, Function: "${impl.function}", Destination: "${impl.destination}"\n`;
30
+ prompt += `Code Snippet:\n`;
31
+ prompt += '```\n';
32
+ prompt += codeSnippet + '\n';
33
+ prompt += '```\n';
34
+ }
35
+
36
+ return prompt;
37
+ }
38
+
39
+ function getCodeSnippet(filePath, lineNumber, contextLines = 5) {
40
+ // Extract a code snippet from the file around the specified line
41
+ try {
42
+ const fileContent = fs.readFileSync(filePath, 'utf8');
43
+ const lines = fileContent.split('\n');
44
+ const startLine = Math.max(0, lineNumber - contextLines - 1);
45
+ const endLine = Math.min(lines.length, lineNumber + contextLines);
46
+
47
+ const snippetLines = lines.slice(startLine, endLine);
48
+ return snippetLines.join('\n');
49
+ } catch (e) {
50
+ console.error(`Failed to read file ${filePath}:`, e);
51
+ return '';
52
+ }
53
+ }
54
+
55
+ function createEventDescriptionSchema(properties) {
56
+ // Define the schema for properties
57
+ const propertiesSchema = {};
58
+ for (const propName in properties) {
59
+ propertiesSchema[propName] = z.object({
60
+ description: z.string().describe('A maximum of 10 words describing the property and what it means'),
61
+ });
62
+ }
63
+
64
+ // Define the schema for implementations
65
+ const implementationsSchema = z.array(
66
+ z.object({
67
+ description: z.string().describe('A maximum of 10 words describing when this event is triggered'),
68
+ path: z.string(),
69
+ line: z.number(),
70
+ })
71
+ );
72
+
73
+ // Construct the full schema
74
+ const eventDescriptionSchema = z.object({
75
+ eventDescription: z.string().describe('A maximum of 10 words describing the event and what it describes'),
76
+ properties: z.object(propertiesSchema),
77
+ implementations: implementationsSchema,
78
+ });
79
+
80
+ return eventDescriptionSchema;
81
+ }
82
+
83
+ async function sendPromptToLLM(prompt, schema) {
84
+ try {
85
+ const completion = await openai.beta.chat.completions.parse({
86
+ model: 'gpt-4o-mini',
87
+ messages: [
88
+ {
89
+ role: 'system',
90
+ content: 'You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations',
91
+ },
92
+ {
93
+ role: 'user',
94
+ content: prompt,
95
+ },
96
+ ],
97
+ response_format: zodResponseFormat(schema, 'event_description'),
98
+ });
99
+
100
+ return completion.choices[0].message.parsed;
101
+ } catch (error) {
102
+ console.error('Error during LLM response parsing:', error);
103
+ return null;
104
+ }
105
+ }
106
+
107
+ async function generateEventDescription(eventName, event, codebaseDir) {
108
+ const properties = event.properties || {};
109
+ const implementations = event.implementations || [];
110
+
111
+ // Create prompt for the LLM
112
+ const prompt = createPrompt(eventName, properties, implementations, codebaseDir);
113
+
114
+ // Define the output schema using Zod
115
+ const eventDescriptionSchema = createEventDescriptionSchema(properties);
116
+
117
+ // Send prompt to the LLM and get the structured response
118
+ const descriptions = await sendPromptToLLM(prompt, eventDescriptionSchema);
119
+
120
+ return { eventName, descriptions };
121
+ }
122
+
123
+ async function generateDescriptions(events, codebaseDir) {
124
+ const eventPromises = Object.entries(events).map(([eventName, event]) =>
125
+ generateEventDescription(eventName, event, codebaseDir)
126
+ );
127
+
128
+ const results = await Promise.all(eventPromises);
129
+
130
+ // Process results and update the events object
131
+ results.forEach(({ eventName, descriptions }) => {
132
+ if (descriptions) {
133
+ const event = events[eventName];
134
+ event.description = descriptions.eventDescription;
135
+
136
+ // Update property descriptions
137
+ for (const propName in descriptions.properties) {
138
+ if (event.properties[propName]) {
139
+ event.properties[propName].description = descriptions.properties[propName].description;
140
+ }
141
+ }
142
+
143
+ // Update implementations with descriptions
144
+ for (let i = 0; i < descriptions.implementations.length; i++) {
145
+ if (event.implementations[i]) {
146
+ if (event.implementations[i].path === descriptions.implementations[i].path &&
147
+ event.implementations[i].line === descriptions.implementations[i].line) {
148
+ event.implementations[i].description = descriptions.implementations[i].description;
149
+ } else {
150
+ console.error(`Returned implementation description does not match path or line for event: ${eventName}`);
151
+ }
152
+ }
153
+ }
154
+ } else {
155
+ console.error(`Failed to get description for event: ${eventName}`);
156
+ }
157
+ });
158
+
159
+ return events;
160
+ }
161
+
162
+ module.exports = { generateDescriptions };
package/src/index.js CHANGED
@@ -1,9 +1,13 @@
1
1
  const { analyzeDirectory } = require('./analyze');
2
2
  const { getRepoDetails } = require('./repoDetails');
3
3
  const { generateYamlSchema } = require('./yamlGenerator');
4
+ const { generateDescriptions } = require('./generateDescriptions');
4
5
 
5
- async function run(targetDir, outputPath, customFunction, customSourceDetails) {
6
- const events = analyzeDirectory(targetDir, customFunction);
6
+ async function run(targetDir, outputPath, customFunction, customSourceDetails, generateDescription) {
7
+ let events = analyzeDirectory(targetDir, customFunction);
8
+ if (generateDescription) {
9
+ events = await generateDescriptions(events, targetDir);
10
+ }
7
11
  const repoDetails = await getRepoDetails(targetDir, customSourceDetails);
8
12
  generateYamlSchema(events, repoDetails, outputPath);
9
13
  }
@@ -1,5 +1,6 @@
1
1
  const fs = require('fs');
2
2
  const git = require('isomorphic-git');
3
+ const { execSync } = require('child_process');
3
4
 
4
5
  async function getRepositoryUrl(targetDir) {
5
6
  try {
@@ -10,8 +11,13 @@ async function getRepositoryUrl(targetDir) {
10
11
  });
11
12
  return repoUrl.trim();
12
13
  } catch (error) {
13
- console.warn('Could not determine repository URL. Will exclude.');
14
- return null;
14
+ try {
15
+ const repoUrl = execSync('git config --get remote.origin.url', { cwd: targetDir, encoding: 'utf8' });
16
+ return repoUrl.trim();
17
+ } catch (error) {
18
+ console.warn('Could not determine repository URL. Will exclude.');
19
+ return null;
20
+ }
15
21
  }
16
22
  }
17
23
 
@@ -24,8 +30,13 @@ async function getCommitHash(targetDir) {
24
30
  });
25
31
  return commitHash.trim();
26
32
  } catch (error) {
27
- console.warn('Could not determine latest commit hash. Will exclude.');
28
- return null;
33
+ try {
34
+ const commitHash = execSync('git rev-parse HEAD', { cwd: targetDir, encoding: 'utf8' });
35
+ return commitHash.trim();
36
+ } catch (error) {
37
+ console.warn('Could not determine latest commit hash. Will exclude.');
38
+ return null;
39
+ }
29
40
  }
30
41
  }
31
42
 
@@ -39,8 +50,14 @@ async function getCommitTimestamp(targetDir, commitHash) {
39
50
  const unixTimeSeconds = commit.committer.timestamp;
40
51
  return new Date(unixTimeSeconds * 1000);
41
52
  } catch (error) {
42
- console.warn('Could not retrieve commit timestamp. Using current timestamp as default.');
43
- return new Date();
53
+ try {
54
+ const commitTimestamp = execSync(`git --no-pager show -s --format=%ct ${commitHash}`, { cwd: targetDir, encoding: 'utf8' });
55
+ const unixTimeSeconds = commitTimestamp.trim();
56
+ return new Date(unixTimeSeconds * 1000);
57
+ } catch (error) {
58
+ console.warn('Could not retrieve commit timestamp. Using current timestamp as default.');
59
+ return new Date();
60
+ }
44
61
  }
45
62
  }
46
63