@flisk/analyze-tracking 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -1
- package/bin/cli.js +15 -1
- package/bin/help.js +9 -1
- package/package.json +4 -2
- package/schema.json +8 -0
- package/src/generateDescriptions.js +218 -0
- package/src/index.js +6 -2
package/README.md
CHANGED
|
@@ -24,9 +24,12 @@ npx @flisk/analyze-tracking /path/to/project [options]
|
|
|
24
24
|
```
|
|
25
25
|
|
|
26
26
|
### Key Options:
|
|
27
|
+
- `-g, --generateDescription`: Generate descriptions of fields (default: `false`)
|
|
27
28
|
- `-o, --output <output_file>`: Name of the output file (default: `tracking-schema.yaml`)
|
|
28
29
|
- `-c, --customFunction <function_name>`: Specify a custom tracking function
|
|
29
30
|
|
|
31
|
+
🔑 **Important:** you must set the `OPENAI_API_KEY` environment variable to use `generateDescription`
|
|
32
|
+
|
|
30
33
|
<details>
|
|
31
34
|
<summary>Note on Custom Functions 💡</summary>
|
|
32
35
|
|
|
@@ -53,18 +56,23 @@ source:
|
|
|
53
56
|
timestamp: <commit_timestamp>
|
|
54
57
|
events:
|
|
55
58
|
<event_name>:
|
|
59
|
+
description: <ai_generated_description>
|
|
56
60
|
implementations:
|
|
57
|
-
-
|
|
61
|
+
- description: <ai_generated_description>
|
|
62
|
+
path: <path_to_file>
|
|
58
63
|
line: <line_number>
|
|
59
64
|
function: <function_name>
|
|
60
65
|
destination: <platform_name>
|
|
61
66
|
properties:
|
|
62
67
|
<property_name>:
|
|
68
|
+
description: <ai_generated_description>
|
|
63
69
|
type: <property_type>
|
|
64
70
|
```
|
|
65
71
|
|
|
66
72
|
Use this to understand where your events live in the code and how they’re being tracked.
|
|
67
73
|
|
|
74
|
+
[GPT-4o mini](https://platform.openai.com/docs/models/gpt-4o-mini) is used for generating descriptions of events, properties, and implementations.
|
|
75
|
+
|
|
68
76
|
See [schema.json](schema.json) for a JSON Schema of the output.
|
|
69
77
|
|
|
70
78
|
|
package/bin/cli.js
CHANGED
|
@@ -13,6 +13,12 @@ const optionDefinitions = [
|
|
|
13
13
|
type: String,
|
|
14
14
|
defaultOption: true,
|
|
15
15
|
},
|
|
16
|
+
{
|
|
17
|
+
name: 'generateDescription',
|
|
18
|
+
alias: 'g',
|
|
19
|
+
type: Boolean,
|
|
20
|
+
defaultValue: false,
|
|
21
|
+
},
|
|
16
22
|
{
|
|
17
23
|
name: 'output',
|
|
18
24
|
alias: 'o',
|
|
@@ -48,6 +54,7 @@ const optionDefinitions = [
|
|
|
48
54
|
const options = commandLineArgs(optionDefinitions);
|
|
49
55
|
const {
|
|
50
56
|
targetDir,
|
|
57
|
+
generateDescription,
|
|
51
58
|
output,
|
|
52
59
|
customFunction,
|
|
53
60
|
repositoryUrl,
|
|
@@ -73,4 +80,11 @@ if (!targetDir) {
|
|
|
73
80
|
process.exit(1);
|
|
74
81
|
}
|
|
75
82
|
|
|
76
|
-
|
|
83
|
+
if (generateDescription) {
|
|
84
|
+
if (!process.env.OPENAI_API_KEY) {
|
|
85
|
+
console.error('Please set the `OPENAI_API_KEY` environment variable to use `generateDescription`.');
|
|
86
|
+
process.exit(1);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
run(path.resolve(targetDir), output, customFunction, customSourceDetails, generateDescription);
|
package/bin/help.js
CHANGED
|
@@ -38,6 +38,14 @@ const helpContent = [
|
|
|
38
38
|
description: 'Display this usage guide.',
|
|
39
39
|
type: Boolean
|
|
40
40
|
},
|
|
41
|
+
{
|
|
42
|
+
name: 'generateDescription',
|
|
43
|
+
alias: 'g',
|
|
44
|
+
description: 'Generate descriptions of fields.',
|
|
45
|
+
type: Boolean,
|
|
46
|
+
defaultValue: false,
|
|
47
|
+
typeLabel: '{underline false}'
|
|
48
|
+
},
|
|
41
49
|
{
|
|
42
50
|
name: 'output',
|
|
43
51
|
alias: 'o',
|
|
@@ -51,7 +59,7 @@ const helpContent = [
|
|
|
51
59
|
alias: 'c',
|
|
52
60
|
description: 'Specify a custom tracking function.',
|
|
53
61
|
type: String,
|
|
54
|
-
typeLabel: '{
|
|
62
|
+
typeLabel: '{italic yourCustomFunctionName}'
|
|
55
63
|
}
|
|
56
64
|
]
|
|
57
65
|
},
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@flisk/analyze-tracking",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "Analyzes tracking code in a project and generates data schemas",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -29,7 +29,9 @@
|
|
|
29
29
|
"command-line-usage": "^7.0.3",
|
|
30
30
|
"isomorphic-git": "^1.27.1",
|
|
31
31
|
"js-yaml": "^4.1.0",
|
|
32
|
-
"
|
|
32
|
+
"openai": "^4.67.1",
|
|
33
|
+
"typescript": "^5.5.4",
|
|
34
|
+
"zod": "^3.23.8"
|
|
33
35
|
},
|
|
34
36
|
"devDependencies": {
|
|
35
37
|
"jest": "^29.7.0"
|
package/schema.json
CHANGED
|
@@ -71,6 +71,10 @@
|
|
|
71
71
|
"unknown"
|
|
72
72
|
],
|
|
73
73
|
"description": "Name of the platform where the event is sent"
|
|
74
|
+
},
|
|
75
|
+
"description": {
|
|
76
|
+
"type": "string",
|
|
77
|
+
"description": "Description of how the event is triggered"
|
|
74
78
|
}
|
|
75
79
|
},
|
|
76
80
|
"required": [
|
|
@@ -89,6 +93,10 @@
|
|
|
89
93
|
"$ref": "#/definitions/property"
|
|
90
94
|
}
|
|
91
95
|
}
|
|
96
|
+
},
|
|
97
|
+
"description": {
|
|
98
|
+
"type": "string",
|
|
99
|
+
"description": "Description of the event"
|
|
92
100
|
}
|
|
93
101
|
},
|
|
94
102
|
"required": [
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const OpenAI = require('openai');
|
|
4
|
+
const { z } = require('zod');
|
|
5
|
+
const { zodResponseFormat } = require('openai/helpers/zod');
|
|
6
|
+
|
|
7
|
+
const openai = new OpenAI({
|
|
8
|
+
apiKey: process.env.OPENAI_API_KEY,
|
|
9
|
+
});
|
|
10
|
+
const model = 'gpt-4o-mini';
|
|
11
|
+
|
|
12
|
+
/**
 * Builds the plain-text prompt that describes one analytics event to the LLM.
 *
 * The prompt contains the event name, one line per property (nested
 * properties are listed recursively under a "Sub-properties" heading), and
 * one entry per implementation followed by a fenced code snippet obtained
 * from `getCodeSnippet`.
 *
 * @param {string} eventName - Name of the event.
 * @param {Object} properties - Map of property name to an object with a `type` and optional nested `properties`.
 * @param {Iterable<Object>} implementations - Entries exposing `path`, `line`, `function` and `destination`.
 * @param {string} codebaseDir - Directory each implementation `path` is joined onto.
 * @returns {string} The assembled prompt text.
 */
function createPrompt(eventName, properties, implementations, codebaseDir) {
  const chunks = [`Event Name: "${eventName}"\n\n`, `Properties:\n`];

  // Depth-first walk over the property tree; each level gets a deeper indent.
  const listProperties = (props, indent) => {
    for (const name in props) {
      const definition = props[name];
      chunks.push(`${indent}- "${name}" (type: ${definition.type})\n`);
      if (definition.properties) {
        chunks.push(`${indent} Sub-properties:\n`);
        listProperties(definition.properties, indent + ' ');
      }
    }
  };
  listProperties(properties, '');

  // Each implementation is followed by the source lines surrounding the call site.
  chunks.push(`\nImplementations:\n`);
  for (const impl of implementations) {
    const snippet = getCodeSnippet(path.join(codebaseDir, impl.path), impl.line);
    chunks.push(
      `- Path: "${impl.path}", Line: ${impl.line}, Function: "${impl.function}", Destination: "${impl.destination}"\n`,
      `Code Snippet:\n`,
      '```\n',
      snippet + '\n',
      '```\n'
    );
  }

  return chunks.join('');
}
|
|
42
|
+
|
|
43
|
+
/**
 * Reads a file and returns the lines surrounding a given line.
 *
 * `lineNumber` is treated as 1-based: the result spans up to `contextLines`
 * lines before it and `contextLines` lines after it, clamped to the bounds
 * of the file.
 *
 * @param {string} filePath - Path of the file to read (decoded as UTF-8).
 * @param {number} lineNumber - 1-based line the snippet is centred on.
 * @param {number} [contextLines=5] - Number of lines to include on each side.
 * @returns {string} The selected lines joined with "\n", or '' when anything
 *   in the read/slice step throws (the error is logged to stderr).
 */
function getCodeSnippet(filePath, lineNumber, contextLines = 5) {
  try {
    const allLines = fs.readFileSync(filePath, 'utf8').split('\n');
    // slice() takes a 0-based start and an exclusive end, hence the extra -1 on the start only.
    const firstIndex = Math.max(0, lineNumber - contextLines - 1);
    const endIndex = Math.min(allLines.length, lineNumber + contextLines);
    return allLines.slice(firstIndex, endIndex).join('\n');
  } catch (e) {
    console.error(`Failed to read file ${filePath}:`, e);
    return '';
  }
}
|
|
58
|
+
|
|
59
|
+
/**
 * Builds the zod schema that the LLM's structured response must satisfy for
 * one event.
 *
 * The schema mirrors the shape of `properties`: every property becomes an
 * object with a `description` string, and properties that carry nested
 * `properties` additionally get a `properties` object built the same way.
 * Implementations are described as an array of `{ description, path, line }`.
 *
 * @param {Object} properties - Map of property name to an object that may contain nested `properties`.
 * @returns {Object} A zod object schema with `eventDescription`, `properties` and `implementations`.
 */
function createEventDescriptionSchema(properties) {
  // A fresh description field is created per property so no schema node is shared.
  const propertyDescription = () =>
    z.string().describe('A maximum of 10 words describing the property and what it means');

  const toPropertySchema = (prop) => {
    // Leaf property: only a description is requested.
    if (!prop.properties) {
      return z.object({
        description: propertyDescription(),
      });
    }

    // Nested property: recurse first, then wrap the children next to the description.
    const nestedShape = {};
    for (const childName in prop.properties) {
      nestedShape[childName] = toPropertySchema(prop.properties[childName]);
    }
    return z.object({
      description: propertyDescription(),
      properties: z.object(nestedShape),
    });
  };

  const propertyShape = {};
  for (const propName in properties) {
    propertyShape[propName] = toPropertySchema(properties[propName]);
  }

  const implementationSchema = z.object({
    description: z
      .string()
      .describe('A maximum of 10 words describing how this event is triggered without using the word "triggered"'),
    path: z.string(),
    line: z.number(),
  });

  return z.object({
    eventDescription: z
      .string()
      .describe('A maximum of 10 words describing the event and what it tracks without using the word "tracks"'),
    properties: z.object(propertyShape),
    implementations: z.array(implementationSchema),
  });
}
|
|
109
|
+
|
|
110
|
+
/**
 * Sends the prompt to the chat-completions endpoint and asks for a response
 * that conforms to `schema`.
 *
 * @param {string} prompt - User message content.
 * @param {Object} schema - Zod schema passed to `zodResponseFormat` under the name 'event_description'.
 * @returns {Promise<{descriptions: *, usage: *}|null>} The parsed message of
 *   the first choice together with the reported usage, or `null` when the
 *   request or the parsing throws (the error is logged to stderr).
 */
async function sendPromptToLLM(prompt, schema) {
  const messages = [
    {
      role: 'system',
      content: 'You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.',
    },
    { role: 'user', content: prompt },
  ];

  try {
    const completion = await openai.beta.chat.completions.parse({
      model,
      messages,
      response_format: zodResponseFormat(schema, 'event_description'),
    });

    const parsedMessage = completion.choices[0].message.parsed;
    return { descriptions: parsedMessage, usage: completion.usage };
  } catch (error) {
    // Failures are reported to the caller as null rather than as a rejection.
    console.error('Error during LLM response parsing:', error);
    return null;
  }
}
|
|
136
|
+
|
|
137
|
+
/**
 * Generates descriptions for a single event by prompting the LLM.
 *
 * @param {string} eventName - Name of the event.
 * @param {Object} event - Event entry; `properties` and `implementations` are optional.
 * @param {string} codebaseDir - Directory used to locate the implementation files for code snippets.
 * @returns {Promise<{eventName: string, descriptions: *, usage: *}>} The
 *   event name with the structured descriptions and token usage. Both
 *   `descriptions` and `usage` are `null` when the LLM call failed.
 */
async function generateEventDescription(eventName, event, codebaseDir) {
  const properties = event.properties || {};
  const implementations = event.implementations || [];

  // Create prompt for the LLM
  const prompt = createPrompt(eventName, properties, implementations, codebaseDir);

  // Define the output schema using Zod
  const eventDescriptionSchema = createEventDescriptionSchema(properties);

  // sendPromptToLLM resolves to null on failure. Destructuring that null
  // directly would throw a TypeError and reject the caller's Promise.all,
  // so a failed call is reported as null descriptions/usage instead.
  const response = await sendPromptToLLM(prompt, eventDescriptionSchema);
  const { descriptions = null, usage = null } = response || {};

  return { eventName, descriptions, usage };
}
|
|
152
|
+
|
|
153
|
+
/**
 * Copies generated property descriptions onto the matching event properties,
 * recursing into nested `properties`. Names unknown to the event are ignored.
 */
function applyPropertyDescriptions(eventProperties, descriptionProperties) {
  if (!eventProperties || !descriptionProperties) {
    return;
  }
  for (const [propName, described] of Object.entries(descriptionProperties)) {
    const target = eventProperties[propName];
    if (!target || !described) {
      continue;
    }
    target.description = described.description;
    applyPropertyDescriptions(target.properties, described.properties);
  }
}

/**
 * Copies generated implementation descriptions onto the event's
 * implementations, pairing them by `path` and `line`.
 */
function applyImplementationDescriptions(eventName, implementations, describedImplementations) {
  const targets = implementations || [];
  const isSameLocation = (impl, described) =>
    Boolean(impl) && impl.path === described.path && impl.line === described.line;

  (describedImplementations || []).forEach((described, index) => {
    // The response normally keeps the prompt's order, so try the same index
    // first; fall back to a search so a reordered response is still usable.
    const target = isSameLocation(targets[index], described)
      ? targets[index]
      : targets.find((impl) => isSameLocation(impl, described));

    if (target) {
      target.description = described.description;
    } else {
      console.error(`Returned implementation description does not match path or line for event: ${eventName}`);
    }
  });
}

/**
 * Generates descriptions for every event, its properties and its
 * implementations, and writes them onto the `events` object in place.
 *
 * All events are prompted in parallel. An event whose generation fails is
 * logged and left without descriptions; it does not abort the other events.
 * Token usage of the successful calls is summed and logged.
 *
 * @param {Object} events - Map of event name to event entry; mutated in place.
 * @param {string} codebaseDir - Directory used to locate implementation files.
 * @returns {Promise<Object>} The same `events` object, with descriptions added.
 */
async function generateDescriptions(events, codebaseDir) {
  console.log(`Generating descriptions using ${model}`);

  const eventPromises = Object.entries(events).map(async ([eventName, event]) => {
    try {
      return await generateEventDescription(eventName, event, codebaseDir);
    } catch (error) {
      // Isolate the failure so Promise.all below still resolves for the rest.
      console.error(`Error while generating description for event: ${eventName}`, error);
      return { eventName, descriptions: null, usage: null };
    }
  });

  console.log(`Running ${eventPromises.length} prompts in parallel...`);

  const results = await Promise.all(eventPromises);

  let promptTokens = 0;
  let completionTokens = 0;

  // Process results and update the events object
  for (const { eventName, descriptions, usage } of results) {
    if (!descriptions) {
      console.error(`Failed to get description for event: ${eventName}`);
      continue;
    }

    if (usage) {
      promptTokens += usage.prompt_tokens || 0;
      completionTokens += usage.completion_tokens || 0;
    }

    const event = events[eventName];
    event.description = descriptions.eventDescription;
    applyPropertyDescriptions(event.properties, descriptions.properties);
    applyImplementationDescriptions(eventName, event.implementations, descriptions.implementations);
  }

  console.log(`Prompt tokens used: ${promptTokens}`);
  console.log(`Completion tokens used: ${completionTokens}`);
  console.log(`Total tokens used: ${promptTokens + completionTokens}`);

  return events;
}
|
|
217
|
+
|
|
218
|
+
module.exports = { generateDescriptions };
|
package/src/index.js
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
const { analyzeDirectory } = require('./analyze');
|
|
2
2
|
const { getRepoDetails } = require('./repoDetails');
|
|
3
3
|
const { generateYamlSchema } = require('./yamlGenerator');
|
|
4
|
+
const { generateDescriptions } = require('./generateDescriptions');
|
|
4
5
|
|
|
5
|
-
async function run(targetDir, outputPath, customFunction, customSourceDetails) {
|
|
6
|
-
|
|
6
|
+
/**
 * Analyzes a project directory for tracking events and writes the resulting
 * schema file.
 *
 * @param {string} targetDir - Directory to analyze.
 * @param {string} outputPath - Output path handed to the YAML generator.
 * @param {string} customFunction - Custom tracking function name handed to the analyzer.
 * @param {Object} customSourceDetails - Source details handed to `getRepoDetails`.
 * @param {boolean} generateDescription - When truthy, descriptions are generated for the analyzed events before the schema is written.
 * @returns {Promise<void>}
 */
async function run(targetDir, outputPath, customFunction, customSourceDetails, generateDescription) {
  const analyzedEvents = analyzeDirectory(targetDir, customFunction);

  // Description generation is opt-in; otherwise the analyzed events are used as-is.
  const events = generateDescription
    ? await generateDescriptions(analyzedEvents, targetDir)
    : analyzedEvents;

  const repoDetails = await getRepoDetails(targetDir, customSourceDetails);
  generateYamlSchema(events, repoDetails, outputPath);
}
|