@flisk/analyze-tracking 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/bin/cli.js +1 -1
- package/package.json +2 -1
- package/schema.json +8 -0
- package/src/analyze/analyzeRubyFile.js +290 -0
- package/src/analyze/index.js +23 -8
- package/src/fileProcessor.js +10 -4
- package/src/generateDescriptions.js +84 -28
- package/src/index.js +1 -1
package/README.md
CHANGED
|
@@ -205,5 +205,12 @@ See [schema.json](schema.json) for a JSON Schema of the output.
|
|
|
205
205
|
</details>
|
|
206
206
|
|
|
207
207
|
|
|
208
|
+
## Supported languages
|
|
209
|
+
|
|
210
|
+
- JavaScript
|
|
211
|
+
- TypeScript
|
|
212
|
+
- Ruby (Experimental - only supports Segment for now)
|
|
213
|
+
|
|
214
|
+
|
|
208
215
|
## Contribute
|
|
209
216
|
We’re actively improving this package. Found a bug? Want to request a feature? Open an issue or contribute directly!
|
package/bin/cli.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@flisk/analyze-tracking",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Analyzes tracking code in a project and generates data schemas",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
},
|
|
21
21
|
"homepage": "https://github.com/fliskdata/analyze-tracking#readme",
|
|
22
22
|
"dependencies": {
|
|
23
|
+
"@ruby/prism": "^1.4.0",
|
|
23
24
|
"@typescript-eslint/parser": "^8.1.0",
|
|
24
25
|
"acorn": "^8.12.1",
|
|
25
26
|
"acorn-jsx": "^5.3.2",
|
package/schema.json
CHANGED
|
@@ -71,6 +71,10 @@
|
|
|
71
71
|
"unknown"
|
|
72
72
|
],
|
|
73
73
|
"description": "Name of the platform where the event is sent"
|
|
74
|
+
},
|
|
75
|
+
"description": {
|
|
76
|
+
"type": "string",
|
|
77
|
+
"description": "Description of how the event is triggered"
|
|
74
78
|
}
|
|
75
79
|
},
|
|
76
80
|
"required": [
|
|
@@ -89,6 +93,10 @@
|
|
|
89
93
|
"$ref": "#/definitions/property"
|
|
90
94
|
}
|
|
91
95
|
}
|
|
96
|
+
},
|
|
97
|
+
"description": {
|
|
98
|
+
"type": "string",
|
|
99
|
+
"description": "Description of the event"
|
|
92
100
|
}
|
|
93
101
|
},
|
|
94
102
|
"required": [
|
|
package/src/analyze/analyzeRubyFile.js
@@ -0,0 +1,290 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
|
|
3
|
+
let parse = null;
|
|
4
|
+
|
|
5
|
+
// Create a visitor to traverse the AST
|
|
6
|
+
/**
 * Walks a Prism (Ruby) AST and collects every call that looks like a Segment
 * `Analytics.track(...)` invocation, recording the event name, the inferred
 * property schema, the source line and the enclosing method/block.
 *
 * Node classes are obtained through dynamic `import('@ruby/prism')` inside the
 * async methods that need them.
 */
class TrackingVisitor {
  /**
   * @param {string} code - Full Ruby source text of the file being analyzed.
   * @param {string} filePath - Path of the file; copied onto every event and used in error logs.
   */
  constructor(code, filePath) {
    this.code = code;
    this.lines = code.split('\n');
    this.ancestors = []; // stack of nodes from the root down to the node being visited
    this.events = []; // tracking events found so far
    this.filePath = filePath;
  }

  /**
   * Converts a node location into a 1-based line number.
   *
   * @param {{ startOffset: number }} location - Location object carrying `startOffset`.
   * @returns {number} Number of newlines before `startOffset`, plus one.
   */
  getLineNumber(location) {
    // NOTE(review): `startOffset` is used as a JS string index here; if the parser
    // reports byte offsets, non-ASCII text earlier in the file could skew the result — confirm.
    const beforeStart = this.code.slice(0, location.startOffset);
    return beforeStart.split('\n').length;
  }

  /**
   * Names the innermost enclosing method, block or lambda of a node.
   *
   * @param {object} node - The node being located (unused; kept for the existing call shape).
   * @param {object[]} ancestors - Ancestor stack, outermost first.
   * @returns {Promise<string>} The `DefNode` name, `'block'` for a block/lambda, or `'global'`.
   */
  async findWrappingFunction(node, ancestors) {
    const { DefNode, BlockNode, LambdaNode } = await import('@ruby/prism');

    // Search from the innermost ancestor outwards so the closest wrapper wins.
    for (let i = ancestors.length - 1; i >= 0; i--) {
      const current = ancestors[i];

      if (current instanceof DefNode) {
        return current.name;
      }

      if (current instanceof BlockNode || current instanceof LambdaNode) {
        return 'block';
      }
    }
    return 'global';
  }

  /**
   * Identifies which analytics library a call node targets.
   *
   * @param {object|null} node - A call node with optional `receiver` and `name`.
   * @returns {string|null} `'segment'` for `Analytics.track`, otherwise `null`.
   */
  detectSource(node) {
    if (!node) return null;

    if (node.receiver) {
      const objectName = node.receiver.name;
      const methodName = node.name;

      if (objectName === 'Analytics' && methodName === 'track') return 'segment';
    }

    return null;
  }

  /**
   * Returns the key/value elements of the first argument of a call, or an empty
   * array when the call has no arguments or the first argument has no `elements`.
   *
   * @param {object} node - A call node.
   * @returns {object[]} Elements of the first argument.
   */
  getCallElements(node) {
    return node?.arguments_?.arguments_?.[0]?.elements ?? [];
  }

  /**
   * Reads the value of the `event` key from a tracking call.
   *
   * @param {object} node - The call node.
   * @param {string|null} source - Result of {@link TrackingVisitor#detectSource}.
   * @returns {string|null} The event name, or `null` when the source is not
   *   `'segment'` or no literal `event` value is present.
   */
  extractEventName(node, source) {
    if (source !== 'segment') return null;

    const eventProperty = this.getCallElements(node).find(
      (param) => param?.key?.unescaped?.value === 'event'
    );
    return eventProperty?.value?.unescaped?.value || null;
  }

  /**
   * Builds the property schema of a tracking call. Every top-level key except
   * `event` is described; the entries of a `properties` hash are merged into the
   * top level instead of being nested under `properties`.
   *
   * @param {object} node - The call node.
   * @param {string|null} source - Result of {@link TrackingVisitor#detectSource}.
   * @returns {Promise<object|null>} Map of property name to type descriptor, or
   *   `null` when the source is not `'segment'`.
   */
  async extractProperties(node, source) {
    if (source !== 'segment') return null;

    const { HashNode } = await import('@ruby/prism');
    const properties = {};

    for (const param of this.getCallElements(node)) {
      const key = param?.key?.unescaped?.value;
      if (!key || key === 'event') continue;

      const value = param?.value;
      if (key === 'properties' && value instanceof HashNode) {
        Object.assign(properties, await this.extractHashProperties(value));
      } else {
        properties[key] = await this.describeValue(value);
      }
    }

    return properties;
  }

  /**
   * Describes a single value node as `{ type: 'object', properties }`,
   * `{ type: 'array', items }` or `{ type }` for everything else.
   *
   * @param {object} value - The value node.
   * @returns {Promise<object>} Type descriptor for the value.
   */
  async describeValue(value) {
    const { HashNode, ArrayNode } = await import('@ruby/prism');

    if (value instanceof HashNode) {
      return { type: 'object', properties: await this.extractHashProperties(value) };
    }
    if (value instanceof ArrayNode) {
      return { type: 'array', items: await this.extractArrayItemProperties(value) };
    }
    return { type: await this.getValueType(value) };
  }

  /**
   * Describes every `AssocNode` entry of a hash whose key has an unescaped value.
   *
   * @param {object} hashNode - Node exposing an `elements` array.
   * @returns {Promise<object>} Map of key to type descriptor.
   */
  async extractHashProperties(hashNode) {
    const { AssocNode } = await import('@ruby/prism');
    const properties = {};

    for (const element of hashNode.elements) {
      if (!(element instanceof AssocNode)) continue;

      const key = element.key.unescaped?.value;
      if (key) {
        properties[key] = await this.describeValue(element.value);
      }
    }

    return properties;
  }

  /**
   * Describes the items of an array from its first element only.
   *
   * @param {object} arrayNode - Node exposing an `elements` array.
   * @returns {Promise<object>} `{ type: 'any' }` for an empty array,
   *   `{ type: 'object', properties }` when the first element is a hash,
   *   otherwise `{ type }` of the first element.
   */
  async extractArrayItemProperties(arrayNode) {
    const { HashNode } = await import('@ruby/prism');

    if (arrayNode.elements.length === 0) {
      return { type: 'any' };
    }

    const firstItem = arrayNode.elements[0];
    if (firstItem instanceof HashNode) {
      return {
        type: 'object',
        // Must be awaited: extractHashProperties is async, and an un-awaited
        // call would store a pending Promise here instead of the property map.
        properties: await this.extractHashProperties(firstItem),
      };
    }

    return { type: await this.getValueType(firstItem) };
  }

  /**
   * Maps a literal node to a schema type name.
   *
   * @param {object} node - The value node.
   * @returns {Promise<string>} `'string'`, `'number'`, `'boolean'`, `'null'`, or
   *   `'any'` for anything else (including method calls and other dynamic values).
   */
  async getValueType(node) {
    const { StringNode, IntegerNode, FloatNode, TrueNode, FalseNode, NilNode, SymbolNode } =
      await import('@ruby/prism');

    if (node instanceof StringNode) return 'string';
    if (node instanceof IntegerNode || node instanceof FloatNode) return 'number';
    if (node instanceof TrueNode || node instanceof FalseNode) return 'boolean';
    if (node instanceof NilNode) return 'null';
    if (node instanceof SymbolNode) return 'string';
    return 'any'; // Dynamic values and every other node type
  }

  /**
   * Records `node` as an event when it is a recognised tracking call with a
   * literal event name. Failures are logged and never propagate, so a single
   * odd call cannot abort the traversal.
   *
   * @param {object} node - A call node.
   * @returns {Promise<void>}
   */
  async recordTrackingCall(node) {
    try {
      const source = this.detectSource(node);
      const eventName = this.extractEventName(node, source);
      if (!source || !eventName) return;

      const line = this.getLineNumber(node.location);
      const functionName = await this.findWrappingFunction(node, this.ancestors);
      const properties = await this.extractProperties(node, source);

      this.events.push({
        eventName,
        source,
        properties,
        filePath: this.filePath,
        line,
        functionName,
      });
    } catch (nodeError) {
      console.error(`Error processing node in ${this.filePath}`, nodeError?.message ?? nodeError);
    }
  }

  /**
   * Depth-first traversal. Records tracking calls and descends into the node
   * types handled below; any other node type is a leaf for this visitor.
   *
   * @param {object|null|undefined} node - Node to visit; falsy values are ignored.
   * @returns {Promise<void>}
   */
  async visit(node) {
    if (!node) return;

    const {
      CallNode,
      ProgramNode,
      StatementsNode,
      DefNode,
      IfNode,
      BlockNode,
      ArgumentsNode,
      HashNode,
      AssocNode,
      ClassNode,
    } = await import('@ruby/prism');

    this.ancestors.push(node);
    try {
      if (node instanceof CallNode) {
        await this.recordTrackingCall(node);
        // Descend into the call so tracking calls nested in its receiver,
        // arguments or attached block are found too. Without this, the
        // BlockNode/ArgumentsNode/HashNode/AssocNode branches below are never reached.
        await this.visit(node.receiver);
        await this.visit(node.arguments_);
        await this.visit(node.block);
      } else if (node instanceof ProgramNode) {
        await this.visit(node.statements);
      } else if (node instanceof StatementsNode) {
        for (const child of node.body) {
          await this.visit(child);
        }
      } else if (node instanceof ClassNode || node instanceof DefNode || node instanceof BlockNode) {
        await this.visit(node.body);
      } else if (node instanceof IfNode) {
        await this.visit(node.statements);
        await this.visit(node.subsequent);
      } else if (node instanceof ArgumentsNode) {
        // The rest of this class reads the argument list as `arguments_`;
        // `arguments` is kept as a fallback. NOTE(review): confirm the field name
        // against the installed @ruby/prism version.
        for (const arg of node.arguments_ ?? node.arguments ?? []) {
          await this.visit(arg);
        }
      } else if (node instanceof HashNode) {
        for (const element of node.elements) {
          await this.visit(element);
        }
      } else if (node instanceof AssocNode) {
        await this.visit(node.key);
        await this.visit(node.value);
      }
    } finally {
      // Always unwind the stack, even if a child visit throws.
      this.ancestors.pop();
    }
  }
}
|
|
259
|
+
|
|
260
|
+
/**
 * Parses one Ruby file with Prism and returns the tracking events found in it.
 *
 * Read, parse and traversal failures are logged with the underlying error
 * message and yield an empty array, so one bad file does not stop the run.
 *
 * @param {string} filePath - Path of the Ruby file to analyze.
 * @returns {Promise<object[]>} Events collected by the visitor, or `[]` on failure.
 */
async function analyzeRubyFile(filePath) {
  // Lazy load the ruby prism parser; the resulting parse function is cached in
  // the module-level `parse` so later calls reuse it.
  if (!parse) {
    const { loadPrism } = await import('@ruby/prism');
    parse = await loadPrism();
  }

  try {
    const code = fs.readFileSync(filePath, 'utf8');

    let ast;
    try {
      ast = await parse(code);
    } catch (parseError) {
      console.error(`Error parsing file ${filePath}`, parseError?.message ?? parseError);
      return []; // Return empty events array if parsing fails
    }

    // Traverse the AST starting from the program node
    const visitor = new TrackingVisitor(code, filePath);
    await visitor.visit(ast.value);

    return visitor.events;
  } catch (fileError) {
    console.error(
      `Error reading or processing file ${filePath}`,
      fileError?.message ?? fileError
    );
    return [];
  }
}
|
|
289
|
+
|
|
290
|
+
module.exports = { analyzeRubyFile };
|
package/src/analyze/index.js
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
+
const path = require('path');
|
|
2
|
+
const ts = require('typescript');
|
|
3
|
+
const { getAllFiles } = require('../fileProcessor');
|
|
1
4
|
const { analyzeJsFile } = require('./analyzeJsFile');
|
|
2
5
|
const { analyzeTsFile } = require('./analyzeTsFile');
|
|
3
|
-
const { getAllFiles } = require('../fileProcessor');
|
|
4
|
-
const ts = require('typescript');
|
|
5
|
-
const path = require('path');
|
|
6
|
+
const { analyzeRubyFile } = require('./analyzeRubyFile');
|
|
6
7
|
|
|
7
|
-
function analyzeDirectory(dirPath, customFunction) {
|
|
8
|
+
async function analyzeDirectory(dirPath, customFunction) {
|
|
8
9
|
const files = getAllFiles(dirPath);
|
|
9
10
|
const allEvents = {};
|
|
10
11
|
|
|
@@ -14,12 +15,26 @@ function analyzeDirectory(dirPath, customFunction) {
|
|
|
14
15
|
module: ts.ModuleKind.CommonJS,
|
|
15
16
|
});
|
|
16
17
|
|
|
17
|
-
|
|
18
|
+
for (const file of files) {
|
|
19
|
+
let events = [];
|
|
20
|
+
|
|
21
|
+
const isJsFile = /\.(jsx?)$/.test(file);
|
|
18
22
|
const isTsFile = /\.(tsx?)$/.test(file);
|
|
19
|
-
const
|
|
23
|
+
const isRubyFile = /\.(rb|ru|rake|gemspec)$/.test(file);
|
|
24
|
+
|
|
25
|
+
if (isJsFile) {
|
|
26
|
+
events = analyzeJsFile(file, customFunction);
|
|
27
|
+
} else if (isTsFile) {
|
|
28
|
+
events = analyzeTsFile(file, program, customFunction);
|
|
29
|
+
} else if (isRubyFile) {
|
|
30
|
+
events = await analyzeRubyFile(file);
|
|
31
|
+
} else {
|
|
32
|
+
console.info(`Skipping file ${file} because it is not a supported file type`);
|
|
33
|
+
continue;
|
|
34
|
+
}
|
|
20
35
|
|
|
21
36
|
events.forEach((event) => {
|
|
22
|
-
const relativeFilePath = path.relative(dirPath, event.filePath);
|
|
37
|
+
const relativeFilePath = path.relative(dirPath, event.filePath);
|
|
23
38
|
|
|
24
39
|
if (!allEvents[event.eventName]) {
|
|
25
40
|
allEvents[event.eventName] = {
|
|
@@ -45,7 +60,7 @@ function analyzeDirectory(dirPath, customFunction) {
|
|
|
45
60
|
};
|
|
46
61
|
}
|
|
47
62
|
});
|
|
48
|
-
}
|
|
63
|
+
}
|
|
49
64
|
|
|
50
65
|
return allEvents;
|
|
51
66
|
}
|
package/src/fileProcessor.js
CHANGED
|
@@ -18,12 +18,18 @@ function getAllFiles(dirPath, arrayOfFiles = []) {
|
|
|
18
18
|
}
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
+
// Skip hidden files and directories
|
|
22
|
+
if (file.startsWith('.')) return
|
|
23
|
+
|
|
24
|
+
// Skip common directories we don't want to analyze
|
|
25
|
+
if (file === 'node_modules') return
|
|
26
|
+
if (file === 'coverage') return
|
|
27
|
+
if (file === 'temp') return
|
|
28
|
+
if (file === 'tmp') return
|
|
29
|
+
|
|
21
30
|
if (stats.isDirectory()) {
|
|
22
|
-
if (file === 'node_modules') {
|
|
23
|
-
return; // Ignore the node_modules directory
|
|
24
|
-
}
|
|
25
31
|
arrayOfFiles = getAllFiles(fullPath, arrayOfFiles);
|
|
26
|
-
} else
|
|
32
|
+
} else {
|
|
27
33
|
arrayOfFiles.push(fullPath);
|
|
28
34
|
}
|
|
29
35
|
});
|
|
package/src/generateDescriptions.js
@@ -5,23 +5,27 @@ const { z } = require('zod');
|
|
|
5
5
|
const { zodResponseFormat } = require('openai/helpers/zod');
|
|
6
6
|
|
|
7
7
|
const openai = new OpenAI({
|
|
8
|
-
apiKey: process.env.OPENAI_API_KEY,
|
|
8
|
+
apiKey: process.env.OPENAI_API_KEY || 'undefined',
|
|
9
9
|
});
|
|
10
|
+
const model = 'gpt-4o-mini';
|
|
10
11
|
|
|
11
12
|
function createPrompt(eventName, properties, implementations, codebaseDir) {
|
|
12
|
-
|
|
13
|
-
let prompt = `You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.\n\n`;
|
|
14
|
-
|
|
15
|
-
// Add event name
|
|
16
|
-
prompt += `Event Name: "${eventName}"\n\n`;
|
|
17
|
-
|
|
18
|
-
// Add properties
|
|
13
|
+
let prompt = `Event Name: "${eventName}"\n\n`;
|
|
19
14
|
prompt += `Properties:\n`;
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
15
|
+
|
|
16
|
+
function appendPropertiesToPrompt(properties, indent = '') {
|
|
17
|
+
for (const propName in properties) {
|
|
18
|
+
const prop = properties[propName];
|
|
19
|
+
prompt += `${indent}- "${propName}" (type: ${prop.type})\n`;
|
|
20
|
+
if (prop.properties) {
|
|
21
|
+
prompt += `${indent} Sub-properties:\n`;
|
|
22
|
+
appendPropertiesToPrompt(prop.properties, indent + ' ');
|
|
23
|
+
}
|
|
24
|
+
}
|
|
23
25
|
}
|
|
24
26
|
|
|
27
|
+
appendPropertiesToPrompt(properties);
|
|
28
|
+
|
|
25
29
|
// Add implementations with code snippets
|
|
26
30
|
prompt += `\nImplementations:\n`;
|
|
27
31
|
for (const impl of implementations) {
|
|
@@ -53,18 +57,39 @@ function getCodeSnippet(filePath, lineNumber, contextLines = 5) {
|
|
|
53
57
|
}
|
|
54
58
|
|
|
55
59
|
function createEventDescriptionSchema(properties) {
|
|
60
|
+
function buildPropertySchema(prop) {
|
|
61
|
+
if (prop.properties) {
|
|
62
|
+
const subPropertiesSchema = {};
|
|
63
|
+
for (const subPropName in prop.properties) {
|
|
64
|
+
subPropertiesSchema[subPropName] = buildPropertySchema(prop.properties[subPropName]);
|
|
65
|
+
}
|
|
66
|
+
return z.object({
|
|
67
|
+
description: z
|
|
68
|
+
.string()
|
|
69
|
+
.describe('A maximum of 10 words describing the property and what it means'),
|
|
70
|
+
properties: z.object(subPropertiesSchema),
|
|
71
|
+
});
|
|
72
|
+
} else {
|
|
73
|
+
return z.object({
|
|
74
|
+
description: z
|
|
75
|
+
.string()
|
|
76
|
+
.describe('A maximum of 10 words describing the property and what it means'),
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
56
81
|
// Define the schema for properties
|
|
57
82
|
const propertiesSchema = {};
|
|
58
83
|
for (const propName in properties) {
|
|
59
|
-
propertiesSchema[propName] =
|
|
60
|
-
description: z.string().describe('A maximum of 10 words describing the property and what it means'),
|
|
61
|
-
});
|
|
84
|
+
propertiesSchema[propName] = buildPropertySchema(properties[propName]);
|
|
62
85
|
}
|
|
63
86
|
|
|
64
87
|
// Define the schema for implementations
|
|
65
88
|
const implementationsSchema = z.array(
|
|
66
89
|
z.object({
|
|
67
|
-
description: z
|
|
90
|
+
description: z
|
|
91
|
+
.string()
|
|
92
|
+
.describe('A maximum of 10 words describing how this event is triggered without using the word "triggered"'),
|
|
68
93
|
path: z.string(),
|
|
69
94
|
line: z.number(),
|
|
70
95
|
})
|
|
@@ -72,7 +97,9 @@ function createEventDescriptionSchema(properties) {
|
|
|
72
97
|
|
|
73
98
|
// Construct the full schema
|
|
74
99
|
const eventDescriptionSchema = z.object({
|
|
75
|
-
eventDescription: z
|
|
100
|
+
eventDescription: z
|
|
101
|
+
.string()
|
|
102
|
+
.describe('A maximum of 10 words describing the event and what it tracks without using the word "tracks"'),
|
|
76
103
|
properties: z.object(propertiesSchema),
|
|
77
104
|
implementations: implementationsSchema,
|
|
78
105
|
});
|
|
@@ -83,11 +110,11 @@ function createEventDescriptionSchema(properties) {
|
|
|
83
110
|
async function sendPromptToLLM(prompt, schema) {
|
|
84
111
|
try {
|
|
85
112
|
const completion = await openai.beta.chat.completions.parse({
|
|
86
|
-
model
|
|
113
|
+
model,
|
|
87
114
|
messages: [
|
|
88
115
|
{
|
|
89
116
|
role: 'system',
|
|
90
|
-
content: 'You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations',
|
|
117
|
+
content: 'You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.',
|
|
91
118
|
},
|
|
92
119
|
{
|
|
93
120
|
role: 'user',
|
|
@@ -97,7 +124,10 @@ async function sendPromptToLLM(prompt, schema) {
|
|
|
97
124
|
response_format: zodResponseFormat(schema, 'event_description'),
|
|
98
125
|
});
|
|
99
126
|
|
|
100
|
-
return
|
|
127
|
+
return {
|
|
128
|
+
descriptions: completion.choices[0].message.parsed,
|
|
129
|
+
usage: completion.usage,
|
|
130
|
+
};
|
|
101
131
|
} catch (error) {
|
|
102
132
|
console.error('Error during LLM response parsing:', error);
|
|
103
133
|
return null;
|
|
@@ -115,36 +145,58 @@ async function generateEventDescription(eventName, event, codebaseDir) {
|
|
|
115
145
|
const eventDescriptionSchema = createEventDescriptionSchema(properties);
|
|
116
146
|
|
|
117
147
|
// Send prompt to the LLM and get the structured response
|
|
118
|
-
const descriptions = await sendPromptToLLM(prompt, eventDescriptionSchema);
|
|
148
|
+
const { descriptions, usage } = await sendPromptToLLM(prompt, eventDescriptionSchema);
|
|
119
149
|
|
|
120
|
-
return { eventName, descriptions };
|
|
150
|
+
return { eventName, descriptions, usage };
|
|
121
151
|
}
|
|
122
152
|
|
|
123
153
|
async function generateDescriptions(events, codebaseDir) {
|
|
154
|
+
console.log(`Generating descriptions using ${model}`);
|
|
155
|
+
|
|
124
156
|
const eventPromises = Object.entries(events).map(([eventName, event]) =>
|
|
125
157
|
generateEventDescription(eventName, event, codebaseDir)
|
|
126
158
|
);
|
|
127
159
|
|
|
160
|
+
console.log(`Running ${eventPromises.length} prompts in parallel...`);
|
|
161
|
+
|
|
128
162
|
const results = await Promise.all(eventPromises);
|
|
129
163
|
|
|
164
|
+
let promptTokens = 0;
|
|
165
|
+
let completionTokens = 0;
|
|
166
|
+
|
|
130
167
|
// Process results and update the events object
|
|
131
|
-
results.forEach(({ eventName, descriptions }) => {
|
|
168
|
+
results.forEach(({ eventName, descriptions, usage }) => {
|
|
132
169
|
if (descriptions) {
|
|
170
|
+
promptTokens += usage.prompt_tokens;
|
|
171
|
+
completionTokens += usage.completion_tokens;
|
|
172
|
+
|
|
133
173
|
const event = events[eventName];
|
|
134
174
|
event.description = descriptions.eventDescription;
|
|
135
175
|
|
|
136
|
-
// Update property descriptions
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
176
|
+
// Update property descriptions recursively
|
|
177
|
+
function updatePropertyDescriptions(eventProperties, descriptionProperties) {
|
|
178
|
+
for (const propName in descriptionProperties) {
|
|
179
|
+
if (eventProperties[propName]) {
|
|
180
|
+
eventProperties[propName].description = descriptionProperties[propName].description;
|
|
181
|
+
if (eventProperties[propName].properties && descriptionProperties[propName].properties) {
|
|
182
|
+
updatePropertyDescriptions(
|
|
183
|
+
eventProperties[propName].properties,
|
|
184
|
+
descriptionProperties[propName].properties
|
|
185
|
+
);
|
|
186
|
+
}
|
|
187
|
+
}
|
|
140
188
|
}
|
|
141
189
|
}
|
|
142
190
|
|
|
191
|
+
updatePropertyDescriptions(event.properties, descriptions.properties);
|
|
192
|
+
|
|
143
193
|
// Update implementations with descriptions
|
|
144
194
|
for (let i = 0; i < descriptions.implementations.length; i++) {
|
|
145
195
|
if (event.implementations[i]) {
|
|
146
|
-
if (
|
|
147
|
-
|
|
196
|
+
if (
|
|
197
|
+
event.implementations[i].path === descriptions.implementations[i].path &&
|
|
198
|
+
event.implementations[i].line === descriptions.implementations[i].line
|
|
199
|
+
) {
|
|
148
200
|
event.implementations[i].description = descriptions.implementations[i].description;
|
|
149
201
|
} else {
|
|
150
202
|
console.error(`Returned implementation description does not match path or line for event: ${eventName}`);
|
|
@@ -156,6 +208,10 @@ async function generateDescriptions(events, codebaseDir) {
|
|
|
156
208
|
}
|
|
157
209
|
});
|
|
158
210
|
|
|
211
|
+
console.log(`Prompt tokens used: ${promptTokens}`);
|
|
212
|
+
console.log(`Completion tokens used: ${completionTokens}`);
|
|
213
|
+
console.log(`Total tokens used: ${promptTokens + completionTokens}`);
|
|
214
|
+
|
|
159
215
|
return events;
|
|
160
216
|
}
|
|
161
217
|
|
package/src/index.js
CHANGED
|
@@ -4,7 +4,7 @@ const { generateYamlSchema } = require('./yamlGenerator');
|
|
|
4
4
|
const { generateDescriptions } = require('./generateDescriptions');
|
|
5
5
|
|
|
6
6
|
async function run(targetDir, outputPath, customFunction, customSourceDetails, generateDescription) {
|
|
7
|
-
let events = analyzeDirectory(targetDir, customFunction);
|
|
7
|
+
let events = await analyzeDirectory(targetDir, customFunction);
|
|
8
8
|
if (generateDescription) {
|
|
9
9
|
events = await generateDescriptions(events, targetDir);
|
|
10
10
|
}
|