@flisk/analyze-tracking 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/bin/cli.js +44 -3
- package/bin/help.js +16 -0
- package/package.json +5 -3
- package/src/analyze/index.js +4 -4
- package/src/fileProcessor.js +1 -0
- package/src/generateDescriptions.js +19 -41
- package/src/index.js +23 -2
package/README.md
CHANGED
|
@@ -25,10 +25,12 @@ npx @flisk/analyze-tracking /path/to/project [options]
|
|
|
25
25
|
|
|
26
26
|
### Key Options:
|
|
27
27
|
- `-g, --generateDescription`: Generate descriptions of fields (default: `false`)
|
|
28
|
+
- `-p, --provider <provider>`: Specify a provider (options: `openai`, `gemini`)
|
|
29
|
+
- `-m, --model <model>`: Specify a model (options: `gpt-4o-mini`, `gemini-2.0-flash-lite-001`)
|
|
28
30
|
- `-o, --output <output_file>`: Name of the output file (default: `tracking-schema.yaml`)
|
|
29
31
|
- `-c, --customFunction <function_name>`: Specify a custom tracking function
|
|
30
32
|
|
|
31
|
-
🔑 **Important:** you must set the `OPENAI_API_KEY`
|
|
33
|
+
🔑 **Important:** If you are using `generateDescription`, you must set the appropriate credentials for the provider you are using as an environment variable. OpenAI uses `OPENAI_API_KEY` and Google Vertex AI uses `GOOGLE_APPLICATION_CREDENTIALS`.
|
|
32
34
|
|
|
33
35
|
<details>
|
|
34
36
|
<summary>Note on Custom Functions 💡</summary>
|
package/bin/cli.js
CHANGED
|
@@ -6,6 +6,11 @@ const commandLineUsage = require('command-line-usage');
|
|
|
6
6
|
const { run } = require('../src/index');
|
|
7
7
|
const { helpContent } = require('./help');
|
|
8
8
|
|
|
9
|
+
const SUPPORTED_MODELS = {
|
|
10
|
+
openai: ['gpt-4o-mini'],
|
|
11
|
+
gemini: ['gemini-2.0-flash-lite-001'],
|
|
12
|
+
};
|
|
13
|
+
|
|
9
14
|
// Parse command-line arguments
|
|
10
15
|
const optionDefinitions = [
|
|
11
16
|
{
|
|
@@ -19,6 +24,18 @@ const optionDefinitions = [
|
|
|
19
24
|
type: Boolean,
|
|
20
25
|
defaultValue: false,
|
|
21
26
|
},
|
|
27
|
+
{
|
|
28
|
+
name: 'provider',
|
|
29
|
+
alias: 'p',
|
|
30
|
+
type: String,
|
|
31
|
+
defaultValue: 'openai',
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
name: 'model',
|
|
35
|
+
alias: 'm',
|
|
36
|
+
type: String,
|
|
37
|
+
defaultValue: 'gpt-4o-mini',
|
|
38
|
+
},
|
|
22
39
|
{
|
|
23
40
|
name: 'output',
|
|
24
41
|
alias: 'o',
|
|
@@ -55,6 +72,8 @@ const options = commandLineArgs(optionDefinitions);
|
|
|
55
72
|
const {
|
|
56
73
|
targetDir,
|
|
57
74
|
generateDescription,
|
|
75
|
+
provider,
|
|
76
|
+
model,
|
|
58
77
|
output,
|
|
59
78
|
customFunction,
|
|
60
79
|
repositoryUrl,
|
|
@@ -81,10 +100,32 @@ if (!targetDir) {
|
|
|
81
100
|
}
|
|
82
101
|
|
|
83
102
|
if (generateDescription) {
|
|
84
|
-
if (!
|
|
85
|
-
console.error('Please
|
|
103
|
+
if (!Object.keys(SUPPORTED_MODELS).includes(provider)) {
|
|
104
|
+
console.error('Please provide a valid provider. Options: openai, gemini');
|
|
86
105
|
process.exit(1);
|
|
87
106
|
}
|
|
107
|
+
|
|
108
|
+
if (provider === 'openai') {
|
|
109
|
+
if (!SUPPORTED_MODELS.openai.includes(model)) {
|
|
110
|
+
console.error(`Please provide a valid model for OpenAI. Options: ${SUPPORTED_MODELS.openai.join(', ')}`);
|
|
111
|
+
process.exit(1);
|
|
112
|
+
}
|
|
113
|
+
if (!process.env.OPENAI_API_KEY) {
|
|
114
|
+
console.error('Please set the `OPENAI_API_KEY` environment variable to use OpenAI for `generateDescription`.');
|
|
115
|
+
process.exit(1);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
if (provider === 'gemini') {
|
|
120
|
+
if (!SUPPORTED_MODELS.gemini.includes(model)) {
|
|
121
|
+
console.error(`Please provide a valid model for Gemini. Options: ${SUPPORTED_MODELS.gemini.join(', ')}`);
|
|
122
|
+
process.exit(1);
|
|
123
|
+
}
|
|
124
|
+
if (!process.env.GOOGLE_APPLICATION_CREDENTIALS) {
|
|
125
|
+
console.error('Please set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to use Gemini for `generateDescription`.');
|
|
126
|
+
process.exit(1);
|
|
127
|
+
}
|
|
128
|
+
}
|
|
88
129
|
}
|
|
89
130
|
|
|
90
|
-
run(path.resolve(targetDir), output, customFunction, customSourceDetails, generateDescription);
|
|
131
|
+
run(path.resolve(targetDir), output, customFunction, customSourceDetails, generateDescription, provider, model);
|
package/bin/help.js
CHANGED
|
@@ -46,6 +46,22 @@ const helpContent = [
|
|
|
46
46
|
defaultValue: false,
|
|
47
47
|
typeLabel: '{underline false}'
|
|
48
48
|
},
|
|
49
|
+
{
|
|
50
|
+
name: 'provider',
|
|
51
|
+
alias: 'p',
|
|
52
|
+
description: 'Specify a provider (options: {italic openai}, {italic gemini})',
|
|
53
|
+
type: String,
|
|
54
|
+
defaultValue: 'openai',
|
|
55
|
+
typeLabel: '{underline openai}'
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
name: 'model',
|
|
59
|
+
alias: 'm',
|
|
60
|
+
description: 'Specify a model (options: {italic gpt-4o-mini}, {italic gemini-2.0-flash-lite-001})',
|
|
61
|
+
type: String,
|
|
62
|
+
defaultValue: 'gpt-4o-mini',
|
|
63
|
+
typeLabel: '{underline gpt-4o-mini}'
|
|
64
|
+
},
|
|
49
65
|
{
|
|
50
66
|
name: 'output',
|
|
51
67
|
alias: 'o',
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@flisk/analyze-tracking",
|
|
3
|
-
"version": "0.4.0",
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "Analyzes tracking code in a project and generates data schemas",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -20,6 +20,9 @@
|
|
|
20
20
|
},
|
|
21
21
|
"homepage": "https://github.com/fliskdata/analyze-tracking#readme",
|
|
22
22
|
"dependencies": {
|
|
23
|
+
"@langchain/core": "^0.3.49",
|
|
24
|
+
"@langchain/google-vertexai": "^0.2.5",
|
|
25
|
+
"@langchain/openai": "^0.5.7",
|
|
23
26
|
"@ruby/prism": "^1.4.0",
|
|
24
27
|
"@typescript-eslint/parser": "^8.1.0",
|
|
25
28
|
"acorn": "^8.12.1",
|
|
@@ -30,9 +33,8 @@
|
|
|
30
33
|
"command-line-usage": "^7.0.3",
|
|
31
34
|
"isomorphic-git": "^1.27.1",
|
|
32
35
|
"js-yaml": "^4.1.0",
|
|
33
|
-
"openai": "^4.67.1",
|
|
34
36
|
"typescript": "^5.5.4",
|
|
35
|
-
"zod": "^3.
|
|
37
|
+
"zod": "^3.24.3"
|
|
36
38
|
},
|
|
37
39
|
"devDependencies": {
|
|
38
40
|
"jest": "^29.7.0"
|
package/src/analyze/index.js
CHANGED
|
@@ -6,11 +6,11 @@ const { analyzeTsFile } = require('./analyzeTsFile');
|
|
|
6
6
|
const { analyzeRubyFile } = require('./analyzeRubyFile');
|
|
7
7
|
|
|
8
8
|
async function analyzeDirectory(dirPath, customFunction) {
|
|
9
|
-
const files = getAllFiles(dirPath);
|
|
10
9
|
const allEvents = {};
|
|
11
10
|
|
|
11
|
+
const files = getAllFiles(dirPath);
|
|
12
12
|
const tsFiles = files.filter(file => /\.(tsx?)$/.test(file));
|
|
13
|
-
const
|
|
13
|
+
const tsProgram = ts.createProgram(tsFiles, {
|
|
14
14
|
target: ts.ScriptTarget.ESNext,
|
|
15
15
|
module: ts.ModuleKind.CommonJS,
|
|
16
16
|
});
|
|
@@ -20,12 +20,12 @@ async function analyzeDirectory(dirPath, customFunction) {
|
|
|
20
20
|
|
|
21
21
|
const isJsFile = /\.(jsx?)$/.test(file);
|
|
22
22
|
const isTsFile = /\.(tsx?)$/.test(file);
|
|
23
|
-
const isRubyFile = /\.(rb
|
|
23
|
+
const isRubyFile = /\.(rb)$/.test(file);
|
|
24
24
|
|
|
25
25
|
if (isJsFile) {
|
|
26
26
|
events = analyzeJsFile(file, customFunction);
|
|
27
27
|
} else if (isTsFile) {
|
|
28
|
-
events = analyzeTsFile(file,
|
|
28
|
+
events = analyzeTsFile(file, tsProgram, customFunction);
|
|
29
29
|
} else if (isRubyFile) {
|
|
30
30
|
events = await analyzeRubyFile(file);
|
|
31
31
|
} else {
|
package/src/fileProcessor.js
CHANGED
|
@@ -26,6 +26,7 @@ function getAllFiles(dirPath, arrayOfFiles = []) {
|
|
|
26
26
|
if (file === 'coverage') return
|
|
27
27
|
if (file === 'temp') return
|
|
28
28
|
if (file === 'tmp') return
|
|
29
|
+
if (file === 'log') return
|
|
29
30
|
|
|
30
31
|
if (stats.isDirectory()) {
|
|
31
32
|
arrayOfFiles = getAllFiles(fullPath, arrayOfFiles);
|
|
package/src/generateDescriptions.js
CHANGED
|
@@ -1,13 +1,7 @@
|
|
|
1
1
|
const fs = require('fs');
|
|
2
2
|
const path = require('path');
|
|
3
|
-
const OpenAI = require('openai');
|
|
4
3
|
const { z } = require('zod');
|
|
5
|
-
const {
|
|
6
|
-
|
|
7
|
-
const openai = new OpenAI({
|
|
8
|
-
apiKey: process.env.OPENAI_API_KEY || 'undefined',
|
|
9
|
-
});
|
|
10
|
-
const model = 'gpt-4o-mini';
|
|
4
|
+
const { PromptTemplate } = require('@langchain/core/prompts');
|
|
11
5
|
|
|
12
6
|
function createPrompt(eventName, properties, implementations, codebaseDir) {
|
|
13
7
|
let prompt = `Event Name: "${eventName}"\n\n`;
|
|
@@ -107,26 +101,22 @@ function createEventDescriptionSchema(properties) {
|
|
|
107
101
|
return eventDescriptionSchema;
|
|
108
102
|
}
|
|
109
103
|
|
|
110
|
-
async function sendPromptToLLM(prompt, schema) {
|
|
104
|
+
async function sendPromptToLLM(prompt, schema, model) {
|
|
111
105
|
try {
|
|
112
|
-
const
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
{
|
|
120
|
-
role: 'user',
|
|
121
|
-
content: prompt,
|
|
122
|
-
},
|
|
123
|
-
],
|
|
124
|
-
response_format: zodResponseFormat(schema, 'event_description'),
|
|
106
|
+
const promptTemplate = new PromptTemplate({
|
|
107
|
+
template: `You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.\n{input}`,
|
|
108
|
+
inputVariables: ['input'],
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
const formattedPrompt = await promptTemplate.format({
|
|
112
|
+
input: prompt,
|
|
125
113
|
});
|
|
126
114
|
|
|
115
|
+
const structuredModel = model.withStructuredOutput(schema);
|
|
116
|
+
const response = await structuredModel.invoke(formattedPrompt);
|
|
117
|
+
|
|
127
118
|
return {
|
|
128
|
-
descriptions:
|
|
129
|
-
usage: completion.usage,
|
|
119
|
+
descriptions: response,
|
|
130
120
|
};
|
|
131
121
|
} catch (error) {
|
|
132
122
|
console.error('Error during LLM response parsing:', error);
|
|
@@ -134,7 +124,7 @@ async function sendPromptToLLM(prompt, schema) {
|
|
|
134
124
|
}
|
|
135
125
|
}
|
|
136
126
|
|
|
137
|
-
async function generateEventDescription(eventName, event, codebaseDir) {
|
|
127
|
+
async function generateEventDescription(eventName, event, codebaseDir, model) {
|
|
138
128
|
const properties = event.properties || {};
|
|
139
129
|
const implementations = event.implementations || [];
|
|
140
130
|
|
|
@@ -145,31 +135,23 @@ async function generateEventDescription(eventName, event, codebaseDir) {
|
|
|
145
135
|
const eventDescriptionSchema = createEventDescriptionSchema(properties);
|
|
146
136
|
|
|
147
137
|
// Send prompt to the LLM and get the structured response
|
|
148
|
-
const { descriptions, usage } = await sendPromptToLLM(prompt, eventDescriptionSchema);
|
|
138
|
+
const { descriptions } = await sendPromptToLLM(prompt, eventDescriptionSchema, model);
|
|
149
139
|
|
|
150
|
-
return { eventName, descriptions, usage };
|
|
140
|
+
return { eventName, descriptions };
|
|
151
141
|
}
|
|
152
142
|
|
|
153
|
-
async function generateDescriptions(events, codebaseDir) {
|
|
154
|
-
console.log(`Generating descriptions using ${model}`);
|
|
155
|
-
|
|
143
|
+
async function generateDescriptions(events, codebaseDir, model) {
|
|
156
144
|
const eventPromises = Object.entries(events).map(([eventName, event]) =>
|
|
157
|
-
generateEventDescription(eventName, event, codebaseDir)
|
|
145
|
+
generateEventDescription(eventName, event, codebaseDir, model)
|
|
158
146
|
);
|
|
159
147
|
|
|
160
148
|
console.log(`Running ${eventPromises.length} prompts in parallel...`);
|
|
161
149
|
|
|
162
150
|
const results = await Promise.all(eventPromises);
|
|
163
151
|
|
|
164
|
-
let promptTokens = 0;
|
|
165
|
-
let completionTokens = 0;
|
|
166
|
-
|
|
167
152
|
// Process results and update the events object
|
|
168
|
-
results.forEach(({ eventName, descriptions, usage }) => {
|
|
153
|
+
results.forEach(({ eventName, descriptions }) => {
|
|
169
154
|
if (descriptions) {
|
|
170
|
-
promptTokens += usage.prompt_tokens;
|
|
171
|
-
completionTokens += usage.completion_tokens;
|
|
172
|
-
|
|
173
155
|
const event = events[eventName];
|
|
174
156
|
event.description = descriptions.eventDescription;
|
|
175
157
|
|
|
@@ -208,10 +190,6 @@ async function generateDescriptions(events, codebaseDir) {
|
|
|
208
190
|
}
|
|
209
191
|
});
|
|
210
192
|
|
|
211
|
-
console.log(`Prompt tokens used: ${promptTokens}`);
|
|
212
|
-
console.log(`Completion tokens used: ${completionTokens}`);
|
|
213
|
-
console.log(`Total tokens used: ${promptTokens + completionTokens}`);
|
|
214
|
-
|
|
215
193
|
return events;
|
|
216
194
|
}
|
|
217
195
|
|
package/src/index.js
CHANGED
|
@@ -3,10 +3,31 @@ const { getRepoDetails } = require('./repoDetails');
|
|
|
3
3
|
const { generateYamlSchema } = require('./yamlGenerator');
|
|
4
4
|
const { generateDescriptions } = require('./generateDescriptions');
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
const { ChatOpenAI } = require('@langchain/openai');
|
|
7
|
+
const { ChatVertexAI } = require('@langchain/google-vertexai');
|
|
8
|
+
|
|
9
|
+
async function run(targetDir, outputPath, customFunction, customSourceDetails, generateDescription, provider, model) {
|
|
7
10
|
let events = await analyzeDirectory(targetDir, customFunction);
|
|
8
11
|
if (generateDescription) {
|
|
9
|
-
|
|
12
|
+
let llm;
|
|
13
|
+
if (provider === 'openai') {
|
|
14
|
+
llm = new ChatOpenAI({
|
|
15
|
+
modelName: model,
|
|
16
|
+
temperature: 0,
|
|
17
|
+
});
|
|
18
|
+
}
|
|
19
|
+
if (provider === 'gemini') {
|
|
20
|
+
llm = new ChatVertexAI({
|
|
21
|
+
modelName: model,
|
|
22
|
+
temperature: 0,
|
|
23
|
+
});
|
|
24
|
+
}
|
|
25
|
+
if (!llm) {
|
|
26
|
+
console.error('Please provide a valid AI model provider for `generateDescription`. Options: openai, gemini');
|
|
27
|
+
process.exit(1);
|
|
28
|
+
}
|
|
29
|
+
console.log(`Generating descriptions using ${provider} model ${model}`);
|
|
30
|
+
events = await generateDescriptions(events, targetDir, llm);
|
|
10
31
|
}
|
|
11
32
|
const repoDetails = await getRepoDetails(targetDir, customSourceDetails);
|
|
12
33
|
generateYamlSchema(events, repoDetails, outputPath);
|