@flisk/analyze-tracking 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -25,10 +25,12 @@ npx @flisk/analyze-tracking /path/to/project [options]
25
25
 
26
26
  ### Key Options:
27
27
  - `-g, --generateDescription`: Generate descriptions of fields (default: `false`)
28
+ - `-p, --provider <provider>`: Specify a provider (options: `openai`, `gemini`)
29
+ - `-m, --model <model>`: Specify a model (options: `gpt-4o-mini`, `gemini-2.0-flash-lite-001`)
28
30
  - `-o, --output <output_file>`: Name of the output file (default: `tracking-schema.yaml`)
29
31
  - `-c, --customFunction <function_name>`: Specify a custom tracking function
30
32
 
31
- 🔑&nbsp; **Important:** you must set the `OPENAI_API_KEY` environment variable to use `generateDescription`
33
+ 🔑&nbsp; **Important:** If you are using `generateDescription`, you must set the appropriate credentials for the provider you are using as an environment variable. OpenAI uses `OPENAI_API_KEY` and Google Vertex AI uses `GOOGLE_APPLICATION_CREDENTIALS`.
32
34
 
33
35
  <details>
34
36
  <summary>Note on Custom Functions 💡</summary>
package/bin/cli.js CHANGED
@@ -6,6 +6,11 @@ const commandLineUsage = require('command-line-usage');
6
6
  const { run } = require('../src/index');
7
7
  const { helpContent } = require('./help');
8
8
 
9
+ const SUPPORTED_MODELS = {
10
+ openai: ['gpt-4o-mini'],
11
+ gemini: ['gemini-2.0-flash-lite-001'],
12
+ };
13
+
9
14
  // Parse command-line arguments
10
15
  const optionDefinitions = [
11
16
  {
@@ -19,6 +24,18 @@ const optionDefinitions = [
19
24
  type: Boolean,
20
25
  defaultValue: false,
21
26
  },
27
+ {
28
+ name: 'provider',
29
+ alias: 'p',
30
+ type: String,
31
+ defaultValue: 'openai',
32
+ },
33
+ {
34
+ name: 'model',
35
+ alias: 'm',
36
+ type: String,
37
+ defaultValue: 'gpt-4o-mini',
38
+ },
22
39
  {
23
40
  name: 'output',
24
41
  alias: 'o',
@@ -55,6 +72,8 @@ const options = commandLineArgs(optionDefinitions);
55
72
  const {
56
73
  targetDir,
57
74
  generateDescription,
75
+ provider,
76
+ model,
58
77
  output,
59
78
  customFunction,
60
79
  repositoryUrl,
@@ -81,10 +100,32 @@ if (!targetDir) {
81
100
  }
82
101
 
83
102
  if (generateDescription) {
84
- if (!process.env.OPENAI_API_KEY) {
85
- console.error('Please set the `OPENAI_API_KEY` environment variable to use `generateDescription`.');
103
+ if (!Object.keys(SUPPORTED_MODELS).includes(provider)) {
104
+ console.error('Please provide a valid provider. Options: openai, gemini');
86
105
  process.exit(1);
87
106
  }
107
+
108
+ if (provider === 'openai') {
109
+ if (!SUPPORTED_MODELS.openai.includes(model)) {
110
+ console.error(`Please provide a valid model for OpenAI. Options: ${SUPPORTED_MODELS.openai.join(', ')}`);
111
+ process.exit(1);
112
+ }
113
+ if (!process.env.OPENAI_API_KEY) {
114
+ console.error('Please set the `OPENAI_API_KEY` environment variable to use OpenAI for `generateDescription`.');
115
+ process.exit(1);
116
+ }
117
+ }
118
+
119
+ if (provider === 'gemini') {
120
+ if (!SUPPORTED_MODELS.gemini.includes(model)) {
121
+ console.error(`Please provide a valid model for Gemini. Options: ${SUPPORTED_MODELS.gemini.join(', ')}`);
122
+ process.exit(1);
123
+ }
124
+ if (!process.env.GOOGLE_APPLICATION_CREDENTIALS) {
125
+ console.error('Please set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to use Gemini for `generateDescription`.');
126
+ process.exit(1);
127
+ }
128
+ }
88
129
  }
89
130
 
90
- run(path.resolve(targetDir), output, customFunction, customSourceDetails, generateDescription);
131
+ run(path.resolve(targetDir), output, customFunction, customSourceDetails, generateDescription, provider, model);
package/bin/help.js CHANGED
@@ -46,6 +46,22 @@ const helpContent = [
46
46
  defaultValue: false,
47
47
  typeLabel: '{underline false}'
48
48
  },
49
+ {
50
+ name: 'provider',
51
+ alias: 'p',
52
+ description: 'Specify a provider (options: {italic openai}, {italic gemini})',
53
+ type: String,
54
+ defaultValue: 'openai',
55
+ typeLabel: '{underline openai}'
56
+ },
57
+ {
58
+ name: 'model',
59
+ alias: 'm',
60
+ description: 'Specify a model (options: {italic gpt-4o-mini}, {italic gemini-2.0-flash-lite-001})',
61
+ type: String,
62
+ defaultValue: 'gpt-4o-mini',
63
+ typeLabel: '{underline gpt-4o-mini}'
64
+ },
49
65
  {
50
66
  name: 'output',
51
67
  alias: 'o',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flisk/analyze-tracking",
3
- "version": "0.4.1",
3
+ "version": "0.5.0",
4
4
  "description": "Analyzes tracking code in a project and generates data schemas",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -20,6 +20,9 @@
20
20
  },
21
21
  "homepage": "https://github.com/fliskdata/analyze-tracking#readme",
22
22
  "dependencies": {
23
+ "@langchain/core": "^0.3.49",
24
+ "@langchain/google-vertexai": "^0.2.5",
25
+ "@langchain/openai": "^0.5.7",
23
26
  "@ruby/prism": "^1.4.0",
24
27
  "@typescript-eslint/parser": "^8.1.0",
25
28
  "acorn": "^8.12.1",
@@ -30,9 +33,8 @@
30
33
  "command-line-usage": "^7.0.3",
31
34
  "isomorphic-git": "^1.27.1",
32
35
  "js-yaml": "^4.1.0",
33
- "openai": "^4.67.1",
34
36
  "typescript": "^5.5.4",
35
- "zod": "^3.23.8"
37
+ "zod": "^3.24.3"
36
38
  },
37
39
  "devDependencies": {
38
40
  "jest": "^29.7.0"
package/src/generateDescriptions.js CHANGED
@@ -1,13 +1,7 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
- const OpenAI = require('openai');
4
3
  const { z } = require('zod');
5
- const { zodResponseFormat } = require('openai/helpers/zod');
6
-
7
- const openai = new OpenAI({
8
- apiKey: process.env.OPENAI_API_KEY || 'undefined',
9
- });
10
- const model = 'gpt-4o-mini';
4
+ const { PromptTemplate } = require('@langchain/core/prompts');
11
5
 
12
6
  function createPrompt(eventName, properties, implementations, codebaseDir) {
13
7
  let prompt = `Event Name: "${eventName}"\n\n`;
@@ -107,26 +101,22 @@ function createEventDescriptionSchema(properties) {
107
101
  return eventDescriptionSchema;
108
102
  }
109
103
 
110
- async function sendPromptToLLM(prompt, schema) {
104
+ async function sendPromptToLLM(prompt, schema, model) {
111
105
  try {
112
- const completion = await openai.beta.chat.completions.parse({
113
- model,
114
- messages: [
115
- {
116
- role: 'system',
117
- content: 'You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.',
118
- },
119
- {
120
- role: 'user',
121
- content: prompt,
122
- },
123
- ],
124
- response_format: zodResponseFormat(schema, 'event_description'),
106
+ const promptTemplate = new PromptTemplate({
107
+ template: `You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.\n{input}`,
108
+ inputVariables: ['input'],
109
+ });
110
+
111
+ const formattedPrompt = await promptTemplate.format({
112
+ input: prompt,
125
113
  });
126
114
 
115
+ const structuredModel = model.withStructuredOutput(schema);
116
+ const response = await structuredModel.invoke(formattedPrompt);
117
+
127
118
  return {
128
- descriptions: completion.choices[0].message.parsed,
129
- usage: completion.usage,
119
+ descriptions: response,
130
120
  };
131
121
  } catch (error) {
132
122
  console.error('Error during LLM response parsing:', error);
@@ -134,7 +124,7 @@ async function sendPromptToLLM(prompt, schema) {
134
124
  }
135
125
  }
136
126
 
137
- async function generateEventDescription(eventName, event, codebaseDir) {
127
+ async function generateEventDescription(eventName, event, codebaseDir, model) {
138
128
  const properties = event.properties || {};
139
129
  const implementations = event.implementations || [];
140
130
 
@@ -145,31 +135,23 @@ async function generateEventDescription(eventName, event, codebaseDir) {
145
135
  const eventDescriptionSchema = createEventDescriptionSchema(properties);
146
136
 
147
137
  // Send prompt to the LLM and get the structured response
148
- const { descriptions, usage } = await sendPromptToLLM(prompt, eventDescriptionSchema);
138
+ const { descriptions } = await sendPromptToLLM(prompt, eventDescriptionSchema, model);
149
139
 
150
- return { eventName, descriptions, usage };
140
+ return { eventName, descriptions };
151
141
  }
152
142
 
153
- async function generateDescriptions(events, codebaseDir) {
154
- console.log(`Generating descriptions using ${model}`);
155
-
143
+ async function generateDescriptions(events, codebaseDir, model) {
156
144
  const eventPromises = Object.entries(events).map(([eventName, event]) =>
157
- generateEventDescription(eventName, event, codebaseDir)
145
+ generateEventDescription(eventName, event, codebaseDir, model)
158
146
  );
159
147
 
160
148
  console.log(`Running ${eventPromises.length} prompts in parallel...`);
161
149
 
162
150
  const results = await Promise.all(eventPromises);
163
151
 
164
- let promptTokens = 0;
165
- let completionTokens = 0;
166
-
167
152
  // Process results and update the events object
168
- results.forEach(({ eventName, descriptions, usage }) => {
153
+ results.forEach(({ eventName, descriptions }) => {
169
154
  if (descriptions) {
170
- promptTokens += usage.prompt_tokens;
171
- completionTokens += usage.completion_tokens;
172
-
173
155
  const event = events[eventName];
174
156
  event.description = descriptions.eventDescription;
175
157
 
@@ -208,10 +190,6 @@ async function generateDescriptions(events, codebaseDir) {
208
190
  }
209
191
  });
210
192
 
211
- console.log(`Prompt tokens used: ${promptTokens}`);
212
- console.log(`Completion tokens used: ${completionTokens}`);
213
- console.log(`Total tokens used: ${promptTokens + completionTokens}`);
214
-
215
193
  return events;
216
194
  }
217
195
 
package/src/index.js CHANGED
@@ -3,10 +3,31 @@ const { getRepoDetails } = require('./repoDetails');
3
3
  const { generateYamlSchema } = require('./yamlGenerator');
4
4
  const { generateDescriptions } = require('./generateDescriptions');
5
5
 
6
- async function run(targetDir, outputPath, customFunction, customSourceDetails, generateDescription) {
6
+ const { ChatOpenAI } = require('@langchain/openai');
7
+ const { ChatVertexAI } = require('@langchain/google-vertexai');
8
+
9
+ async function run(targetDir, outputPath, customFunction, customSourceDetails, generateDescription, provider, model) {
7
10
  let events = await analyzeDirectory(targetDir, customFunction);
8
11
  if (generateDescription) {
9
- events = await generateDescriptions(events, targetDir);
12
+ let llm;
13
+ if (provider === 'openai') {
14
+ llm = new ChatOpenAI({
15
+ modelName: model,
16
+ temperature: 0,
17
+ });
18
+ }
19
+ if (provider === 'gemini') {
20
+ llm = new ChatVertexAI({
21
+ modelName: model,
22
+ temperature: 0,
23
+ });
24
+ }
25
+ if (!llm) {
26
+ console.error('Please provide a valid AI model provider for `generateDescription`. Options: openai, gemini');
27
+ process.exit(1);
28
+ }
29
+ console.log(`Generating descriptions using ${provider} model ${model}`);
30
+ events = await generateDescriptions(events, targetDir, llm);
10
31
  }
11
32
  const repoDetails = await getRepoDetails(targetDir, customSourceDetails);
12
33
  generateYamlSchema(events, repoDetails, outputPath);