npm - @flisk/analyze-tracking - Versions diffs - 0.4.0 → 0.5.0 - Mend

@flisk/analyze-tracking 0.4.0 → 0.5.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (8)

package/README.md +3 -1
package/bin/cli.js +44 -3
package/bin/help.js +16 -0
package/package.json +5 -3
package/src/analyze/index.js +4 -4
package/src/fileProcessor.js +1 -0
package/src/generateDescriptions.js +19 -41
package/src/index.js +23 -2

package/README.md CHANGED Viewed

@@ -25,10 +25,12 @@ npx @flisk/analyze-tracking /path/to/project [options]
 ### Key Options:
 - `-g, --generateDescription`: Generate descriptions of fields (default: `false`)
+- `-p, --provider <provider>`: Specify a provider (options: `openai`, `gemini`)
+- `-m, --model <model>`: Specify a model (options: `gpt-4o-mini`, `gemini-2.0-flash-lite-001`)
 - `-o, --output <output_file>`: Name of the output file (default: `tracking-schema.yaml`)
 - `-c, --customFunction <function_name>`: Specify a custom tracking function
-🔑&nbsp; **Important:** you must set the `OPENAI_API_KEY` environment variable to use `generateDescription`
+🔑&nbsp; **Important:** If you are using `generateDescription`, you must set the appropriate credentials for the provider you are using as an environment variable. OpenAI uses `OPENAI_API_KEY` and Google Vertex AI uses `GOOGLE_APPLICATION_CREDENTIALS`.
 <details>
   <summary>Note on Custom Functions 💡</summary>

package/bin/cli.js CHANGED Viewed

@@ -6,6 +6,11 @@ const commandLineUsage = require('command-line-usage');
 const { run } = require('../src/index');
 const { helpContent } = require('./help');
+const SUPPORTED_MODELS = {
+  openai: ['gpt-4o-mini'],
+  gemini: ['gemini-2.0-flash-lite-001'],
+};
 // Parse command-line arguments
 const optionDefinitions = [
   {
@@ -19,6 +24,18 @@ const optionDefinitions = [
     type: Boolean,
     defaultValue: false,
   },
+  {
+    name: 'provider',
+    alias: 'p',
+    type: String,
+    defaultValue: 'openai',
+  },
+  {
+    name: 'model',
+    alias: 'm',
+    type: String,
+    defaultValue: 'gpt-4o-mini',
+  },
   {
     name: 'output',
     alias: 'o',
@@ -55,6 +72,8 @@ const options = commandLineArgs(optionDefinitions);
 const {
   targetDir,
   generateDescription,
+  provider,
+  model,
   output,
   customFunction,
   repositoryUrl,
@@ -81,10 +100,32 @@ if (!targetDir) {
 }
 if (generateDescription) {
-  if (!process.env.OPENAI_API_KEY) {
-    console.error('Please set the `OPENAI_API_KEY` environment variable to use `generateDescription`.');
+  if (!Object.keys(SUPPORTED_MODELS).includes(provider)) {
+    console.error('Please provide a valid provider. Options: openai, gemini');
     process.exit(1);
   }
+  if (provider === 'openai') {
+    if (!SUPPORTED_MODELS.openai.includes(model)) {
+      console.error(`Please provide a valid model for OpenAI. Options: ${SUPPORTED_MODELS.openai.join(', ')}`);
+      process.exit(1);
+    }
+    if (!process.env.OPENAI_API_KEY) {
+      console.error('Please set the `OPENAI_API_KEY` environment variable to use OpenAI for `generateDescription`.');
+      process.exit(1);
+    }
+  }
+  if (provider === 'gemini') {
+    if (!SUPPORTED_MODELS.gemini.includes(model)) {
+      console.error(`Please provide a valid model for Gemini. Options: ${SUPPORTED_MODELS.gemini.join(', ')}`);
+      process.exit(1);
+    }
+    if (!process.env.GOOGLE_APPLICATION_CREDENTIALS) {
+      console.error('Please set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to use Gemini for `generateDescription`.');
+      process.exit(1);
+    }
+  }
 }
-run(path.resolve(targetDir), output, customFunction, customSourceDetails, generateDescription);
+run(path.resolve(targetDir), output, customFunction, customSourceDetails, generateDescription, provider, model);

package/bin/help.js CHANGED Viewed

@@ -46,6 +46,22 @@ const helpContent = [
         defaultValue: false,
         typeLabel: '{underline false}'
       },
+      {
+        name: 'provider',
+        alias: 'p',
+        description: 'Specify a provider (options: {italic openai}, {italic gemini})',
+        type: String,
+        defaultValue: 'openai',
+        typeLabel: '{underline openai}'
+      },
+      {
+        name: 'model',
+        alias: 'm',
+        description: 'Specify a model (options: {italic gpt-4o-mini}, {italic gemini-2.0-flash-lite-001})',
+        type: String,
+        defaultValue: 'gpt-4o-mini',
+        typeLabel: '{underline gpt-4o-mini}'
+      },
       {
         name: 'output',
         alias: 'o',

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@flisk/analyze-tracking",
-  "version": "0.4.0",
+  "version": "0.5.0",
   "description": "Analyzes tracking code in a project and generates data schemas",
   "main": "src/index.js",
   "bin": {
@@ -20,6 +20,9 @@
   },
   "homepage": "https://github.com/fliskdata/analyze-tracking#readme",
   "dependencies": {
+    "@langchain/core": "^0.3.49",
+    "@langchain/google-vertexai": "^0.2.5",
+    "@langchain/openai": "^0.5.7",
     "@ruby/prism": "^1.4.0",
     "@typescript-eslint/parser": "^8.1.0",
     "acorn": "^8.12.1",
@@ -30,9 +33,8 @@
     "command-line-usage": "^7.0.3",
     "isomorphic-git": "^1.27.1",
     "js-yaml": "^4.1.0",
-    "openai": "^4.67.1",
     "typescript": "^5.5.4",
-    "zod": "^3.23.8"
+    "zod": "^3.24.3"
   },
   "devDependencies": {
     "jest": "^29.7.0"

package/src/analyze/index.js CHANGED Viewed

@@ -6,11 +6,11 @@ const { analyzeTsFile } = require('./analyzeTsFile');
 const { analyzeRubyFile } = require('./analyzeRubyFile');
 async function analyzeDirectory(dirPath, customFunction) {
-  const files = getAllFiles(dirPath);
   const allEvents = {};
+  const files = getAllFiles(dirPath);
   const tsFiles = files.filter(file => /\.(tsx?)$/.test(file));
-  const program = ts.createProgram(tsFiles, {
+  const tsProgram = ts.createProgram(tsFiles, {
     target: ts.ScriptTarget.ESNext,
     module: ts.ModuleKind.CommonJS,
   });
@@ -20,12 +20,12 @@ async function analyzeDirectory(dirPath, customFunction) {
     const isJsFile = /\.(jsx?)$/.test(file);
     const isTsFile = /\.(tsx?)$/.test(file);
-    const isRubyFile = /\.(rb|ru|rake|gemspec)$/.test(file);
+    const isRubyFile = /\.(rb)$/.test(file);
     if (isJsFile) {
       events = analyzeJsFile(file, customFunction);
     } else if (isTsFile) {
-      events = analyzeTsFile(file, program, customFunction);
+      events = analyzeTsFile(file, tsProgram, customFunction);
     } else if (isRubyFile) {
       events = await analyzeRubyFile(file);
     } else {

package/src/fileProcessor.js CHANGED Viewed

@@ -26,6 +26,7 @@ function getAllFiles(dirPath, arrayOfFiles = []) {
     if (file === 'coverage') return
     if (file === 'temp') return
     if (file === 'tmp') return
+    if (file === 'log') return
     if (stats.isDirectory()) {
       arrayOfFiles = getAllFiles(fullPath, arrayOfFiles);

package/src/generateDescriptions.js CHANGED Viewed

@@ -1,13 +1,7 @@
 const fs = require('fs');
 const path = require('path');
-const OpenAI = require('openai');
 const { z } = require('zod');
-const { zodResponseFormat } = require('openai/helpers/zod');
-const openai = new OpenAI({
-  apiKey: process.env.OPENAI_API_KEY || 'undefined',
-});
-const model = 'gpt-4o-mini';
+const { PromptTemplate } = require('@langchain/core/prompts');
 function createPrompt(eventName, properties, implementations, codebaseDir) {
   let prompt = `Event Name: "${eventName}"\n\n`;
@@ -107,26 +101,22 @@ function createEventDescriptionSchema(properties) {
   return eventDescriptionSchema;
 }
-async function sendPromptToLLM(prompt, schema) {
+async function sendPromptToLLM(prompt, schema, model) {
   try {
-    const completion = await openai.beta.chat.completions.parse({
-      model,
-      messages: [
-        {
-          role: 'system',
-          content: 'You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.',
-        },
-        {
-          role: 'user',
-          content: prompt,
-        },
-      ],
-      response_format: zodResponseFormat(schema, 'event_description'),
+    const promptTemplate = new PromptTemplate({
+      template: `You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.\n{input}`,
+      inputVariables: ['input'],
+    });
+    const formattedPrompt = await promptTemplate.format({
+      input: prompt,
     });
+    const structuredModel = model.withStructuredOutput(schema);
+    const response = await structuredModel.invoke(formattedPrompt);
     return {
-      descriptions: completion.choices[0].message.parsed,
-      usage: completion.usage,
+      descriptions: response,
     };
   } catch (error) {
     console.error('Error during LLM response parsing:', error);
@@ -134,7 +124,7 @@ async function sendPromptToLLM(prompt, schema) {
   }
 }
-async function generateEventDescription(eventName, event, codebaseDir) {
+async function generateEventDescription(eventName, event, codebaseDir, model) {
   const properties = event.properties || {};
   const implementations = event.implementations || [];
@@ -145,31 +135,23 @@ async function generateEventDescription(eventName, event, codebaseDir) {
   const eventDescriptionSchema = createEventDescriptionSchema(properties);
   // Send prompt to the LLM and get the structured response
-  const { descriptions, usage } = await sendPromptToLLM(prompt, eventDescriptionSchema);
+  const { descriptions } = await sendPromptToLLM(prompt, eventDescriptionSchema, model);
-  return { eventName, descriptions, usage };
+  return { eventName, descriptions };
 }
-async function generateDescriptions(events, codebaseDir) {
-  console.log(`Generating descriptions using ${model}`);
+async function generateDescriptions(events, codebaseDir, model) {
   const eventPromises = Object.entries(events).map(([eventName, event]) =>
-    generateEventDescription(eventName, event, codebaseDir)
+    generateEventDescription(eventName, event, codebaseDir, model)
   );
   console.log(`Running ${eventPromises.length} prompts in parallel...`);
   const results = await Promise.all(eventPromises);
-  let promptTokens = 0;
-  let completionTokens = 0;
   // Process results and update the events object
-  results.forEach(({ eventName, descriptions, usage }) => {
+  results.forEach(({ eventName, descriptions }) => {
     if (descriptions) {
-      promptTokens += usage.prompt_tokens;
-      completionTokens += usage.completion_tokens;
       const event = events[eventName];
       event.description = descriptions.eventDescription;
@@ -208,10 +190,6 @@ async function generateDescriptions(events, codebaseDir) {
     }
   });
-  console.log(`Prompt tokens used: ${promptTokens}`);
-  console.log(`Completion tokens used: ${completionTokens}`);
-  console.log(`Total tokens used: ${promptTokens + completionTokens}`);
   return events;
 }

package/src/index.js CHANGED Viewed

@@ -3,10 +3,31 @@ const { getRepoDetails } = require('./repoDetails');
 const { generateYamlSchema } = require('./yamlGenerator');
 const { generateDescriptions } = require('./generateDescriptions');
-async function run(targetDir, outputPath, customFunction, customSourceDetails, generateDescription) {
+const { ChatOpenAI } = require('@langchain/openai');
+const { ChatVertexAI } = require('@langchain/google-vertexai');
+async function run(targetDir, outputPath, customFunction, customSourceDetails, generateDescription, provider, model) {
   let events = await analyzeDirectory(targetDir, customFunction);
   if (generateDescription) {
-    events = await generateDescriptions(events, targetDir);
+    let llm;
+    if (provider === 'openai') {
+      llm = new ChatOpenAI({
+        modelName: model,
+        temperature: 0,
+      });
+    }
+    if (provider === 'gemini') {
+      llm = new ChatVertexAI({
+        modelName: model,
+        temperature: 0,
+      });
+    }
+    if (!llm) {
+      console.error('Please provide a valid AI model provider for `generateDescription`. Options: openai, gemini');
+      process.exit(1);
+    }
+    console.log(`Generating descriptions using ${provider} model ${model}`);
+    events = await generateDescriptions(events, targetDir, llm);
   }
   const repoDetails = await getRepoDetails(targetDir, customSourceDetails);
   generateYamlSchema(events, repoDetails, outputPath);