@flisk/analyze-tracking 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/bin/cli.js +1 -1
  2. package/package.json +9 -7
  3. package/src/analyze/go/astTraversal.js +121 -0
  4. package/src/analyze/go/constants.js +20 -0
  5. package/src/analyze/go/eventDeduplicator.js +47 -0
  6. package/src/analyze/go/eventExtractor.js +156 -0
  7. package/src/analyze/go/goAstParser/constants.js +39 -0
  8. package/src/analyze/go/goAstParser/expressionParser.js +281 -0
  9. package/src/analyze/go/goAstParser/index.js +52 -0
  10. package/src/analyze/go/goAstParser/statementParser.js +387 -0
  11. package/src/analyze/go/goAstParser/tokenizer.js +196 -0
  12. package/src/analyze/go/goAstParser/typeParser.js +202 -0
  13. package/src/analyze/go/goAstParser/utils.js +99 -0
  14. package/src/analyze/go/index.js +55 -0
  15. package/src/analyze/go/propertyExtractor.js +670 -0
  16. package/src/analyze/go/trackingDetector.js +71 -0
  17. package/src/analyze/go/trackingExtractor.js +54 -0
  18. package/src/analyze/go/typeContext.js +88 -0
  19. package/src/analyze/go/utils.js +215 -0
  20. package/src/analyze/index.js +11 -6
  21. package/src/analyze/javascript/constants.js +115 -0
  22. package/src/analyze/javascript/detectors/analytics-source.js +119 -0
  23. package/src/analyze/javascript/detectors/index.js +10 -0
  24. package/src/analyze/javascript/extractors/event-extractor.js +179 -0
  25. package/src/analyze/javascript/extractors/index.js +13 -0
  26. package/src/analyze/javascript/extractors/property-extractor.js +172 -0
  27. package/src/analyze/javascript/index.js +38 -0
  28. package/src/analyze/javascript/parser.js +126 -0
  29. package/src/analyze/javascript/utils/function-finder.js +123 -0
  30. package/src/analyze/python/index.js +111 -0
  31. package/src/analyze/python/pythonTrackingAnalyzer.py +814 -0
  32. package/src/analyze/ruby/detectors.js +46 -0
  33. package/src/analyze/ruby/extractors.js +258 -0
  34. package/src/analyze/ruby/index.js +51 -0
  35. package/src/analyze/ruby/traversal.js +123 -0
  36. package/src/analyze/ruby/types.js +30 -0
  37. package/src/analyze/ruby/visitor.js +66 -0
  38. package/src/analyze/typescript/constants.js +109 -0
  39. package/src/analyze/typescript/detectors/analytics-source.js +120 -0
  40. package/src/analyze/typescript/detectors/index.js +10 -0
  41. package/src/analyze/typescript/extractors/event-extractor.js +269 -0
  42. package/src/analyze/typescript/extractors/index.js +14 -0
  43. package/src/analyze/typescript/extractors/property-extractor.js +395 -0
  44. package/src/analyze/typescript/index.js +48 -0
  45. package/src/analyze/typescript/parser.js +131 -0
  46. package/src/analyze/typescript/utils/function-finder.js +114 -0
  47. package/src/analyze/typescript/utils/type-resolver.js +193 -0
  48. package/src/generateDescriptions/index.js +81 -0
  49. package/src/generateDescriptions/llmUtils.js +33 -0
  50. package/src/generateDescriptions/promptUtils.js +62 -0
  51. package/src/generateDescriptions/schemaUtils.js +61 -0
  52. package/src/index.js +7 -2
  53. package/src/{fileProcessor.js → utils/fileProcessor.js} +5 -0
  54. package/src/{repoDetails.js → utils/repoDetails.js} +5 -0
  55. package/src/{yamlGenerator.js → utils/yamlGenerator.js} +5 -0
  56. package/src/analyze/analyzeGoFile.js +0 -1164
  57. package/src/analyze/analyzeJsFile.js +0 -87
  58. package/src/analyze/analyzePythonFile.js +0 -42
  59. package/src/analyze/analyzeRubyFile.js +0 -419
  60. package/src/analyze/analyzeTsFile.js +0 -192
  61. package/src/analyze/go2json.js +0 -1069
  62. package/src/analyze/helpers.js +0 -656
  63. package/src/analyze/pythonTrackingAnalyzer.py +0 -541
  64. package/src/generateDescriptions.js +0 -196
@@ -0,0 +1,193 @@
1
+ /**
2
+ * @fileoverview Utilities for resolving TypeScript types and identifiers
3
+ * @module analyze/typescript/utils/type-resolver
4
+ */
5
+
6
+ const ts = require('typescript');
7
+
8
/**
 * Follows an identifier back to its declaration and returns that declaration's
 * initializer expression.
 *
 * Only variable declarations, property assignments and parameters are considered,
 * and only when they actually carry an initializer. Any exception raised while
 * querying the checker is swallowed and reported as "not resolvable".
 *
 * @param {Object} checker - TypeScript type checker
 * @param {Object} identifier - Identifier node to resolve
 * @param {Object} sourceFile - Source file containing the identifier (not read by this function)
 * @returns {Object|null} Initializer node, or null when nothing could be resolved
 */
function resolveIdentifierToInitializer(checker, identifier, sourceFile) {
  try {
    const declaration = checker.getSymbolAtLocation(identifier)?.valueDeclaration;
    if (!declaration) {
      return null;
    }

    // The three declaration kinds whose `initializer` can stand in for the identifier
    const carriesInitializer =
      ts.isVariableDeclaration(declaration) ||
      ts.isPropertyAssignment(declaration) ||
      ts.isParameter(declaration);

    const initializer = declaration.initializer;
    return carriesInitializer && initializer ? initializer : null;
  } catch (error) {
    return null;
  }
}
44
+
45
/**
 * Renders the checker's type for a node as a string.
 *
 * @param {Object} checker - TypeScript type checker
 * @param {Object} node - AST node
 * @returns {string} The checker's string form of the node's type, or 'any' if the checker throws
 */
function getTypeOfNode(checker, node) {
  let typeText = 'any';
  try {
    typeText = checker.typeToString(checker.getTypeAtLocation(node));
  } catch (error) {
    // Keep the 'any' fallback when the checker cannot type this node
  }
  return typeText;
}
59
+
60
/**
 * Reports whether `operator` occurs in a type string outside of any brackets or quotes.
 * @param {string} typeString - Type string to scan
 * @param {string} operator - Single character to look for (e.g. '|' or '&')
 * @returns {boolean} True when the operator appears at nesting depth zero
 */
function hasTopLevelOperator(typeString, operator) {
  let depth = 0;
  let quote = null;

  for (let i = 0; i < typeString.length; i++) {
    const ch = typeString[i];

    if (quote !== null) {
      if (ch === '\\') {
        i++; // skip the escaped character inside a string literal type
      } else if (ch === quote) {
        quote = null;
      }
    } else if (ch === '"' || ch === "'" || ch === '`') {
      quote = ch;
    } else if (ch === '(' || ch === '[' || ch === '{' || ch === '<') {
      depth++;
    } else if (ch === ')' || ch === ']' || ch === '}' || (ch === '>' && typeString[i - 1] !== '=')) {
      // The '=' look-behind keeps the '>' of an arrow ('=>') from closing a bracket
      depth = Math.max(0, depth - 1);
    } else if (ch === operator && depth === 0) {
      return true;
    }
  }

  return false;
}

/**
 * Resolves a type string to its properties structure
 *
 * Resolution order: cycle guard, primitives, top-level unions / intersections,
 * array forms (readonly forms first), nested unions / intersections, custom
 * types, and finally the raw type string.
 *
 * @param {Object} checker - TypeScript type checker (only forwarded to recursive calls)
 * @param {string} typeString - Type string to resolve
 * @param {Set} [visitedTypes] - Set of visited types to prevent cycles
 * @returns {Object} Resolved type structure: `{ type }`, `{ type: 'array', items }`
 *   or `{ type: 'object', __unresolved }` for custom types
 */
function resolveTypeToProperties(checker, typeString, visitedTypes = new Set()) {
  // Nothing sensible can be derived from a missing or empty type string
  if (typeof typeString !== 'string' || typeString.length === 0) {
    return { type: 'any' };
  }

  // Prevent infinite recursion for circular references
  if (visitedTypes.has(typeString)) {
    return { type: 'object' };
  }

  // Handle primitive types
  if (['string', 'number', 'boolean', 'any', 'unknown', 'null', 'undefined', 'void', 'never'].includes(typeString)) {
    return { type: typeString };
  }

  // A union / intersection at the top level must win over the array patterns below:
  // 'string | number[]' is a union, not an array of 'string | number'.
  if (hasTopLevelOperator(typeString, '|')) {
    return { type: typeString };
  }
  if (hasTopLevelOperator(typeString, '&')) {
    // For simplicity, mark intersection types as 'object'
    return { type: 'object' };
  }

  // Handle array types. The readonly forms are tried first because the generic
  // 'T[]' pattern would otherwise capture 'readonly T' as the element type.
  const arrayMatch =
    typeString.match(/^readonly (.+)\[\]$/) ||
    typeString.match(/^ReadonlyArray<(.+)>$/) ||
    typeString.match(/^(.+)\[\]$/) ||
    typeString.match(/^Array<(.+)>$/);
  if (arrayMatch) {
    const elementType = arrayMatch[1].trim();
    visitedTypes.add(typeString);
    return {
      type: 'array',
      items: resolveTypeToProperties(checker, elementType, visitedTypes)
    };
  }

  // Unions nested inside brackets or generics - preserve them as-is
  if (typeString.includes('|')) {
    return { type: typeString };
  }

  // Intersections nested inside brackets or generics
  if (typeString.includes('&')) {
    return { type: 'object' };
  }

  // Check if it looks like a custom type/interface
  if (isCustomType(typeString)) {
    return {
      type: 'object',
      __unresolved: typeString
    };
  }

  // Default case - preserve the type string as-is
  return { type: typeString };
}
124
+
125
/**
 * Checks if a type string represents a custom type or interface
 *
 * Heuristic: the first character is unchanged by `toUpperCase()` and the string
 * contains none of `<`, `|`, `&`, `(`, `[`. Characters without a case (digits,
 * `{`, quotes, ...) therefore pass the first test as well.
 *
 * @param {string} typeString - Type string to check
 * @returns {boolean} False for empty or non-string input, otherwise the heuristic's result
 */
function isCustomType(typeString) {
  // Guard: indexing an empty or missing string would throw on `.toUpperCase()`
  if (typeof typeString !== 'string' || typeString.length === 0) {
    return false;
  }

  const firstChar = typeString[0];
  return firstChar === firstChar.toUpperCase() && !/[<|&(\[]/.test(typeString);
}
139
+
140
/**
 * Classifies an array element into a coarse type name.
 *
 * Literal syntax is recognised directly from the node; anything else is
 * classified from the type string reported for the node.
 *
 * @param {Object} checker - TypeScript type checker
 * @param {Object} element - Array element node
 * @returns {string} One of 'string', 'number', 'boolean', 'object', 'array',
 *   'null', 'undefined', or 'any' when nothing matches or the element is missing
 */
function getBasicTypeOfArrayElement(checker, element) {
  if (!element) return 'any';

  // Syntax-level checks, evaluated in order; the first match wins
  const literalRules = [
    [(node) => ts.isStringLiteral(node), 'string'],
    [(node) => ts.isNumericLiteral(node), 'number'],
    [
      (node) => node.kind === ts.SyntaxKind.TrueKeyword || node.kind === ts.SyntaxKind.FalseKeyword,
      'boolean'
    ],
    [(node) => ts.isObjectLiteralExpression(node), 'object'],
    [(node) => ts.isArrayLiteralExpression(node), 'array'],
    [(node) => node.kind === ts.SyntaxKind.NullKeyword, 'null'],
    [(node) => node.kind === ts.SyntaxKind.UndefinedKeyword, 'undefined']
  ];
  const literalHit = literalRules.find(([matches]) => matches(element));
  if (literalHit) {
    return literalHit[1];
  }

  // Identifiers and other expressions: classify the reported type string instead
  const inferred = getTypeOfNode(checker, element);

  if (inferred.startsWith('"') || inferred.startsWith("'")) {
    return 'string'; // String literal type
  }
  if (!Number.isNaN(Number(inferred))) {
    return 'number'; // Numeric literal type
  }
  if (inferred === 'true' || inferred === 'false') {
    return 'boolean'; // Boolean literal type
  }
  if (inferred.includes('[]') || inferred.startsWith('Array<')) {
    return 'array';
  }
  if (['string', 'number', 'boolean', 'object', 'null', 'undefined'].includes(inferred)) {
    return inferred;
  }

  return isCustomType(inferred) ? 'object' : 'any';
}
186
+
187
+ module.exports = {
188
+ resolveIdentifierToInitializer,
189
+ getTypeOfNode,
190
+ resolveTypeToProperties,
191
+ isCustomType,
192
+ getBasicTypeOfArrayElement
193
+ };
@@ -0,0 +1,81 @@
1
+ /**
2
+ * @fileoverview AI-powered description generator for analytics events
3
+ * @module analyze-tracking/generateDescriptions
4
+ */
5
+
6
+ const { createPrompt } = require('./promptUtils');
7
+ const { createEventDescriptionSchema } = require('./schemaUtils');
8
+ const { sendPromptToLLM } = require('./llmUtils');
9
+
10
/**
 * Asks the LLM for descriptions of a single event.
 *
 * Builds the prompt and the matching output schema from the event's properties
 * and implementations (each defaulting to empty when absent), then sends both
 * to the model.
 *
 * @param {string} eventName - Name of the event
 * @param {Object} event - Event entry; `properties` and `implementations` are read from it
 * @param {string} codebaseDir - Directory passed through to the prompt builder
 * @param {Object} model - Model passed through to `sendPromptToLLM`
 * @returns {Promise<{eventName: string, descriptions: (Object|null)}>} The event name
 *   with the returned descriptions, or `descriptions: null` when none came back
 */
async function generateEventDescription(eventName, event, codebaseDir, model) {
  const eventProperties = event.properties || {};
  const eventImplementations = event.implementations || [];

  // Prompt first, then the schema describing the structured output expected back
  const llmResult = await sendPromptToLLM(
    createPrompt(eventName, eventProperties, eventImplementations, codebaseDir),
    createEventDescriptionSchema(eventProperties),
    model
  );

  let descriptions = null;
  if (llmResult && llmResult.descriptions) {
    descriptions = llmResult.descriptions;
  }
  return { eventName, descriptions };
}
25
+
26
/**
 * Copies generated property descriptions onto the matching event properties,
 * recursing into nested `properties`.
 * @param {Object} [eventProperties] - Properties of the event (mutated in place)
 * @param {Object} [descriptionProperties] - Generated descriptions keyed by property name
 */
function applyPropertyDescriptions(eventProperties, descriptionProperties) {
  if (!eventProperties || !descriptionProperties) {
    return;
  }

  for (const [propName, generated] of Object.entries(descriptionProperties)) {
    // Own-property check: a generated key such as "toString" must not reach the prototype
    if (!Object.prototype.hasOwnProperty.call(eventProperties, propName)) {
      continue;
    }
    const target = eventProperties[propName];
    if (!target || !generated) {
      continue;
    }
    target.description = generated.description;
    applyPropertyDescriptions(target.properties, generated.properties);
  }
}

/**
 * Copies generated implementation descriptions onto the event's implementations,
 * pairing them by index and verifying path and line before writing.
 * @param {string} eventName - Event name, used only in the mismatch message
 * @param {Array} [implementations] - Implementations of the event (mutated in place)
 * @param {Array} [described] - Generated implementation descriptions
 */
function applyImplementationDescriptions(eventName, implementations, described) {
  if (!Array.isArray(implementations) || !Array.isArray(described)) {
    return;
  }

  described.forEach((generated, index) => {
    const target = implementations[index];
    if (!target || !generated) {
      return; // surplus generated entries are ignored
    }
    if (target.path === generated.path && target.line === generated.line) {
      target.description = generated.description;
    } else {
      console.error(`Returned implementation description does not match path or line for event: ${eventName}`);
    }
  });
}

/**
 * Generates descriptions for every event and writes them back onto `events`.
 *
 * All events are sent to the LLM in parallel. Events for which no descriptions
 * came back are logged and left untouched. Events lacking `properties` or
 * `implementations` are tolerated, matching the defaults applied when the
 * prompt is built.
 *
 * @param {Object} events - Events keyed by name (mutated in place)
 * @param {string} codebaseDir - Directory forwarded to `generateEventDescription`
 * @param {Object} model - Model forwarded to `generateEventDescription`
 * @returns {Promise<Object>} The same `events` object, with descriptions filled in
 */
async function generateDescriptions(events, codebaseDir, model) {
  const eventPromises = Object.entries(events).map(([eventName, event]) =>
    generateEventDescription(eventName, event, codebaseDir, model)
  );

  console.info(`Running ${eventPromises.length} prompts in parallel...`);

  const results = await Promise.all(eventPromises);

  // Process results and update the events object
  for (const { eventName, descriptions } of results) {
    if (!descriptions) {
      console.error(`Failed to get description for event: ${eventName}`);
      continue;
    }

    const event = events[eventName];
    event.description = descriptions.eventDescription;
    applyPropertyDescriptions(event.properties, descriptions.properties);
    applyImplementationDescriptions(eventName, event.implementations, descriptions.implementations);
  }

  return events;
}
78
+
79
+ module.exports = {
80
+ generateDescriptions
81
+ };
@@ -0,0 +1,33 @@
1
+ /**
2
+ * @fileoverview LLM integration utilities for generating structured responses
3
+ * @module analyze-tracking/generateDescriptions/llmUtils
4
+ */
5
+
6
+ const { PromptTemplate } = require('@langchain/core/prompts');
7
+
8
/**
 * Sends a prompt to the model and returns its structured response.
 *
 * The prompt is inserted into a fixed instruction template, and the model is
 * wrapped with `withStructuredOutput(schema)` before being invoked.
 *
 * @param {string} prompt - Text substituted for `{input}` in the template
 * @param {Object} schema - Schema handed to `model.withStructuredOutput`
 * @param {Object} model - Model exposing `withStructuredOutput`
 * @returns {Promise<{descriptions: Object}|null>} The wrapped response, or null
 *   after logging when any step throws
 */
async function sendPromptToLLM(prompt, schema, model) {
  const template =
    'You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.\n{input}';

  try {
    const formattedPrompt = await new PromptTemplate({
      template,
      inputVariables: ['input'],
    }).format({ input: prompt });

    const descriptions = await model.withStructuredOutput(schema).invoke(formattedPrompt);
    return { descriptions };
  } catch (error) {
    console.error('Error during LLM response parsing:', error.message);
    return null;
  }
}
30
+
31
+ module.exports = {
32
+ sendPromptToLLM
33
+ };
@@ -0,0 +1,62 @@
1
+ /**
2
+ * @fileoverview Utilities for creating LLM prompts with code snippets
3
+ * @module analyze-tracking/generateDescriptions/promptUtils
4
+ */
5
+
6
+ const fs = require('fs');
7
+ const path = require('path');
8
+
9
/**
 * Builds the LLM prompt text for one event.
 *
 * The prompt lists the event name, its properties (nested properties are
 * listed under a "Sub-properties" heading with deeper indentation) and each
 * implementation together with a code snippet read from the codebase.
 *
 * @param {string} eventName - Name of the event
 * @param {Object} properties - Properties keyed by name; each has `type` and optionally `properties`
 * @param {Iterable<Object>} implementations - Entries with `path`, `line`, `function`, `destination`
 * @param {string} codebaseDir - Directory that implementation paths are joined onto
 * @returns {string} The assembled prompt
 */
function createPrompt(eventName, properties, implementations, codebaseDir) {
  // Returns the prompt fragments for one level of properties, depth-first
  const describeProperties = (props, indent) => {
    const fragments = [];
    for (const propName in props) {
      const prop = props[propName];
      fragments.push(`${indent}- "${propName}" (type: ${prop.type})\n`);
      if (prop.properties) {
        fragments.push(`${indent} Sub-properties:\n`);
        fragments.push(...describeProperties(prop.properties, indent + ' '));
      }
    }
    return fragments;
  };

  // Returns the prompt fragment for one implementation, including its fenced snippet
  const describeImplementation = (impl) => {
    const codeSnippet = getCodeSnippet(path.join(codebaseDir, impl.path), impl.line);
    const header = `- Path: "${impl.path}", Line: ${impl.line}, Function: "${impl.function}", Destination: "${impl.destination}"\n`;
    return header + `Code Snippet:\n` + '```\n' + codeSnippet + '\n' + '```\n';
  };

  const sections = [
    `Event Name: "${eventName}"\n\n`,
    `Properties:\n`,
    ...describeProperties(properties, ''),
    `\nImplementations:\n`,
  ];
  for (const impl of implementations) {
    sections.push(describeImplementation(impl));
  }

  return sections.join('');
}
39
+
40
/**
 * Reads a file and returns the lines surrounding a given line number.
 *
 * @param {string} filePath - File to read (as UTF-8)
 * @param {number} lineNumber - 1-based line the snippet is centred on
 * @param {number} [contextLines=5] - Lines of context kept on each side
 * @returns {string} The selected lines joined with '\n', or '' when the file
 *   cannot be read (the failure is logged unless running under test)
 */
function getCodeSnippet(filePath, lineNumber, contextLines = 5) {
  try {
    const allLines = fs.readFileSync(filePath, 'utf8').split('\n');

    // Window is [from, to) in 0-based indices, clamped to the file's bounds
    const from = Math.max(0, lineNumber - contextLines - 1);
    const to = Math.min(allLines.length, lineNumber + contextLines);

    return allLines.slice(from, to).join('\n');
  } catch (e) {
    // Only log errors if not in test mode
    const underTest = process.env.NODE_ENV === 'test' || Boolean(process.env.NODE_TEST_CONTEXT);
    if (!underTest) {
      console.error(`Failed to read file ${filePath}:`, e);
    }
    return '';
  }
}
58
+
59
+ module.exports = {
60
+ createPrompt,
61
+ getCodeSnippet
62
+ };
@@ -0,0 +1,61 @@
1
+ /**
2
+ * @fileoverview Zod schema utilities for structuring event descriptions
3
+ * @module analyze-tracking/generateDescriptions/schemaUtils
4
+ */
5
+
6
+ const { z } = require('zod');
7
+
8
/**
 * Builds the Zod schema for the structured description of one event.
 *
 * The schema mirrors the shape of `properties`: every property gets a
 * `description` string, and properties with nested `properties` also get a
 * nested `properties` object built the same way. Implementations are an array
 * of `{ description, path, line }`.
 *
 * @param {Object} properties - Event properties keyed by name
 * @returns {Object} Zod object schema with `eventDescription`, `properties` and `implementations`
 */
function createEventDescriptionSchema(properties) {
  const propertyHint = 'A maximum of 10 words describing the property and what it means';

  // Maps each property in `props` to its schema
  const toShape = (props) => {
    const shape = {};
    for (const propName in props) {
      shape[propName] = buildPropertySchema(props[propName]);
    }
    return shape;
  };

  // Schema for a single property; nested properties add a `properties` object
  const buildPropertySchema = (prop) => {
    const shape = { description: z.string().describe(propertyHint) };
    if (prop.properties) {
      shape.properties = z.object(toShape(prop.properties));
    }
    return z.object(shape);
  };

  const implementationSchema = z.object({
    description: z
      .string()
      .describe('A maximum of 10 words describing how this event is triggered without using the word "triggered"'),
    path: z.string(),
    line: z.number(),
  });

  return z.object({
    eventDescription: z
      .string()
      .describe('A maximum of 10 words describing the event and what it tracks without using the word "tracks"'),
    properties: z.object(toShape(properties)),
    implementations: z.array(implementationSchema),
  });
}
58
+
59
+ module.exports = {
60
+ createEventDescriptionSchema
61
+ };
package/src/index.js CHANGED
@@ -1,6 +1,11 @@
1
+ /**
2
+ * @fileoverview Main entry point for the analytics tracking analyzer
3
+ * @module analyze-tracking
4
+ */
5
+
1
6
  const { analyzeDirectory } = require('./analyze');
2
- const { getRepoDetails } = require('./repoDetails');
3
- const { generateYamlSchema } = require('./yamlGenerator');
7
+ const { getRepoDetails } = require('./utils/repoDetails');
8
+ const { generateYamlSchema } = require('./utils/yamlGenerator');
4
9
  const { generateDescriptions } = require('./generateDescriptions');
5
10
 
6
11
  const { ChatOpenAI } = require('@langchain/openai');
@@ -1,3 +1,8 @@
1
+ /**
2
+ * @fileoverview File system utilities for recursively reading directories
3
+ * @module analyze-tracking/utils/fileProcessor
4
+ */
5
+
1
6
  const fs = require('fs');
2
7
  const path = require('path');
3
8
 
@@ -1,3 +1,8 @@
1
+ /**
2
+ * @fileoverview Git repository utilities for extracting commit and repository information
3
+ * @module analyze-tracking/utils/repoDetails
4
+ */
5
+
1
6
  const fs = require('fs');
2
7
  const git = require('isomorphic-git');
3
8
  const { execSync } = require('child_process');
@@ -1,3 +1,8 @@
1
+ /**
2
+ * @fileoverview YAML schema generator for analytics tracking events
3
+ * @module analyze-tracking/utils/yamlGenerator
4
+ */
5
+
1
6
  const fs = require('fs');
2
7
  const yaml = require('js-yaml');
3
8