@flisk/analyze-tracking 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +35 -61
  2. package/bin/cli.js +1 -1
  3. package/package.json +18 -3
  4. package/src/analyze/go/astTraversal.js +121 -0
  5. package/src/analyze/go/constants.js +20 -0
  6. package/src/analyze/go/eventDeduplicator.js +47 -0
  7. package/src/analyze/go/eventExtractor.js +156 -0
  8. package/src/analyze/go/goAstParser/constants.js +39 -0
  9. package/src/analyze/go/goAstParser/expressionParser.js +281 -0
  10. package/src/analyze/go/goAstParser/index.js +52 -0
  11. package/src/analyze/go/goAstParser/statementParser.js +387 -0
  12. package/src/analyze/go/goAstParser/tokenizer.js +196 -0
  13. package/src/analyze/go/goAstParser/typeParser.js +202 -0
  14. package/src/analyze/go/goAstParser/utils.js +99 -0
  15. package/src/analyze/go/index.js +55 -0
  16. package/src/analyze/go/propertyExtractor.js +670 -0
  17. package/src/analyze/go/trackingDetector.js +71 -0
  18. package/src/analyze/go/trackingExtractor.js +54 -0
  19. package/src/analyze/go/typeContext.js +88 -0
  20. package/src/analyze/go/utils.js +215 -0
  21. package/src/analyze/index.js +11 -7
  22. package/src/analyze/javascript/constants.js +115 -0
  23. package/src/analyze/javascript/detectors/analytics-source.js +119 -0
  24. package/src/analyze/javascript/detectors/index.js +10 -0
  25. package/src/analyze/javascript/extractors/event-extractor.js +179 -0
  26. package/src/analyze/javascript/extractors/index.js +13 -0
  27. package/src/analyze/javascript/extractors/property-extractor.js +172 -0
  28. package/src/analyze/javascript/index.js +38 -0
  29. package/src/analyze/javascript/parser.js +126 -0
  30. package/src/analyze/javascript/utils/function-finder.js +123 -0
  31. package/src/analyze/python/index.js +111 -0
  32. package/src/analyze/python/pythonTrackingAnalyzer.py +814 -0
  33. package/src/analyze/ruby/detectors.js +46 -0
  34. package/src/analyze/ruby/extractors.js +258 -0
  35. package/src/analyze/ruby/index.js +51 -0
  36. package/src/analyze/ruby/traversal.js +123 -0
  37. package/src/analyze/ruby/types.js +30 -0
  38. package/src/analyze/ruby/visitor.js +66 -0
  39. package/src/analyze/typescript/constants.js +109 -0
  40. package/src/analyze/typescript/detectors/analytics-source.js +120 -0
  41. package/src/analyze/typescript/detectors/index.js +10 -0
  42. package/src/analyze/typescript/extractors/event-extractor.js +269 -0
  43. package/src/analyze/typescript/extractors/index.js +14 -0
  44. package/src/analyze/typescript/extractors/property-extractor.js +395 -0
  45. package/src/analyze/typescript/index.js +48 -0
  46. package/src/analyze/typescript/parser.js +131 -0
  47. package/src/analyze/typescript/utils/function-finder.js +114 -0
  48. package/src/analyze/typescript/utils/type-resolver.js +193 -0
  49. package/src/generateDescriptions/index.js +81 -0
  50. package/src/generateDescriptions/llmUtils.js +33 -0
  51. package/src/generateDescriptions/promptUtils.js +62 -0
  52. package/src/generateDescriptions/schemaUtils.js +61 -0
  53. package/src/index.js +7 -2
  54. package/src/{fileProcessor.js → utils/fileProcessor.js} +5 -0
  55. package/src/{repoDetails.js → utils/repoDetails.js} +5 -0
  56. package/src/{yamlGenerator.js → utils/yamlGenerator.js} +5 -0
  57. package/.github/workflows/npm-publish.yml +0 -33
  58. package/.github/workflows/pr-check.yml +0 -17
  59. package/jest.config.js +0 -7
  60. package/src/analyze/analyzeGoFile.js +0 -1164
  61. package/src/analyze/analyzeJsFile.js +0 -72
  62. package/src/analyze/analyzePythonFile.js +0 -41
  63. package/src/analyze/analyzeRubyFile.js +0 -409
  64. package/src/analyze/analyzeTsFile.js +0 -69
  65. package/src/analyze/go2json.js +0 -1069
  66. package/src/analyze/helpers.js +0 -217
  67. package/src/analyze/pythonTrackingAnalyzer.py +0 -439
  68. package/src/generateDescriptions.js +0 -196
  69. package/tests/detectSource.test.js +0 -20
  70. package/tests/extractProperties.test.js +0 -109
  71. package/tests/findWrappingFunction.test.js +0 -30
@@ -0,0 +1,196 @@
1
+ const { SIGIL, DOT, WHITESPACE, NEWLINE, NUMBER, QUOTE } = require('./constants');
2
+
3
/**
 * Convert Go source code to tokens.
 *
 * The source is scanned one character at a time. Runs of characters that are
 * not whitespace, newlines, quotes, sigils or dots accumulate into a pending
 * identifier/number, which is flushed whenever a separator is reached.
 * Comments are discarded. Consecutive line breaks collapse into a single
 * "newline" token, and a trailing "newline" token is always appended when at
 * least one token was produced.
 *
 * @param {string} src - Go source code
 * @returns {Array<Object>} Array of token objects with tag, value, line, and col properties.
 *   `tag` is one of "ident", "number", "string", "char", "sigil" or "newline".
 * @throws {Error} If a block comment or a string/char literal is still open at the end of `src`
 */
function tokenize(src) {
  const tokens = [];
  // Every distinct character that appears in some sigil: a cheap "may start a sigil" test.
  const sigilset = Array.from(new Set(SIGIL.join("")));
  let ident = "";
  let isNum = false;
  let i = 0;
  let line = 1;
  let col = 1;

  /**
   * Add a newline token if the last token is not already a newline
   */
  function newlineMaybe() {
    if (tokens.length && tokens[tokens.length - 1].tag != "newline") {
      tokens.push({ tag: "newline", value: "\n", line: line, col: col });
    }
  }

  /**
   * Push the pending identifier/number (if any) to tokens and reset it
   */
  function pushIdent() {
    if (ident.length) {
      tokens.push({ tag: isNum ? "number" : "ident", value: ident, line: line, col: col - ident.length });
      ident = "";
      isNum = false;
    }
  }

  /**
   * Update line and column position based on character
   * @param {string} char - Character being processed
   */
  function advancePosition(char) {
    if (NEWLINE.includes(char)) {
      line++;
      col = 1;
    } else {
      col++;
    }
  }

  /**
   * Step over the next `count` characters, keeping line/col in sync
   * @param {number} count - Number of characters to consume
   */
  function consume(count) {
    for (let n = 0; n < count; n++) {
      advancePosition(src[i]);
      i++;
    }
  }

  /**
   * True when the pending number ends in an exponent marker, so that a
   * following "+" or "-" belongs to the literal (1e+8, 1E-9, 0x1p-2).
   * Hexadecimal literals use p/P; there a trailing e/E is just a digit.
   * @returns {boolean}
   */
  function endsWithExponentMarker() {
    if (!isNum || !ident.length) {
      return false;
    }
    const last = ident[ident.length - 1];
    if (/^0[xX]/.test(ident)) {
      return last == "p" || last == "P";
    }
    return last == "e" || last == "E";
  }

  while (i < src.length) {
    const ch = src[i];

    if (WHITESPACE.includes(ch)) {
      pushIdent();
      consume(1);
    } else if (NEWLINE.includes(ch)) {
      pushIdent();
      newlineMaybe();
      consume(1);
    } else if (ch == "/" && src[i + 1] == "/") {
      // Single-line comment: a comment ends whatever identifier came before it.
      pushIdent();
      while (i < src.length && src[i] != "\n") {
        consume(1);
      }
      newlineMaybe();
      if (i < src.length) {
        consume(1); // the terminating "\n"
      }
    } else if (ch == "/" && src[i + 1] == "*") {
      // Multi-line comment. Go comments do not nest: the first "*/" closes it.
      pushIdent();
      const close = src.indexOf("*/", i + 2);
      if (close == -1) {
        throw new Error(`Unexpected EOF: unterminated comment starting at line ${line}, col ${col}`);
      }
      consume(close + 2 - i);
    } else if (QUOTE.includes(ch)) {
      // String/char literal; the token value keeps its surrounding quotes.
      pushIdent();
      const start = i;
      const startLine = line;
      const startCol = col;
      consume(1); // opening quote
      while (true) {
        if (i >= src.length) {
          throw new Error(`Unexpected EOF: unterminated literal starting at line ${startLine}, col ${startCol}`);
        }
        if (src[i] == "\\" && ch != "`") {
          // Escape sequence: skip the backslash and the escaped character.
          // Raw (backtick) strings have no escapes, so a backslash is literal there.
          consume(1);
          if (i < src.length) {
            consume(1);
          }
        } else if (src[i] == ch) {
          consume(1); // closing quote
          break;
        } else {
          consume(1);
        }
      }
      tokens.push({ tag: ch == "'" ? "char" : "string", value: src.slice(start, i), line: startLine, col: startCol });
    } else if (ch == "." && src[i + 1] == "." && src[i + 2] == ".") {
      // Ellipsis
      pushIdent();
      tokens.push({ tag: "sigil", value: "...", line: line, col: col });
      consume(3);
    } else if (sigilset.includes(ch)) {
      // Handle sigils
      if ((ch == "-" || ch == "+") && endsWithExponentMarker()) {
        // Sign of a floating-point exponent: part of the number, not an operator.
        ident += ch;
        consume(1);
        continue;
      }
      pushIdent();
      // First SIGIL entry that matches at this position wins; if none does,
      // emit the single character so the scanner always makes progress.
      const sigil = SIGIL.find((candidate) => src.startsWith(candidate, i)) || ch;
      tokens.push({ tag: "sigil", value: sigil, line: line, col: col });
      consume(sigil.length);
    } else if (DOT.includes(ch)) {
      // Handle dot: decimal point inside a number, selector otherwise
      if (isNum) {
        ident += ch;
        consume(1);
      } else {
        pushIdent();
        tokens.push({ tag: "sigil", value: DOT, line: line, col: col });
        consume(1);
      }
    } else if (NUMBER.includes(ch)) {
      // Handle numbers: a digit only starts a number when nothing is pending
      if (ident.length == 0) {
        isNum = true;
      }
      ident += ch;
      consume(1);
    } else {
      // Handle identifiers
      ident += ch;
      consume(1);
    }
  }

  pushIdent();
  newlineMaybe();
  return tokens;
}
193
+
194
+ module.exports = {
195
+ tokenize
196
+ };
@@ -0,0 +1,202 @@
1
+ const { tillNestEndImpl, splitTokensBy } = require('./utils');
2
+
3
/**
 * Parse a type from tokens.
 *
 * Dispatches on the first token (and, for channels and namespaced names, the
 * second) and recurses on the remaining tokens for element/value types.
 *
 * @param {Array<Object>} toks - Tokens representing a type
 * @returns {Object|undefined} AST node representing the type, or `undefined`
 *   when `toks` is empty (parseArgs uses that to detect an omitted type)
 */
function parseType(toks) {
  if (toks.length == 0) {
    return undefined;
  }
  if (toks.length == 1) {
    return { tag: toks[0].value };
  }

  // From here on there are at least two tokens.
  const head = toks[0].value;
  const next = toks[1].value;

  if (head == "[") {
    // Array type: "[" size-expression "]" item-type (the size is empty for slices)
    const [close, sizeToks] = tillNestEndImpl(toks, 1, "[", "]");
    return { tag: "array", size: parseExpr(sizeToks), item: parseType(toks.slice(close + 1)) };
  }
  if (head == "...") {
    // Variadic type
    return { tag: "rest", item: parseType(toks.slice(1)) };
  }
  if (head == "*") {
    // Pointer type
    return { tag: "ptr", item: parseType(toks.slice(1)) };
  }
  if (head == "map") {
    // Map type: "map" "[" key-type "]" value-type
    const [close, keyToks] = tillNestEndImpl(toks, 2, "[", "]");
    return { tag: "map", key: parseType(keyToks), value: parseType(toks.slice(close + 1)) };
  }
  if (head == "func") {
    // Function type
    return { tag: "lambda", ...parseFuncSig(toks.slice(1)) };
  }
  if (head == "interface") {
    // Interface type (the method set is not represented)
    return { tag: "interface" };
  }
  if (head == "<-" && next == "chan") {
    // Receive-only channel
    return { tag: "chan", item: parseType(toks.slice(2)), mode: 'i' };
  }
  if (head == "chan" && next == "<-") {
    // Send-only channel
    return { tag: "chan", item: parseType(toks.slice(2)), mode: 'o' };
  }
  if (head == "chan") {
    // Bidirectional channel
    return { tag: "chan", item: parseType(toks.slice(1)), mode: 'io' };
  }
  if (next == ".") {
    // Namespaced type
    return { tag: "namespaced", namespace: head, item: parseType(toks.slice(2)) };
  }

  // Anything else (e.g. `struct { ... }` or a generic instantiation) is not
  // modelled. Fall back to naming the type after its first token, the same
  // shape a single-token type gets, rather than never returning.
  return { tag: head };
}
68
+
69
/**
 * Parse return types from tokens.
 *
 * The comma-separated list is either a list of bare types (`int, error`) or a
 * list of named entries (`n int, err error`, `a, b int`). The first entry made
 * of more than one token decides which: it is a named entry unless it starts
 * like a type (`*`, `[`, `map`, `chan`, ...) or is a qualified name (`pkg.T`).
 * In a named list an entry without a type takes the type of the entry to its
 * right.
 *
 * @param {Array<Object>} toks - Tokens containing return types
 * @returns {Array<Object>} Array of return type objects with name and type properties
 *   (`name` is null for bare types)
 */
function parseRetTypes(toks) {
  // Tokens that can only begin a type, never a result/parameter name.
  const typeStarters = ["map", "[", "*", "chan", "<-", "func", "interface", "struct", "...", "("];

  // A trailing comma yields an empty group; it carries no entry.
  const items = splitTokensBy(toks, ",").filter((item) => item.length > 0);

  const firstCompound = items.find((item) => item.length != 1);
  const named = firstCompound !== undefined &&
    !typeStarters.includes(firstCompound[0].value) &&
    firstCompound[1].value != ".";

  if (!named) {
    return items.map((item) => ({ name: null, type: parseType(item) }));
  }

  // Walk right-to-left so that `a, b int` can hand b's type back to a.
  const ret = new Array(items.length);
  let following;
  for (let j = items.length - 1; j >= 0; j--) {
    const [nameTok, ...typeToks] = items[j];
    const type = typeToks.length ? parseType(typeToks) : following;
    ret[j] = { name: nameTok.value, type: type };
    following = type;
  }
  return ret;
}
105
+
106
/**
 * Parse function arguments.
 *
 * Each argument is read as a name token followed by its type tokens, up to the
 * next comma that is not nested inside (), [] or {}. An argument written
 * without a type (`a, b int`) takes the type of the argument to its right; if
 * there is none, its type stays `undefined`.
 *
 * @param {Array<Object>} toks - Tokens containing arguments
 * @returns {Array<Object>} Array of argument objects with name and type
 */
function parseArgs(toks) {
  const args = [];
  let i = 0;

  while (i < toks.length) {
    const name = toks[i].value;
    i++;

    const typeToks = [];
    let depth = 0;
    for (; i < toks.length; i++) {
      const value = toks[i].value;
      if (value == "(" || value == "[" || value == "{") {
        depth++;
      } else if (value == ")" || value == "]" || value == "}") {
        depth--;
      } else if (value == "," && depth == 0) {
        break;
      }
      typeToks.push(toks[i]);
    }

    args.push({ name: name, type: parseType(typeToks) });
    i++; // step over the separating comma
  }

  // Infer types for arguments without explicit types. The last argument has
  // nothing to its right to inherit from, so start one before it.
  for (let k = args.length - 2; k >= 0; k--) {
    if (args[k].type == undefined) {
      args[k].type = args[k + 1].type;
    }
  }
  return args;
}
148
+
149
/**
 * Parse function signature.
 *
 * `toks[0]` is taken to be the "(" that opens the parameter list. Everything
 * up to its matching ")" is parsed as the parameters, everything after it as
 * the results. Parentheses wrapping the result list are stripped first.
 *
 * @param {Array<Object>} toks - Tokens containing function signature
 * @returns {Object} Object with args and returns properties
 */
function parseFuncSig(toks) {
  // Find the ")" that closes the parameter list, skipping nested pairs.
  let depth = 0;
  let k;
  for (k = 1; k < toks.length; k++) {
    if (toks[k].value == "(") {
      depth++;
    } else if (toks[k].value == ")") {
      if (depth == 0) {
        break;
      }
      depth--;
    }
  }

  const args = parseRetTypes(toks.slice(1, k));

  let rets = toks.slice(k + 1);
  // Stop once nothing is left: an empty result list such as `()` must not be dereferenced.
  while (rets.length && rets[0].value == "(") {
    rets = rets.slice(1, -1);
  }

  return { args: args, returns: rets.length ? parseRetTypes(rets) : [] };
}
184
+
185
// The expression parser is supplied at runtime through setExpressionParser
// rather than required here, which avoids a circular dependency between the
// type parser and the expression parser. Until it is supplied this stays null.
let parseExpr = null;

/**
 * Register the expression parser used when a type contains an expression
 * (to avoid circular dependencies).
 * @param {Function} exprParser - Expression parser function
 */
function setExpressionParser(exprParser) {
  parseExpr = exprParser;
}
195
+
196
+ module.exports = {
197
+ parseType,
198
+ parseRetTypes,
199
+ parseArgs,
200
+ parseFuncSig,
201
+ setExpressionParser
202
+ };
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Parser utility functions
3
+ */
4
+
5
/**
 * Look up a token by position without risking an undefined result.
 * @param {Array<Object>} tokens - Token array
 * @param {number} i - Token index
 * @returns {Object} The token at `i`, or a fresh empty object when that slot is missing or falsy
 */
function getToken(tokens, i) {
  const found = tokens[i];
  if (found) {
    return found;
  }
  return {};
}
14
+
15
/**
 * Report whether any token in the list carries the given value.
 * @param {Array<Object>} toks - Array of tokens
 * @param {string} val - Value to search for
 * @returns {boolean} True if at least one token's `value` loosely equals `val`
 */
function tokensHasValue(toks, val) {
  return toks.some((tok) => tok.value == val);
}
29
+
30
/**
 * Extract tokens until a matching closing delimiter.
 *
 * Starting at index `i`, which is expected to be just inside an already
 * opened `l`, collects tokens until the `r` that closes that opening. Nested
 * `l`/`r` pairs are collected too. The closing token itself is not collected.
 *
 * @param {Array<Object>} toks - Token array
 * @param {number} i - Starting index
 * @param {string} l - Opening delimiter
 * @param {string} r - Closing delimiter
 * @returns {Array} [endIndex, extractedTokens], where endIndex is the index of
 *   the closing delimiter, or `toks.length` if the tokens run out first
 */
function tillNestEndImpl(toks, i, l, r) {
  const tk = [];
  let depth = 0;
  let idx = i;

  for (; idx < toks.length; idx++) {
    const value = toks[idx].value;
    if (value == l) {
      depth++;
    } else if (value == r) {
      if (depth == 0) {
        // This is the delimiter that closes the opening the caller is inside.
        break;
      }
      depth--;
    }
    tk.push(toks[idx]);
  }
  return [idx, tk];
}
59
+
60
/**
 * Break a token list into groups at top-level delimiters.
 *
 * A delimiter only splits when it is not enclosed in {}, () or []. Brackets
 * are always kept inside the current group; the delimiters themselves are
 * dropped.
 *
 * @param {Array<Object>} toks - Tokens to split
 * @param {string} delim - Primary delimiter
 * @param {string} [delim2] - Optional secondary delimiter
 * @returns {Array<Array<Object>>} Array of token groups; empty when `toks` is empty
 */
function splitTokensBy(toks, delim, delim2) {
  const opens = (v) => v == "{" || v == "(" || v == "[";
  const closes = (v) => v == "}" || v == ")" || v == "]";
  const separates = (v) => v == delim || (delim2 != undefined && v == delim2);

  const groups = [];
  let current = [];
  let depth = 0;

  for (const tok of toks) {
    const v = tok.value;
    if (opens(v)) {
      depth++;
    } else if (closes(v)) {
      depth--;
    } else if (depth == 0 && separates(v)) {
      // Top-level delimiter: close the current group and start a new one.
      groups.push(current);
      current = [];
      continue;
    }
    current.push(tok);
  }

  // Keep the final group, including an empty one left by a trailing delimiter.
  if (groups.length || current.length) {
    groups.push(current);
  }
  return groups;
}
93
+
94
+ module.exports = {
95
+ getToken,
96
+ tokensHasValue,
97
+ tillNestEndImpl,
98
+ splitTokensBy
99
+ };
@@ -0,0 +1,55 @@
1
+ /**
2
+ * @fileoverview Go analytics tracking analyzer - main entry point
3
+ * @module analyze/go
4
+ */
5
+
6
+ const fs = require('fs');
7
+ const { extractGoAST } = require('./goAstParser');
8
+ const { buildTypeContext } = require('./typeContext');
9
+ const { deduplicateEvents } = require('./eventDeduplicator');
10
+ const { extractEventsFromBody } = require('./astTraversal');
11
+
12
/**
 * Analyze a Go file and extract tracking events.
 *
 * The file is read and parsed into an AST, a type context is built from it,
 * and the body of every top-level `func` node is scanned for tracking calls.
 * The collected events are deduplicated before being returned.
 *
 * This function does not reject: if reading, parsing or traversal fails, the
 * failure is logged with console.error and an empty array is returned.
 *
 * @param {string} filePath - Path to the Go file to analyze
 * @param {string|null} customFunction - Name of custom tracking function to detect (optional)
 * @returns {Promise<Array>} Array of tracking events found in the file, or an empty array on failure
 */
async function analyzeGoFile(filePath, customFunction) {
  try {
    const source = fs.readFileSync(filePath, 'utf8');
    const ast = extractGoAST(source);

    // First pass: build type information for functions and variables
    const typeContext = buildTypeContext(ast);

    // Second pass: collect events from the body of each top-level function
    const events = [];
    for (const node of ast) {
      if (node.tag === 'func' && node.body) {
        extractEventsFromBody(node.body, events, filePath, node.name, customFunction, typeContext, node.name);
      }
    }

    return deduplicateEvents(events);
  } catch (error) {
    // Not every thrown value is an Error (a plain string has no `.message`),
    // so fall back to the value itself to keep the log line informative.
    const reason = error && error.message != null ? error.message : String(error);
    console.error(`Error analyzing Go file ${filePath}:`, reason);
    return [];
  }
}
54
+
55
+ module.exports = { analyzeGoFile };