flow-tracer 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,302 @@
1
+ /**
2
+ * Summarizer — builds a compact manifest of what each file does.
3
+ *
4
+ * At index time, reads every code file and extracts:
5
+ * - Exported/top-level function/class names (via regex, not AST)
6
+ * - Import list
7
+ * - First doc comment (if any)
8
+ *
9
+ * Produces a manifest string (~50-80 bytes per file) that an LLM can read
10
+ * to understand the entire codebase structure and pick the right files
11
+ * for a given question.
12
+ */
13
+
14
+ import { readFileSync } from "fs";
15
+ import { extname } from "path";
16
+
17
/**
 * Identifiers the Svelte call scanner must never report as "calls: …".
 *
 * The scanner matches any `word(` sequence, so besides ubiquitous built-ins it
 * also has to reject keywords and operators that may legally be followed by an
 * opening parenthesis (`while (`, `return (`, `async (`, …).
 */
const SVELTE_CALL_NOISE = new Set([
  // Keywords / operators that can precede "(" without being a call.
  "function", "while", "switch", "return", "typeof", "await", "async",
  "delete", "void", "yield", "throw", "super", "else", "case",
  "instanceof", "with",
  // Built-ins and generic helpers that carry no information about the file.
  "import", "require", "console", "window", "document", "fetch",
  "setTimeout", "setInterval", "clearTimeout", "clearInterval",
  "then", "catch", "finally", "push", "filter", "map", "reduce", "forEach",
  "find", "some", "every", "slice", "splice", "join", "split", "replace",
  "match", "test", "includes", "indexOf", "toString", "valueOf", "parse",
  "stringify", "assign", "keys", "values", "entries",
]);

/**
 * Extract exported/top-level function and class names from file content.
 *
 * The language is chosen from the file extension and everything is done with
 * regular expressions (no AST), so results are best-effort. Examples of the
 * short strings produced: "export function findOrCreateOrder",
 * "createOrder :: CartId -> Handler Order", "POST /orders".
 *
 * @param {string} content  Full text of the file.
 * @param {string} filePath Path of the file; only its extension is used.
 * @returns {string[]} De-duplicated signatures in order of discovery, capped
 *   at 20 entries. Empty for unsupported extensions.
 */
function extractSignatures(content, filePath) {
  const ext = extname(filePath).toLowerCase();
  const sigs = [];

  switch (ext) {
    case ".ts":
    case ".js":
    case ".tsx":
    case ".jsx":
    case ".mjs":
    case ".cjs":
    case ".mts":
    case ".cts": {
      // export function/const/let/class/type/interface/enum.
      // This also covers SvelteKit-style handlers (`export const GET`,
      // `export async function POST`), so no separate pass is needed for them.
      for (const m of content.matchAll(/^export\s+(?:async\s+)?(?:function|const|let|class|type|interface|enum)\s+(\w+)/gm)) {
        sigs.push(m[0].slice(0, 80));
      }
      // export default function/class (the name is optional)
      for (const m of content.matchAll(/^export\s+default\s+(?:async\s+)?(?:function|class)\s*(\w*)/gm)) {
        sigs.push(m[0].slice(0, 80));
      }
      // Express-style routes: app.get("/path", …) / router.post('/path', …)
      for (const m of content.matchAll(/(?:app|router)\.(get|post|put|patch|delete)\s*\(\s*['"`]([^'"`]+)/gm)) {
        sigs.push(`${m[1].toUpperCase()} ${m[2]}`);
      }
      break;
    }

    case ".svelte": {
      // onMount(() => { … }) / onMount(async () => { … })
      if (/onMount\s*\(\s*(?:async\s*)?\(\s*\)\s*=>\s*\{/.test(content)) {
        sigs.push("onMount handler");
      }
      // Exported props
      for (const m of content.matchAll(/export\s+let\s+(\w+)/gm)) {
        sigs.push(`prop: ${m[1]}`);
      }
      // Function calls in script and markup. <style> blocks are removed first
      // so CSS functions such as rgba(...) or translate(...) are not reported.
      const withoutStyles = content.replace(/<style\b[\s\S]*?<\/style\s*>/gi, "");
      for (const m of withoutStyles.matchAll(/(?:await\s+)?(\w+)\s*\(/gm)) {
        const name = m[1];
        if (name.length > 3 && /^[a-z]/.test(name) && !SVELTE_CALL_NOISE.has(name)) {
          sigs.push(`calls: ${name}`);
        }
      }
      // Duplicate calls are removed by the final Set below.
      break;
    }

    case ".hs": {
      // Module exports: module Foo (bar, baz) where
      const moduleMatch = content.match(/^module\s+\S+\s*\(([\s\S]*?)\)\s*where/m);
      if (moduleMatch) {
        const exports = moduleMatch[1].replace(/\s+/g, " ").trim();
        if (exports.length < 200) sigs.push(`exports: ${exports}`);
        else sigs.push(`exports: ${exports.slice(0, 200)}...`);
      }
      // Top-level type signatures: functionName :: Type
      // (Haskell identifiers may contain primes, e.g. foldl').
      for (const m of content.matchAll(/^(\w[\w']*)\s*::\s*(.+)$/gm)) {
        sigs.push(`${m[1]} :: ${m[2].slice(0, 60)}`);
      }
      break;
    }

    case ".py": {
      // Top-level def and class
      for (const m of content.matchAll(/^(?:async\s+)?def\s+(\w+)\s*\(/gm)) {
        sigs.push(`def ${m[1]}`);
      }
      for (const m of content.matchAll(/^class\s+(\w+)/gm)) {
        sigs.push(`class ${m[1]}`);
      }
      // Flask/FastAPI routes: @app.get("/path") / @router.post('/path')
      for (const m of content.matchAll(/@(?:app|router)\.(get|post|put|patch|delete)\s*\(\s*['"]([^'"]+)/gm)) {
        sigs.push(`${m[1].toUpperCase()} ${m[2]}`);
      }
      break;
    }

    case ".go": {
      // func declarations, with an optional receiver of any shape
      // (`(s *Stack[T])`, `(*T)`) and optional type parameters (`Map[T any](`).
      for (const m of content.matchAll(/^func\s+(?:\([^)]*\)\s+)?(\w+)\s*[\[(]/gm)) {
        sigs.push(`func ${m[1]}`);
      }
      break;
    }

    case ".rs": {
      // Top-level pub fn and pub struct
      for (const m of content.matchAll(/^pub\s+(?:async\s+)?fn\s+(\w+)/gm)) {
        sigs.push(`pub fn ${m[1]}`);
      }
      for (const m of content.matchAll(/^pub\s+struct\s+(\w+)/gm)) {
        sigs.push(`pub struct ${m[1]}`);
      }
      break;
    }

    case ".java":
    case ".kt": {
      // Declarations that carry an explicit visibility modifier
      for (const m of content.matchAll(/(?:public|private|protected)\s+(?:static\s+)?(?:class|interface|fun|void|\w+)\s+(\w+)/gm)) {
        sigs.push(m[0].slice(0, 80));
      }
      break;
    }

    case ".rb": {
      for (const m of content.matchAll(/^\s*def\s+(\w+)/gm)) sigs.push(`def ${m[1]}`);
      for (const m of content.matchAll(/^\s*class\s+(\w+)/gm)) sigs.push(`class ${m[1]}`);
      break;
    }

    case ".php": {
      for (const m of content.matchAll(/(?:public|private|protected)\s+function\s+(\w+)/gm)) {
        sigs.push(`function ${m[1]}`);
      }
      break;
    }
  }

  // Deduplicate (keeping first occurrences) and cap at 20 signatures per file.
  return [...new Set(sigs)].slice(0, 20);
}
144
+
145
/**
 * Extract the first doc comment from a file (first 150 chars).
 *
 * Comment styles are tried in this order, and the first one found wins:
 *   1. C-style block comment anywhere in the file: /** ... *\/ or /* ... *\/
 *   2. Haskell block comment {- ... -} (compiler pragmas {-# ... #-} are ignored)
 *   3. Python triple-quoted string that starts a line
 *   4. Leading run of //, # or -- line comments
 *
 * @param {string} content Full text of the file.
 * @returns {string} The comment text without its delimiters, truncated to 150
 *   characters, or "" when no comment was found.
 */
function extractFirstComment(content) {
  // JS/TS/Java block comment: /** ... */ or /* ... */
  const blockMatch = content.match(/\/\*\*?([\s\S]*?)\*\//);
  if (blockMatch) {
    // Drop the leading " * " decoration of every line.
    const text = blockMatch[1].replace(/^\s*\*\s?/gm, "").trim();
    return text.slice(0, 150);
  }

  // Haskell: {- ... -}. The lookahead skips pragmas such as
  // {-# LANGUAGE OverloadedStrings #-}, which are not documentation.
  const hsBlock = content.match(/\{-(?!#)([\s\S]*?)-\}/);
  if (hsBlock) return hsBlock[1].trim().slice(0, 150);

  // Python: """...""" or '''...'''
  const pyMatch = content.match(/^(?:"""([\s\S]*?)"""|'''([\s\S]*?)''')/m);
  if (pyMatch) {
    // `??` rather than `||`: an empty docstring captures "", which must not
    // fall through to the other (undefined) group.
    return (pyMatch[1] ?? pyMatch[2] ?? "").trim().slice(0, 150);
  }

  // Leading // or # or -- comments
  const commentLines = [];
  for (const line of content.split("\n")) {
    const trimmed = line.trim();
    // Shebangs and single-line Haskell pragmas are neither documentation nor
    // code that should end the leading-comment scan.
    if (trimmed.startsWith("#!") || trimmed.startsWith("{-#")) continue;
    if (trimmed.startsWith("//") || trimmed.startsWith("#") || trimmed.startsWith("--")) {
      commentLines.push(trimmed.replace(/^(?:\/\/|#|--)\s?/, ""));
    } else if (trimmed && !trimmed.startsWith("import") && !trimmed.startsWith("module")) {
      // First real line of code: stop. Blank lines and import/module lines are
      // skipped so comments placed after them are still collected.
      break;
    }
  }
  if (commentLines.length > 0) return commentLines.join(" ").slice(0, 150);

  return "";
}
179
+
180
/**
 * Extract import module names from content (short form, not full paths).
 * Returns a compact import list like: ["OrderProcessing", "Cart.Main", "Platform.Order"]
 *
 * Supported: JS/TS family and Svelte (`from '…'` clauses), Haskell, Python and
 * Go (both `import "x"` and grouped `import ( … )`). Any other extension
 * yields an empty list.
 *
 * @param {string} content  Full text of the file.
 * @param {string} filePath Path of the file; only its extension is used.
 * @returns {string[]} De-duplicated short names in order of appearance, capped
 *   at 15 entries.
 */
function extractImportNames(content, filePath) {
  const ext = extname(filePath).toLowerCase();
  const imports = [];

  if ([".ts", ".js", ".tsx", ".jsx", ".mjs", ".cjs", ".mts", ".cts", ".svelte"].includes(ext)) {
    // from './foo/bar' → bar
    // from '$lib/server/vayu' → vayu
    for (const m of content.matchAll(/from\s+['"]([^'"]+)['"]/gm)) {
      const parts = m[1].split("/");
      // Last segment that is not a relative marker (".", "..") or a "$alias".
      const meaningful = parts.filter((s) => !s.startsWith(".") && !s.startsWith("$"));
      const last = meaningful[meaningful.length - 1] || parts[parts.length - 1];
      // Reject bare scopes ("@scope") and pure-dot specifiers (".." / "../..").
      if (last && last.length > 1 && !last.startsWith("@") && !/^\.+$/.test(last)) {
        imports.push(last);
      }
    }
  }

  if (ext === ".hs") {
    // The module name stops at the first character that cannot be part of it,
    // so `import Foo.Bar(baz)` yields Foo.Bar rather than Foo.Bar(baz).
    for (const m of content.matchAll(/^import\s+(?:qualified\s+)?([\w.']+)/gm)) {
      // Shorten: Vayu.Services.Internal.Order.Main → Order.Main
      const parts = m[1].split(".");
      imports.push(parts.slice(-2).join("."));
    }
  }

  if (ext === ".py") {
    for (const m of content.matchAll(/^(?:from|import)\s+([\w.]+)/gm)) {
      const short = m[1].split(".").slice(-2).join(".");
      // `from . import x` / `from .. import x` name no module; skip them.
      if (!/^\.*$/.test(short)) imports.push(short);
    }
  }

  if (ext === ".go") {
    // A match is either the single-line form (capture 1: the path) or the
    // grouped form (capture 2: everything between the parentheses).
    // An optional alias (`log`, `_`, `.`) may precede each path.
    for (const m of content.matchAll(/\bimport\s*(?:(?:[\w.]+\s+)?"([^"]+)"|\(([\s\S]*?)\))/g)) {
      if (m[1] !== undefined) {
        imports.push(m[1].split("/").pop());
        continue;
      }
      for (const spec of m[2].matchAll(/^\s*(?:[\w.]+\s+)?"([^"]+)"/gm)) {
        imports.push(spec[1].split("/").pop());
      }
    }
  }

  return [...new Set(imports)].slice(0, 15);
}
223
+
224
+ /**
225
+ * Build a compact manifest of all files across all repos.
226
+ * This is what the LLM reads to decide which files to select for a question.
227
+ */
228
+ /**
229
+ * Build a compact manifest. Target: under 120KB so it fits in one Claude call.
230
+ *
231
+ * Format per file (single line, very compact):
232
+ * [repo] path/to/file.ts | fn1, fn2 | imports: mod1, mod2
233
+ */
234
/**
 * Decide whether a file deserves a manifest entry.
 *
 * The only criterion is the number of extracted signatures: a file with at
 * least one is kept, a file with none (config, data, pure type definitions)
 * is dropped. The path is accepted but not consulted.
 *
 * @param {string} filePath Path of the file (currently unused).
 * @param {number} sigCount Number of signatures extracted from the file.
 * @returns {boolean} true when the file should be listed.
 */
function isLikelyRelevant(filePath, sigCount) {
  return sigCount >= 1;
}
245
+
246
/**
 * Build the compact manifest: one line per file, across all indexed repos.
 *
 * Each line has the form
 *   [repo] path/to/file | sig1, sig2 | imports: mod1, mod2
 * with at most 5 signatures and 6 imports, cut to 140 characters. Empty files,
 * files without any signature and files that cannot be read or processed are
 * left out. If the joined text is longer than 120,000 characters, lines are
 * first cut to 120 characters; if that is still too long, trailing lines are
 * dropped until the text fits. Progress is reported through console.log.
 *
 * @param {Iterable<{repo: {name: string}, files: Iterable<{file: string, fullPath: string}>}>} indexedRepos
 *   Indexed repositories; `fullPath` is read from disk, `file` is displayed.
 * @returns {string} The manifest, lines separated by "\n".
 */
export function buildManifest(indexedRepos) {
  const sizeInKB = (text) => (text.length / 1024).toFixed(0);
  const ellipsize = (text, limit) =>
    text.length > limit ? text.slice(0, limit - 3) + "..." : text;

  // Manifest line for a single file, or null when the file is to be left out.
  const describeFile = (repoName, fileInfo) => {
    const content = readFileSync(fileInfo.fullPath, "utf-8");
    if (content.trim().length === 0) return null;

    const sigs = extractSignatures(content, fileInfo.file).slice(0, 5);
    const imports = extractImportNames(content, fileInfo.file).slice(0, 6);

    // Pre-filter: files without signatures are not listed.
    if (!isLikelyRelevant(fileInfo.file, sigs.length)) return null;

    const columns = [`[${repoName}] ${fileInfo.file}`];
    if (sigs.length > 0) columns.push(sigs.join(", "));
    if (imports.length > 0) columns.push(`imports: ${imports.join(", ")}`);

    // Hard cap line length to 140 chars
    return ellipsize(columns.join(" | "), 140);
  };

  const entries = [];
  for (const repoIndex of indexedRepos) {
    const repoName = repoIndex.repo.name;

    for (const fileInfo of repoIndex.files) {
      let entry = null;
      try {
        entry = describeFile(repoName, fileInfo);
      } catch {
        // Unreadable or unprocessable file: skip it entirely rather than
        // listing a path-only entry.
      }
      if (entry !== null) entries.push(entry);
    }
  }

  let manifest = entries.join("\n");
  console.log(`[manifest] ${entries.length} files, ${sizeInKB(manifest)}KB`);

  // Stage 1 of compression: shorten every line.
  if (manifest.length > 120_000) {
    manifest = manifest
      .split("\n")
      .map((row) => ellipsize(row, 120))
      .join("\n");
    console.log(`[manifest] Truncated lines → ${sizeInKB(manifest)}KB`);
  }

  // Stage 2 (last resort): keep only as many leading lines as fit the budget.
  if (manifest.length > 120_000) {
    const lines = manifest.split("\n");
    const kept = [];
    let used = 0;
    for (let i = 0; i < lines.length; i++) {
      const cost = lines[i].length + 1; // +1 for the newline separator
      if (used + cost > 120_000) break;
      kept.push(lines[i]);
      used += cost;
    }
    manifest = kept.join("\n");
    console.log(`[manifest] Hard capped: ${kept.length}/${lines.length} files → ${sizeInKB(manifest)}KB`);
  }

  return manifest;
}