bmad-method 5.0.0-beta.1 → 5.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,45 +1,204 @@
  const fs = require("fs-extra");
  const path = require("node:path");

+ // Deno/Node compatibility: explicitly import process
+ const process = require("node:process");
+ const { execFile } = require("node:child_process");
+ const { promisify } = require("node:util");
+ const execFileAsync = promisify(execFile);
+
+ // Simple memoization across calls (keyed by realpath of startDir)
+ const _cache = new Map();
+
+ async function _tryRun(cmd, args, cwd, timeoutMs = 500) {
+   try {
+     const { stdout } = await execFileAsync(cmd, args, {
+       cwd,
+       timeout: timeoutMs,
+       windowsHide: true,
+       maxBuffer: 1024 * 1024,
+     });
+     const out = String(stdout || "").trim();
+     return out || null;
+   } catch {
+     return null;
+   }
+ }
+
+ async function _detectVcsTopLevel(startDir) {
+   // Run common VCS root queries in parallel; ignore failures
+   const gitP = _tryRun("git", ["rev-parse", "--show-toplevel"], startDir);
+   const hgP = _tryRun("hg", ["root"], startDir);
+   const svnP = (async () => {
+     const show = await _tryRun("svn", ["info", "--show-item", "wc-root"], startDir);
+     if (show) return show;
+     const info = await _tryRun("svn", ["info"], startDir);
+     if (info) {
+       const line = info.split(/\r?\n/).find((l) => l.toLowerCase().startsWith("working copy root path:"));
+       if (line) return line.split(":").slice(1).join(":").trim();
+     }
+     return null;
+   })();
+   const [git, hg, svn] = await Promise.all([gitP, hgP, svnP]);
+   return git || hg || svn || null;
+ }
+
  /**
-  * Attempt to find the project root by walking up from startDir
-  * Looks for common project markers like .git, package.json, pyproject.toml, etc.
+  * Attempt to find the project root by walking up from startDir.
+  * Uses a robust, prioritized set of ecosystem markers (VCS > workspaces/monorepo > lock/build > language config).
+  * Also recognizes package.json with "workspaces" as a workspace root.
+  * You can augment markers via env PROJECT_ROOT_MARKERS as a comma-separated list of file/dir names.
   * @param {string} startDir
   * @returns {Promise<string|null>} project root directory or null if not found
   */
  async function findProjectRoot(startDir) {
    try {
+     // Resolve symlinks for robustness (e.g., when invoked from a symlinked path)
      let dir = path.resolve(startDir);
-     const root = path.parse(dir).root;
-     const markers = [
-       ".git",
-       "package.json",
-       "pnpm-workspace.yaml",
-       "yarn.lock",
-       "pnpm-lock.yaml",
-       "pyproject.toml",
-       "requirements.txt",
-       "go.mod",
-       "Cargo.toml",
-       "composer.json",
-       ".hg",
-       ".svn",
-     ];
+     try {
+       dir = await fs.realpath(dir);
+     } catch {
+       // ignore if realpath fails; continue with resolved path
+     }
+     const startKey = dir; // preserve starting point for caching
+     if (_cache.has(startKey)) return _cache.get(startKey);
+     const fsRoot = path.parse(dir).root;
+
+     // Helper to safely check for existence
+     const exists = (p) => fs.pathExists(p);
+
+     // Build checks: an array of { makePath: (dir) => string, weight }
+     const checks = [];
+
+     const add = (rel, weight) => {
+       const makePath = (d) => Array.isArray(rel) ? path.join(d, ...rel) : path.join(d, rel);
+       checks.push({ makePath, weight });
+     };
+
+     // Highest priority: explicit sentinel markers
+     add(".project-root", 110);
+     add(".workspace-root", 110);
+     add(".repo-root", 110);
+
+     // Highest priority: VCS roots
+     add(".git", 100);
+     add(".hg", 95);
+     add(".svn", 95);
+
+     // Monorepo/workspace indicators
+     add("pnpm-workspace.yaml", 90);
+     add("lerna.json", 90);
+     add("turbo.json", 90);
+     add("nx.json", 90);
+     add("rush.json", 90);
+     add("go.work", 90);
+     add("WORKSPACE", 90);
+     add("WORKSPACE.bazel", 90);
+     add("MODULE.bazel", 90);
+     add("pants.toml", 90);
+
+     // Lockfiles and package-manager/top-level locks
+     add("yarn.lock", 85);
+     add("pnpm-lock.yaml", 85);
+     add("package-lock.json", 85);
+     add("bun.lockb", 85);
+     add("Cargo.lock", 85);
+     add("composer.lock", 85);
+     add("poetry.lock", 85);
+     add("Pipfile.lock", 85);
+     add("Gemfile.lock", 85);
+
+     // Build-system root indicators
+     add("settings.gradle", 80);
+     add("settings.gradle.kts", 80);
+     add("gradlew", 80);
+     add("pom.xml", 80);
+     add("build.sbt", 80);
+     add(["project", "build.properties"], 80);
+
+     // Language/project config markers
+     add("deno.json", 75);
+     add("deno.jsonc", 75);
+     add("pyproject.toml", 75);
+     add("Pipfile", 75);
+     add("requirements.txt", 75);
+     add("go.mod", 75);
+     add("Cargo.toml", 75);
+     add("composer.json", 75);
+     add("mix.exs", 75);
+     add("Gemfile", 75);
+     add("CMakeLists.txt", 75);
+     add("stack.yaml", 75);
+     add("cabal.project", 75);
+     add("rebar.config", 75);
+     add("pubspec.yaml", 75);
+     add("flake.nix", 75);
+     add("shell.nix", 75);
+     add("default.nix", 75);
+     add(".tool-versions", 75);
+     add("package.json", 74); // generic Node project (lower than lockfiles/workspaces)
+
+     // Changesets
+     add([".changeset", "config.json"], 70);
+     add(".changeset", 70);
+
+     // Custom markers via env (comma-separated names)
+     if (process.env.PROJECT_ROOT_MARKERS) {
+       for (const name of process.env.PROJECT_ROOT_MARKERS.split(",").map((s) => s.trim()).filter(Boolean)) {
+         add(name, 72);
+       }
+     }
+
+     /** Check for package.json with "workspaces" */
+     const hasWorkspacePackageJson = async (d) => {
+       const pkgPath = path.join(d, "package.json");
+       if (!(await exists(pkgPath))) return false;
+       try {
+         const raw = await fs.readFile(pkgPath, "utf8");
+         const pkg = JSON.parse(raw);
+         return Boolean(pkg && pkg.workspaces);
+       } catch {
+         return false;
+       }
+     };
+
+     let best = null; // { dir, weight }
+
+     // Try to detect VCS toplevel once up-front; treat as authoritative slightly above .git marker
+     const vcsTop = await _detectVcsTopLevel(dir);
+     if (vcsTop) {
+       best = { dir: vcsTop, weight: 101 };
+     }

      while (true) {
-       const exists = await Promise.all(
-         markers.map((m) => fs.pathExists(path.join(dir, m))),
+       // Special check: package.json with "workspaces"
+       if (await hasWorkspacePackageJson(dir)) {
+         if (!best || 90 >= best.weight) best = { dir, weight: 90 };
+       }
+
+       // Evaluate all other checks in parallel
+       const results = await Promise.all(
+         checks.map(async (c) => ({ c, ok: await exists(c.makePath(dir)) })),
        );
-       if (exists.some(Boolean)) {
-         return dir;
+
+       for (const { c, ok } of results) {
+         if (!ok) continue;
+         if (!best || c.weight >= best.weight) {
+           best = { dir, weight: c.weight };
+         }
        }
-       if (dir === root) break;
+
+       if (dir === fsRoot) break;
        dir = path.dirname(dir);
      }
-     return null;
+
+     const out = best ? best.dir : null;
+     _cache.set(startKey, out);
+     return out;
    } catch {
      return null;
    }
  }

  module.exports = { findProjectRoot };
+
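A minimal usage sketch for the new resolver follows; the require path and the custom marker name are illustrative only (the diff does not show this module's filename), while PROJECT_ROOT_MARKERS is the environment override documented in the JSDoc above.

  // Illustrative only: resolve the project root from the current working directory.
  const { findProjectRoot } = require("./project-root"); // hypothetical path to this module
  process.env.PROJECT_ROOT_MARKERS = ".my-root-marker";  // optional, augments the built-in marker list
  findProjectRoot(process.cwd()).then((root) => {
    console.log(root ?? "no project root found");
  });

Because results are memoized by the realpath of startDir, repeated calls from the same directory resolve from the cache without re-running the VCS probes.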
@@ -0,0 +1,331 @@
+ "use strict";
+
+ const fs = require("node:fs/promises");
+ const path = require("node:path");
+ const zlib = require("node:zlib");
+ const { Buffer } = require("node:buffer");
+ const crypto = require("node:crypto");
+ const cp = require("node:child_process");
+
+ const KB = 1024;
+ const MB = 1024 * KB;
+
+ const formatSize = (bytes) => {
+   if (bytes < 1024) return `${bytes} B`;
+   if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
+   if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
+   return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
+ };
+
+ const percentile = (sorted, p) => {
+   if (sorted.length === 0) return 0;
+   const idx = Math.min(sorted.length - 1, Math.max(0, Math.ceil((p / 100) * sorted.length) - 1));
+   return sorted[idx];
+ };
+
+ async function processWithLimit(items, fn, concurrency = 64) {
+   for (let i = 0; i < items.length; i += concurrency) {
+     await Promise.all(items.slice(i, i + concurrency).map(fn));
+   }
+ }
+
+ async function enrichAllFiles(textFiles, binaryFiles) {
+   /** @type {Array<{ path: string; absolutePath: string; size: number; lines?: number; isBinary: boolean; ext: string; dir: string; depth: number; hidden: boolean; mtimeMs: number; isSymlink: boolean; }>} */
+   const allFiles = [];
+
+   async function enrich(file, isBinary) {
+     const ext = (path.extname(file.path) || "").toLowerCase();
+     const dir = path.dirname(file.path) || ".";
+     const depth = file.path.split(path.sep).filter(Boolean).length;
+     const hidden = file.path.split(path.sep).some((seg) => seg.startsWith("."));
+     let mtimeMs = 0;
+     let isSymlink = false;
+     try {
+       const lst = await fs.lstat(file.absolutePath);
+       mtimeMs = lst.mtimeMs;
+       isSymlink = lst.isSymbolicLink();
+     } catch (_) { /* ignore lstat errors during enrichment */ }
+     allFiles.push({
+       path: file.path,
+       absolutePath: file.absolutePath,
+       size: file.size || 0,
+       lines: file.lines,
+       isBinary,
+       ext,
+       dir,
+       depth,
+       hidden,
+       mtimeMs,
+       isSymlink,
+     });
+   }
+
+   await processWithLimit(textFiles, (f) => enrich(f, false));
+   await processWithLimit(binaryFiles, (f) => enrich(f, true));
+   return allFiles;
+ }
+
+ function buildHistogram(allFiles) {
+   const buckets = [
+     [1 * KB, "0–1KB"],
+     [10 * KB, "1–10KB"],
+     [100 * KB, "10–100KB"],
+     [1 * MB, "100KB–1MB"],
+     [10 * MB, "1–10MB"],
+     [100 * MB, "10–100MB"],
+     [Infinity, ">=100MB"],
+   ];
+   const histogram = buckets.map(([_, label]) => ({ label, count: 0, bytes: 0 }));
+   for (const f of allFiles) {
+     for (let i = 0; i < buckets.length; i++) {
+       if (f.size < buckets[i][0]) {
+         histogram[i].count++;
+         histogram[i].bytes += f.size;
+         break;
+       }
+     }
+   }
+   return histogram;
+ }
+
+ function aggregateByExtension(allFiles) {
+   const byExtension = new Map();
+   for (const f of allFiles) {
+     const key = f.ext || "<none>";
+     const v = byExtension.get(key) || { ext: key, count: 0, bytes: 0 };
+     v.count++;
+     v.bytes += f.size;
+     byExtension.set(key, v);
+   }
+   return Array.from(byExtension.values()).sort((a, b) => b.bytes - a.bytes);
+ }
+
+ function aggregateByDirectory(allFiles) {
+   const byDirectory = new Map();
+   function addDirBytes(dir, bytes) {
+     const v = byDirectory.get(dir) || { dir, count: 0, bytes: 0 };
+     v.count++;
+     v.bytes += bytes;
+     byDirectory.set(dir, v);
+   }
+   for (const f of allFiles) {
+     const parts = f.dir === "." ? [] : f.dir.split(path.sep);
+     let acc = "";
+     for (let i = 0; i < parts.length; i++) {
+       acc = i === 0 ? parts[0] : acc + path.sep + parts[i];
+       addDirBytes(acc, f.size);
+     }
+     if (parts.length === 0) addDirBytes(".", f.size);
+   }
+   return Array.from(byDirectory.values()).sort((a, b) => b.bytes - a.bytes);
+ }
+
+ function computeDepthAndLongest(allFiles) {
+   const depthDistribution = new Map();
+   for (const f of allFiles) {
+     depthDistribution.set(f.depth, (depthDistribution.get(f.depth) || 0) + 1);
+   }
+   const longestPaths = [...allFiles]
+     .sort((a, b) => b.path.length - a.path.length)
+     .slice(0, 25)
+     .map((f) => ({ path: f.path, length: f.path.length, size: f.size }));
+   const depthDist = Array.from(depthDistribution.entries())
+     .sort((a, b) => a[0] - b[0])
+     .map(([depth, count]) => ({ depth, count }));
+   return { depthDist, longestPaths };
+ }
+
+ function computeTemporal(allFiles, nowMs) {
+   let oldest = null, newest = null;
+   const ageBuckets = [
+     { label: "> 1 year", minDays: 365, maxDays: Infinity, count: 0, bytes: 0 },
+     { label: "6–12 months", minDays: 180, maxDays: 365, count: 0, bytes: 0 },
+     { label: "1–6 months", minDays: 30, maxDays: 180, count: 0, bytes: 0 },
+     { label: "7–30 days", minDays: 7, maxDays: 30, count: 0, bytes: 0 },
+     { label: "1–7 days", minDays: 1, maxDays: 7, count: 0, bytes: 0 },
+     { label: "< 1 day", minDays: 0, maxDays: 1, count: 0, bytes: 0 },
+   ];
+   for (const f of allFiles) {
+     const ageDays = Math.max(0, (nowMs - (f.mtimeMs || nowMs)) / (24 * 60 * 60 * 1000));
+     for (const b of ageBuckets) {
+       if (ageDays >= b.minDays && ageDays < b.maxDays) {
+         b.count++;
+         b.bytes += f.size;
+         break;
+       }
+     }
+     if (!oldest || f.mtimeMs < oldest.mtimeMs) oldest = f;
+     if (!newest || f.mtimeMs > newest.mtimeMs) newest = f;
+   }
+   return {
+     oldest: oldest ? { path: oldest.path, mtime: oldest.mtimeMs ? new Date(oldest.mtimeMs).toISOString() : null } : null,
+     newest: newest ? { path: newest.path, mtime: newest.mtimeMs ? new Date(newest.mtimeMs).toISOString() : null } : null,
+     ageBuckets,
+   };
+ }
+
+ function computeQuality(allFiles, textFiles) {
+   const zeroByteFiles = allFiles.filter((f) => f.size === 0).length;
+   const emptyTextFiles = textFiles.filter((f) => (f.size || 0) === 0 || (f.lines || 0) === 0).length;
+   const hiddenFiles = allFiles.filter((f) => f.hidden).length;
+   const symlinks = allFiles.filter((f) => f.isSymlink).length;
+   const largeThreshold = 50 * MB;
+   const suspiciousThreshold = 100 * MB;
+   const largeFilesCount = allFiles.filter((f) => f.size >= largeThreshold).length;
+   const suspiciousLargeFilesCount = allFiles.filter((f) => f.size >= suspiciousThreshold).length;
+   return {
+     zeroByteFiles,
+     emptyTextFiles,
+     hiddenFiles,
+     symlinks,
+     largeFilesCount,
+     suspiciousLargeFilesCount,
+     largeThreshold,
+   };
+ }
+
+ function computeDuplicates(allFiles, textFiles) {
+   const duplicatesBySize = new Map();
+   for (const f of allFiles) {
+     const key = String(f.size);
+     const arr = duplicatesBySize.get(key) || [];
+     arr.push(f);
+     duplicatesBySize.set(key, arr);
+   }
+   const duplicateCandidates = [];
+   for (const [sizeKey, arr] of duplicatesBySize.entries()) {
+     if (arr.length < 2) continue;
+     const textGroup = arr.filter((f) => !f.isBinary);
+     const otherGroup = arr.filter((f) => f.isBinary);
+     const contentHashGroups = new Map();
+     for (const tf of textGroup) {
+       try {
+         const src = textFiles.find((x) => x.absolutePath === tf.absolutePath);
+         const content = src ? src.content : "";
+         const h = crypto.createHash("sha1").update(content).digest("hex");
+         const g = contentHashGroups.get(h) || [];
+         g.push(tf);
+         contentHashGroups.set(h, g);
+       } catch (_) { /* ignore hashing errors for duplicate detection */ }
+     }
+     for (const [_h, g] of contentHashGroups.entries()) {
+       if (g.length > 1) duplicateCandidates.push({ reason: "same-size+text-hash", size: Number(sizeKey), count: g.length, files: g.map((f) => f.path) });
+     }
+     if (otherGroup.length > 1) {
+       duplicateCandidates.push({ reason: "same-size", size: Number(sizeKey), count: otherGroup.length, files: otherGroup.map((f) => f.path) });
+     }
+   }
+   return duplicateCandidates;
+ }
+
+ function estimateCompressibility(textFiles) {
+   let compSampleBytes = 0;
+   let compCompressedBytes = 0;
+   for (const tf of textFiles) {
+     try {
+       const sampleLen = Math.min(256 * 1024, tf.size || 0);
+       if (sampleLen <= 0) continue;
+       const sample = tf.content.slice(0, sampleLen);
+       const gz = zlib.gzipSync(Buffer.from(sample, "utf8"));
+       compSampleBytes += sampleLen;
+       compCompressedBytes += gz.length;
+     } catch (_) { /* ignore compression errors during sampling */ }
+   }
+   return compSampleBytes > 0 ? compCompressedBytes / compSampleBytes : null;
+ }
+
+ function computeGitInfo(allFiles, rootDir, largeThreshold) {
+   const info = {
+     isRepo: false,
+     trackedCount: 0,
+     trackedBytes: 0,
+     untrackedCount: 0,
+     untrackedBytes: 0,
+     lfsCandidates: [],
+   };
+   try {
+     if (!rootDir) return info;
+     const top = cp.execFileSync("git", ["rev-parse", "--show-toplevel"], { cwd: rootDir, stdio: ["ignore", "pipe", "ignore"] }).toString().trim();
+     if (!top) return info;
+     info.isRepo = true;
+     const out = cp.execFileSync("git", ["ls-files", "-z"], { cwd: rootDir, stdio: ["ignore", "pipe", "ignore"] });
+     const tracked = new Set(out.toString().split("\0").filter(Boolean));
+     let trackedBytes = 0, trackedCount = 0, untrackedBytes = 0, untrackedCount = 0;
+     const lfsCandidates = [];
+     for (const f of allFiles) {
+       const isTracked = tracked.has(f.path);
+       if (isTracked) {
+         trackedCount++; trackedBytes += f.size;
+         if (f.size >= largeThreshold) lfsCandidates.push({ path: f.path, size: f.size });
+       } else {
+         untrackedCount++; untrackedBytes += f.size;
+       }
+     }
+     info.trackedCount = trackedCount;
+     info.trackedBytes = trackedBytes;
+     info.untrackedCount = untrackedCount;
+     info.untrackedBytes = untrackedBytes;
+     info.lfsCandidates = lfsCandidates.sort((a, b) => b.size - a.size).slice(0, 50);
+   } catch (_) { /* git not available or not a repo, ignore */ }
+   return info;
+ }
+
+ function computeLargestFiles(allFiles, totalBytes) {
+   const toPct = (num, den) => (den === 0 ? 0 : (num / den) * 100);
+   return [...allFiles]
+     .sort((a, b) => b.size - a.size)
+     .slice(0, 50)
+     .map((f) => ({
+       path: f.path,
+       size: f.size,
+       sizeFormatted: formatSize(f.size),
+       percentOfTotal: toPct(f.size, totalBytes),
+       ext: f.ext || "",
+       isBinary: f.isBinary,
+       mtime: f.mtimeMs ? new Date(f.mtimeMs).toISOString() : null,
+     }));
+ }
+
+ function mdTable(rows, headers) {
+   const header = `| ${headers.join(" | ")} |`;
+   const sep = `| ${headers.map(() => "---").join(" | ")} |`;
+   const body = rows.map((r) => `| ${r.join(" | ")} |`).join("\n");
+   return `${header}\n${sep}\n${body}`;
+ }
+
+ function buildMarkdownReport(largestFiles, byExtensionArr, byDirectoryArr, totalBytes) {
+   const toPct = (num, den) => (den === 0 ? 0 : (num / den) * 100);
+   const md = [];
+   md.push("\n### Top Largest Files (Top 50)\n");
+   md.push(mdTable(
+     largestFiles.map((f) => [f.path, f.sizeFormatted, `${f.percentOfTotal.toFixed(2)}%`, f.ext || "", f.isBinary ? "binary" : "text"]),
+     ["Path", "Size", "% of total", "Ext", "Type"],
+   ));
+   md.push("\n\n### Top Extensions by Bytes (Top 20)\n");
+   const topExtRows = byExtensionArr.slice(0, 20).map((e) => [e.ext, String(e.count), formatSize(e.bytes), `${toPct(e.bytes, totalBytes).toFixed(2)}%`]);
+   md.push(mdTable(topExtRows, ["Ext", "Count", "Bytes", "% of total"]));
+   md.push("\n\n### Top Directories by Bytes (Top 20)\n");
+   const topDirRows = byDirectoryArr.slice(0, 20).map((d) => [d.dir, String(d.count), formatSize(d.bytes), `${toPct(d.bytes, totalBytes).toFixed(2)}%`]);
+   md.push(mdTable(topDirRows, ["Directory", "Files", "Bytes", "% of total"]));
+   return md.join("\n");
+ }
+
+ module.exports = {
+   KB,
+   MB,
+   formatSize,
+   percentile,
+   processWithLimit,
+   enrichAllFiles,
+   buildHistogram,
+   aggregateByExtension,
+   aggregateByDirectory,
+   computeDepthAndLongest,
+   computeTemporal,
+   computeQuality,
+   computeDuplicates,
+   estimateCompressibility,
+   computeGitInfo,
+   computeLargestFiles,
+   buildMarkdownReport,
+ };
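A small sketch of how these helpers compose; the input shape ({ path, absolutePath, size, lines, content } per text file) is inferred from enrichAllFiles and computeDuplicates above, and the require path is hypothetical.

  const H = require("./stats.helpers.js"); // hypothetical path to the module added above
  (async () => {
    // Assumed input shape for a text file entry.
    const textFiles = [{ path: "src/index.js", absolutePath: "/repo/src/index.js", size: 2048, lines: 80, content: "..." }];
    const allFiles = await H.enrichAllFiles(textFiles, []);
    const totalBytes = allFiles.reduce((s, f) => s + f.size, 0);
    console.log(H.formatSize(totalBytes));      // "2.0 KB"
    console.log(H.buildHistogram(allFiles));    // size-bucket counts and bytes
  })();

The rewritten calculateStatistics in the next hunk wires these helpers together in exactly this order.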
@@ -1,29 +1,79 @@
- function calculateStatistics(aggregatedContent, xmlFileSize) {
-   const { textFiles, binaryFiles, errors } = aggregatedContent;
-
-   const totalTextSize = textFiles.reduce((sum, file) => sum + file.size, 0);
-   const totalBinarySize = binaryFiles.reduce((sum, file) => sum + file.size, 0);
-   const totalSize = totalTextSize + totalBinarySize;
+ const H = require("./stats.helpers.js");

-   const totalLines = textFiles.reduce((sum, file) => sum + file.lines, 0);
+ async function calculateStatistics(aggregatedContent, xmlFileSize, rootDir) {
+   const { textFiles, binaryFiles, errors } = aggregatedContent;

+   const totalLines = textFiles.reduce((sum, f) => sum + (f.lines || 0), 0);
    const estimatedTokens = Math.ceil(xmlFileSize / 4);

-   const formatSize = (bytes) => {
-     if (bytes < 1024) return `${bytes} B`;
-     if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
-     return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
-   };
+   // Build enriched file list
+   const allFiles = await H.enrichAllFiles(textFiles, binaryFiles);
+   const totalBytes = allFiles.reduce((s, f) => s + f.size, 0);
+   const sizes = allFiles.map((f) => f.size).sort((a, b) => a - b);
+   const avgSize = sizes.length ? totalBytes / sizes.length : 0;
+   const medianSize = sizes.length ? H.percentile(sizes, 50) : 0;
+   const p90 = H.percentile(sizes, 90);
+   const p95 = H.percentile(sizes, 95);
+   const p99 = H.percentile(sizes, 99);
+
+   const histogram = H.buildHistogram(allFiles);
+   const byExtensionArr = H.aggregateByExtension(allFiles);
+   const byDirectoryArr = H.aggregateByDirectory(allFiles);
+   const { depthDist, longestPaths } = H.computeDepthAndLongest(allFiles);
+   const temporal = H.computeTemporal(allFiles, Date.now());
+   const quality = H.computeQuality(allFiles, textFiles);
+   const duplicateCandidates = H.computeDuplicates(allFiles, textFiles);
+   const compressibilityRatio = H.estimateCompressibility(textFiles);
+   const git = H.computeGitInfo(allFiles, rootDir, quality.largeThreshold);
+   const largestFiles = H.computeLargestFiles(allFiles, totalBytes);
+   const markdownReport = H.buildMarkdownReport(
+     largestFiles,
+     byExtensionArr,
+     byDirectoryArr,
+     totalBytes,
+   );

    return {
+     // Back-compat summary
      totalFiles: textFiles.length + binaryFiles.length,
      textFiles: textFiles.length,
      binaryFiles: binaryFiles.length,
      errorFiles: errors.length,
-     totalSize: formatSize(totalSize),
-     xmlSize: formatSize(xmlFileSize),
+     totalSize: H.formatSize(totalBytes),
+     totalBytes,
+     xmlSize: H.formatSize(xmlFileSize),
      totalLines,
      estimatedTokens: estimatedTokens.toLocaleString(),
+
+     // Distributions and percentiles
+     avgFileSize: avgSize,
+     medianFileSize: medianSize,
+     p90,
+     p95,
+     p99,
+     histogram,
+
+     // Extensions and directories
+     byExtension: byExtensionArr,
+     byDirectory: byDirectoryArr,
+     depthDistribution: depthDist,
+     longestPaths,
+
+     // Temporal
+     temporal,
+
+     // Quality signals
+     quality,
+
+     // Duplicates and compressibility
+     duplicateCandidates,
+     compressibilityRatio,
+
+     // Git-aware
+     git,
+
+     largestFiles,
+     markdownReport,
    };
  }
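For reference, a hedged sketch of calling the rewritten calculateStatistics. The aggregatedContent shape (textFiles/binaryFiles/errors) follows the destructuring above, rootDir feeds the git-aware stats, and the require paths and export shape are assumptions since the surrounding module lines are not shown in this diff.

  // Illustrative only: module paths and the aggregator are placeholders.
  const { findProjectRoot } = require("./project-root");
  const calculateStatistics = require("./stats"); // hypothetical export shape

  (async () => {
    const aggregatedContent = { textFiles: [], binaryFiles: [], errors: [] };
    const rootDir = await findProjectRoot(process.cwd());
    const stats = await calculateStatistics(aggregatedContent, 0, rootDir);
    console.log(stats.totalFiles, stats.markdownReport);
  })();

Note that calculateStatistics is now async and takes a third rootDir argument, so existing callers of the beta.1 signature would need to await the result and pass a root directory (or undefined) for the git section to populate.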