@rog0x/mcp-file-tools 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,230 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { Server } from "@modelcontextprotocol/sdk/server/index.js";
4
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
5
+ import {
6
+ CallToolRequestSchema,
7
+ ListToolsRequestSchema,
8
+ } from "@modelcontextprotocol/sdk/types.js";
9
+ import { dirTree } from "./tools/dir-tree.js";
10
+ import { fileStats } from "./tools/file-stats.js";
11
+ import { duplicateFinder } from "./tools/duplicate-finder.js";
12
+ import { codeCounter } from "./tools/code-counter.js";
13
+ import { fileSearch } from "./tools/file-search.js";
14
+
15
// MCP server instance: identifies this package to clients and declares
// that it exposes only tools (no resources or prompts).
const server = new Server(
  {
    name: "mcp-file-tools",
    version: "1.0.0",
  },
  {
    capabilities: {
      tools: {},
    },
  }
);
26
+
27
// Advertise the available tools and their JSON-schema input contracts.
// Every tool operates on a required absolute `dir_path`; the remaining
// fields tune depth, filtering, and result limits.
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: [
    // Visual `tree`-style directory listing.
    {
      name: "dir_tree",
      description:
        "Generate a visual directory tree structure (like the `tree` command). Shows files and folders with configurable depth and ignore patterns.",
      inputSchema: {
        type: "object" as const,
        properties: {
          dir_path: { type: "string", description: "Absolute path to the directory" },
          max_depth: {
            type: "number",
            description: "Maximum depth to traverse (default: 5, 0 = unlimited)",
          },
          ignore_patterns: {
            type: "array",
            items: { type: "string" },
            description:
              "Patterns to ignore (default: node_modules, .git, dist, __pycache__, .next, .cache, coverage)",
          },
        },
        required: ["dir_path"],
      },
    },
    // Aggregate size/extension/age statistics for a directory.
    {
      name: "file_stats",
      description:
        "Analyze a directory: total files, total size, file count by extension, largest files, newest/oldest files.",
      inputSchema: {
        type: "object" as const,
        properties: {
          dir_path: { type: "string", description: "Absolute path to the directory" },
          max_depth: {
            type: "number",
            description: "Maximum depth to traverse (default: 10, 0 = unlimited)",
          },
        },
        required: ["dir_path"],
      },
    },
    // Duplicate detection via size bucketing + MD5 hashing.
    {
      name: "duplicate_finder",
      description:
        "Find duplicate files by comparing size and MD5 hash. Returns groups of duplicates with paths and wasted space.",
      inputSchema: {
        type: "object" as const,
        properties: {
          dir_path: { type: "string", description: "Absolute path to the directory" },
          min_size: {
            type: "number",
            description: "Minimum file size in bytes to consider (default: 1)",
          },
          max_depth: {
            type: "number",
            description: "Maximum depth to traverse (default: 10)",
          },
        },
        required: ["dir_path"],
      },
    },
    // cloc-style line counting grouped by language.
    {
      name: "code_counter",
      description:
        "Count lines of code by programming language: total lines, code lines, comment lines, blank lines (like cloc/scc).",
      inputSchema: {
        type: "object" as const,
        properties: {
          dir_path: { type: "string", description: "Absolute path to the directory" },
          max_depth: {
            type: "number",
            description: "Maximum depth to traverse (default: 10)",
          },
        },
        required: ["dir_path"],
      },
    },
    // Multi-criteria file search (name glob, content regex, size, mtime).
    {
      name: "file_search",
      description:
        "Search for files by name pattern (glob), content (regex), size range, and/or date range.",
      inputSchema: {
        type: "object" as const,
        properties: {
          dir_path: { type: "string", description: "Absolute path to the directory to search" },
          name_pattern: {
            type: "string",
            description: 'Glob pattern for file names (e.g. "**/*.ts", "*.json")',
          },
          content_pattern: {
            type: "string",
            description: "Regex pattern to search within file contents",
          },
          min_size: {
            type: "number",
            description: "Minimum file size in bytes",
          },
          max_size: {
            type: "number",
            description: "Maximum file size in bytes",
          },
          modified_after: {
            type: "string",
            description: "ISO date string - only files modified after this date",
          },
          modified_before: {
            type: "string",
            description: "ISO date string - only files modified before this date",
          },
          max_results: {
            type: "number",
            description: "Maximum number of results to return (default: 100)",
          },
        },
        required: ["dir_path"],
      },
    },
  ],
}));
146
+
147
+ // Handle tool calls
148
+ server.setRequestHandler(CallToolRequestSchema, async (request) => {
149
+ const { name, arguments: args } = request.params;
150
+
151
+ try {
152
+ switch (name) {
153
+ case "dir_tree": {
154
+ const result = await dirTree({
155
+ dirPath: args?.dir_path as string,
156
+ maxDepth: args?.max_depth as number | undefined,
157
+ ignorePatterns: args?.ignore_patterns as string[] | undefined,
158
+ });
159
+ return {
160
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
161
+ };
162
+ }
163
+
164
+ case "file_stats": {
165
+ const result = await fileStats({
166
+ dirPath: args?.dir_path as string,
167
+ maxDepth: args?.max_depth as number | undefined,
168
+ });
169
+ return {
170
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
171
+ };
172
+ }
173
+
174
+ case "duplicate_finder": {
175
+ const result = await duplicateFinder({
176
+ dirPath: args?.dir_path as string,
177
+ minSize: args?.min_size as number | undefined,
178
+ maxDepth: args?.max_depth as number | undefined,
179
+ });
180
+ return {
181
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
182
+ };
183
+ }
184
+
185
+ case "code_counter": {
186
+ const result = await codeCounter({
187
+ dirPath: args?.dir_path as string,
188
+ maxDepth: args?.max_depth as number | undefined,
189
+ });
190
+ return {
191
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
192
+ };
193
+ }
194
+
195
+ case "file_search": {
196
+ const result = await fileSearch({
197
+ dirPath: args?.dir_path as string,
198
+ namePattern: args?.name_pattern as string | undefined,
199
+ contentPattern: args?.content_pattern as string | undefined,
200
+ minSize: args?.min_size as number | undefined,
201
+ maxSize: args?.max_size as number | undefined,
202
+ modifiedAfter: args?.modified_after as string | undefined,
203
+ modifiedBefore: args?.modified_before as string | undefined,
204
+ maxResults: args?.max_results as number | undefined,
205
+ });
206
+ return {
207
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
208
+ };
209
+ }
210
+
211
+ default:
212
+ throw new Error(`Unknown tool: ${name}`);
213
+ }
214
+ } catch (error: unknown) {
215
+ const message = error instanceof Error ? error.message : String(error);
216
+ return {
217
+ content: [{ type: "text", text: `Error: ${message}` }],
218
+ isError: true,
219
+ };
220
+ }
221
+ });
222
+
223
+ // Start server
224
+ async function main() {
225
+ const transport = new StdioServerTransport();
226
+ await server.connect(transport);
227
+ console.error("MCP File Tools server running on stdio");
228
+ }
229
+
230
+ main().catch(console.error);
@@ -0,0 +1,208 @@
1
+ import * as fs from "fs";
2
+ import * as path from "path";
3
+
4
/** Options accepted by {@link codeCounter}. */
interface CodeCounterOptions {
  /** Directory to scan; resolved to an absolute path. */
  dirPath: string;
  /** Recursion limit; 0 means unlimited (default: 10). */
  maxDepth?: number;
}

/** Per-language aggregate counters. */
interface LanguageStats {
  /** Number of files attributed to this language. */
  files: number;
  /** All lines, including blanks and comments. */
  totalLines: number;
  /** Lines classified as code. */
  codeLines: number;
  /** Lines classified as comments (single-line or block). */
  commentLines: number;
  /** Whitespace-only lines. */
  blankLines: number;
}

/** Result payload returned by {@link codeCounter}. */
interface CodeCounterResult {
  /** Absolute path that was scanned. */
  directory: string;
  /** Count of files whose extension mapped to a known language. */
  totalFiles: number;
  totalLines: number;
  totalCodeLines: number;
  totalCommentLines: number;
  totalBlankLines: number;
  /** Per-language stats, ordered by code lines descending. */
  languages: Record<string, LanguageStats>;
}

/** Comment-syntax description for one language family. */
interface LangDef {
  /** Display name, e.g. "TypeScript". */
  name: string;
  /** Lowercase file extensions, including the leading dot. */
  extensions: string[];
  /** Line-comment prefixes ("//", "#", ...); empty if the language has none. */
  singleLineComment: string[];
  /** Opening block-comment delimiter, when the language has one. */
  blockCommentStart?: string;
  /** Closing block-comment delimiter. */
  blockCommentEnd?: string;
}
34
+
35
// Comment-syntax table for every language the counter recognises.
// NOTE(review): Python's triple-quoted strings are modelled here as block
// comments, so module/class docstrings count as comment lines — a
// cloc-style heuristic, not a real parse.
const LANGUAGES: LangDef[] = [
  { name: "TypeScript", extensions: [".ts", ".tsx"], singleLineComment: ["//"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  { name: "JavaScript", extensions: [".js", ".jsx", ".mjs", ".cjs"], singleLineComment: ["//"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  { name: "Python", extensions: [".py", ".pyw"], singleLineComment: ["#"], blockCommentStart: '"""', blockCommentEnd: '"""' },
  { name: "Java", extensions: [".java"], singleLineComment: ["//"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  { name: "C/C++", extensions: [".c", ".cpp", ".cc", ".h", ".hpp"], singleLineComment: ["//"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  { name: "C#", extensions: [".cs"], singleLineComment: ["//"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  { name: "Go", extensions: [".go"], singleLineComment: ["//"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  { name: "Rust", extensions: [".rs"], singleLineComment: ["//"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  { name: "Ruby", extensions: [".rb"], singleLineComment: ["#"], blockCommentStart: "=begin", blockCommentEnd: "=end" },
  { name: "PHP", extensions: [".php"], singleLineComment: ["//", "#"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  { name: "Swift", extensions: [".swift"], singleLineComment: ["//"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  { name: "Kotlin", extensions: [".kt", ".kts"], singleLineComment: ["//"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  { name: "HTML", extensions: [".html", ".htm"], singleLineComment: [], blockCommentStart: "<!--", blockCommentEnd: "-->" },
  { name: "CSS", extensions: [".css", ".scss", ".sass", ".less"], singleLineComment: ["//"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  // Languages below have no block-comment delimiters configured.
  { name: "JSON", extensions: [".json"], singleLineComment: [], },
  { name: "YAML", extensions: [".yml", ".yaml"], singleLineComment: ["#"] },
  { name: "Markdown", extensions: [".md", ".mdx"], singleLineComment: [] },
  { name: "Shell", extensions: [".sh", ".bash", ".zsh"], singleLineComment: ["#"] },
  { name: "SQL", extensions: [".sql"], singleLineComment: ["--"], blockCommentStart: "/*", blockCommentEnd: "*/" },
  { name: "Lua", extensions: [".lua"], singleLineComment: ["--"], blockCommentStart: "--[[", blockCommentEnd: "]]" },
];
57
+
58
+ const EXT_TO_LANG: Record<string, LangDef> = {};
59
+ for (const lang of LANGUAGES) {
60
+ for (const ext of lang.extensions) {
61
+ EXT_TO_LANG[ext] = lang;
62
+ }
63
+ }
64
+
65
+ function countLines(
66
+ filePath: string,
67
+ lang: LangDef
68
+ ): { totalLines: number; codeLines: number; commentLines: number; blankLines: number } {
69
+ let content: string;
70
+ try {
71
+ content = fs.readFileSync(filePath, "utf-8");
72
+ } catch {
73
+ return { totalLines: 0, codeLines: 0, commentLines: 0, blankLines: 0 };
74
+ }
75
+
76
+ const lines = content.split(/\r?\n/);
77
+ let blankLines = 0;
78
+ let commentLines = 0;
79
+ let codeLines = 0;
80
+ let inBlockComment = false;
81
+
82
+ for (const line of lines) {
83
+ const trimmed = line.trim();
84
+
85
+ if (trimmed === "") {
86
+ blankLines++;
87
+ continue;
88
+ }
89
+
90
+ if (inBlockComment) {
91
+ commentLines++;
92
+ if (lang.blockCommentEnd && trimmed.includes(lang.blockCommentEnd)) {
93
+ inBlockComment = false;
94
+ }
95
+ continue;
96
+ }
97
+
98
+ if (lang.blockCommentStart && trimmed.startsWith(lang.blockCommentStart)) {
99
+ commentLines++;
100
+ if (!lang.blockCommentEnd || !trimmed.includes(lang.blockCommentEnd, lang.blockCommentStart.length)) {
101
+ inBlockComment = true;
102
+ }
103
+ continue;
104
+ }
105
+
106
+ const isSingleComment = lang.singleLineComment.some((prefix) => trimmed.startsWith(prefix));
107
+ if (isSingleComment) {
108
+ commentLines++;
109
+ } else {
110
+ codeLines++;
111
+ }
112
+ }
113
+
114
+ return {
115
+ totalLines: lines.length,
116
+ codeLines,
117
+ commentLines,
118
+ blankLines,
119
+ };
120
+ }
121
+
122
+ function collectFiles(
123
+ dirPath: string,
124
+ depth: number,
125
+ maxDepth: number,
126
+ files: string[]
127
+ ): void {
128
+ if (maxDepth > 0 && depth >= maxDepth) return;
129
+
130
+ let entries: fs.Dirent[];
131
+ try {
132
+ entries = fs.readdirSync(dirPath, { withFileTypes: true });
133
+ } catch {
134
+ return;
135
+ }
136
+
137
+ for (const entry of entries) {
138
+ const fullPath = path.join(dirPath, entry.name);
139
+
140
+ if (entry.isDirectory()) {
141
+ if (["node_modules", ".git", "dist", "__pycache__", ".next", "coverage", "build"].includes(entry.name)) continue;
142
+ collectFiles(fullPath, depth + 1, maxDepth, files);
143
+ } else if (entry.isFile()) {
144
+ const ext = path.extname(entry.name).toLowerCase();
145
+ if (EXT_TO_LANG[ext]) {
146
+ files.push(fullPath);
147
+ }
148
+ }
149
+ }
150
+ }
151
+
152
+ export async function codeCounter(options: CodeCounterOptions): Promise<CodeCounterResult> {
153
+ const { dirPath, maxDepth = 10 } = options;
154
+ const resolvedPath = path.resolve(dirPath);
155
+
156
+ if (!fs.existsSync(resolvedPath)) {
157
+ throw new Error(`Directory not found: ${resolvedPath}`);
158
+ }
159
+
160
+ const files: string[] = [];
161
+ collectFiles(resolvedPath, 0, maxDepth, files);
162
+
163
+ const langStats: Record<string, LanguageStats> = {};
164
+ let totalLines = 0;
165
+ let totalCodeLines = 0;
166
+ let totalCommentLines = 0;
167
+ let totalBlankLines = 0;
168
+
169
+ for (const filePath of files) {
170
+ const ext = path.extname(filePath).toLowerCase();
171
+ const lang = EXT_TO_LANG[ext];
172
+ if (!lang) continue;
173
+
174
+ const counts = countLines(filePath, lang);
175
+
176
+ if (!langStats[lang.name]) {
177
+ langStats[lang.name] = { files: 0, totalLines: 0, codeLines: 0, commentLines: 0, blankLines: 0 };
178
+ }
179
+
180
+ langStats[lang.name].files++;
181
+ langStats[lang.name].totalLines += counts.totalLines;
182
+ langStats[lang.name].codeLines += counts.codeLines;
183
+ langStats[lang.name].commentLines += counts.commentLines;
184
+ langStats[lang.name].blankLines += counts.blankLines;
185
+
186
+ totalLines += counts.totalLines;
187
+ totalCodeLines += counts.codeLines;
188
+ totalCommentLines += counts.commentLines;
189
+ totalBlankLines += counts.blankLines;
190
+ }
191
+
192
+ // Sort languages by code lines descending
193
+ const sortedLangs: Record<string, LanguageStats> = {};
194
+ const sorted = Object.entries(langStats).sort((a, b) => b[1].codeLines - a[1].codeLines);
195
+ for (const [name, stats] of sorted) {
196
+ sortedLangs[name] = stats;
197
+ }
198
+
199
+ return {
200
+ directory: resolvedPath,
201
+ totalFiles: files.length,
202
+ totalLines,
203
+ totalCodeLines,
204
+ totalCommentLines,
205
+ totalBlankLines,
206
+ languages: sortedLangs,
207
+ };
208
+ }
@@ -0,0 +1,108 @@
1
+ import * as fs from "fs";
2
+ import * as path from "path";
3
+
4
/** Options accepted by {@link dirTree}. */
interface DirTreeOptions {
  /** Directory to render; resolved to an absolute path. */
  dirPath: string;
  /** Recursion limit; 0 means unlimited (default: 5). */
  maxDepth?: number;
  /** Entry names / `*`-globs to skip (default: DEFAULT_IGNORE). */
  ignorePatterns?: string[];
}

/** Result payload returned by {@link dirTree}. */
interface TreeResult {
  /** Rendered tree, one entry per line, root first. */
  tree: string;
  /** Directories listed (the root itself is not counted). */
  totalDirs: number;
  /** Files listed. */
  totalFiles: number;
}

// Vendor/build directories skipped unless the caller supplies patterns.
const DEFAULT_IGNORE = ["node_modules", ".git", "dist", "__pycache__", ".next", ".cache", "coverage"];
17
+
18
+ function shouldIgnore(name: string, ignorePatterns: string[]): boolean {
19
+ return ignorePatterns.some((pattern) => {
20
+ if (pattern.includes("*")) {
21
+ const regex = new RegExp("^" + pattern.replace(/\*/g, ".*") + "$");
22
+ return regex.test(name);
23
+ }
24
+ return name === pattern;
25
+ });
26
+ }
27
+
28
+ function buildTree(
29
+ dirPath: string,
30
+ prefix: string,
31
+ depth: number,
32
+ maxDepth: number,
33
+ ignorePatterns: string[],
34
+ stats: { dirs: number; files: number }
35
+ ): string[] {
36
+ if (maxDepth > 0 && depth >= maxDepth) {
37
+ return [];
38
+ }
39
+
40
+ let entries: fs.Dirent[];
41
+ try {
42
+ entries = fs.readdirSync(dirPath, { withFileTypes: true });
43
+ } catch {
44
+ return [`${prefix}[permission denied]`];
45
+ }
46
+
47
+ entries = entries.filter((e) => !shouldIgnore(e.name, ignorePatterns));
48
+ entries.sort((a, b) => {
49
+ if (a.isDirectory() && !b.isDirectory()) return -1;
50
+ if (!a.isDirectory() && b.isDirectory()) return 1;
51
+ return a.name.localeCompare(b.name);
52
+ });
53
+
54
+ const lines: string[] = [];
55
+
56
+ for (let i = 0; i < entries.length; i++) {
57
+ const entry = entries[i];
58
+ const isLast = i === entries.length - 1;
59
+ const connector = isLast ? "\u2514\u2500\u2500 " : "\u251C\u2500\u2500 ";
60
+ const childPrefix = isLast ? " " : "\u2502 ";
61
+
62
+ if (entry.isDirectory()) {
63
+ stats.dirs++;
64
+ lines.push(`${prefix}${connector}${entry.name}/`);
65
+ const childLines = buildTree(
66
+ path.join(dirPath, entry.name),
67
+ prefix + childPrefix,
68
+ depth + 1,
69
+ maxDepth,
70
+ ignorePatterns,
71
+ stats
72
+ );
73
+ lines.push(...childLines);
74
+ } else {
75
+ stats.files++;
76
+ lines.push(`${prefix}${connector}${entry.name}`);
77
+ }
78
+ }
79
+
80
+ return lines;
81
+ }
82
+
83
+ export async function dirTree(options: DirTreeOptions): Promise<TreeResult> {
84
+ const { dirPath, maxDepth = 5, ignorePatterns = DEFAULT_IGNORE } = options;
85
+
86
+ const resolvedPath = path.resolve(dirPath);
87
+
88
+ if (!fs.existsSync(resolvedPath)) {
89
+ throw new Error(`Directory not found: ${resolvedPath}`);
90
+ }
91
+
92
+ const stat = fs.statSync(resolvedPath);
93
+ if (!stat.isDirectory()) {
94
+ throw new Error(`Not a directory: ${resolvedPath}`);
95
+ }
96
+
97
+ const stats = { dirs: 0, files: 0 };
98
+ const rootName = path.basename(resolvedPath);
99
+ const lines = [`${rootName}/`];
100
+ const childLines = buildTree(resolvedPath, "", 0, maxDepth, ignorePatterns, stats);
101
+ lines.push(...childLines);
102
+
103
+ return {
104
+ tree: lines.join("\n"),
105
+ totalDirs: stats.dirs,
106
+ totalFiles: stats.files,
107
+ };
108
+ }
@@ -0,0 +1,145 @@
1
+ import * as fs from "fs";
2
+ import * as path from "path";
3
+ import * as crypto from "crypto";
4
+
5
/** Options accepted by {@link duplicateFinder}. */
interface DuplicateFinderOptions {
  /** Directory to scan; resolved to an absolute path. */
  dirPath: string;
  /** Smallest file size in bytes considered (default: 1). */
  minSize?: number;
  /** Recursion limit; 0 means unlimited (default: 10). */
  maxDepth?: number;
}

/** One set of files sharing the same size and MD5 digest. */
interface DuplicateGroup {
  /** MD5 digest (hex) of the shared content. */
  hash: string;
  /** Human-readable size of one copy, e.g. "1.50 KB". */
  size: string;
  /** Size of one copy in bytes. */
  sizeBytes: number;
  /** Absolute paths of every copy. */
  files: string[];
}

/** Result payload returned by {@link duplicateFinder}. */
interface DuplicateResult {
  /** Absolute path that was scanned. */
  directory: string;
  /** Files that passed the minSize filter. */
  totalFilesScanned: number;
  /** Number of duplicate groups found. */
  duplicateGroups: number;
  /** Total files belonging to any duplicate group. */
  totalDuplicateFiles: number;
  /** Human-readable space reclaimable by keeping one copy per group. */
  wastedSpace: string;
  wastedSpaceBytes: number;
  /** Up to 50 groups, sorted by wasted space descending. */
  groups: DuplicateGroup[];
}
27
+
28
+ function formatSize(bytes: number): string {
29
+ if (bytes === 0) return "0 B";
30
+ const units = ["B", "KB", "MB", "GB", "TB"];
31
+ const i = Math.floor(Math.log(bytes) / Math.log(1024));
32
+ return (bytes / Math.pow(1024, i)).toFixed(2) + " " + units[i];
33
+ }
34
+
35
+ function collectFiles(
36
+ dirPath: string,
37
+ depth: number,
38
+ maxDepth: number,
39
+ files: { path: string; size: number }[]
40
+ ): void {
41
+ if (maxDepth > 0 && depth >= maxDepth) return;
42
+
43
+ let entries: fs.Dirent[];
44
+ try {
45
+ entries = fs.readdirSync(dirPath, { withFileTypes: true });
46
+ } catch {
47
+ return;
48
+ }
49
+
50
+ for (const entry of entries) {
51
+ const fullPath = path.join(dirPath, entry.name);
52
+
53
+ if (entry.isDirectory()) {
54
+ if (["node_modules", ".git", "dist", "__pycache__"].includes(entry.name)) continue;
55
+ collectFiles(fullPath, depth + 1, maxDepth, files);
56
+ } else if (entry.isFile()) {
57
+ try {
58
+ const stat = fs.statSync(fullPath);
59
+ files.push({ path: fullPath, size: stat.size });
60
+ } catch {
61
+ // Skip inaccessible files
62
+ }
63
+ }
64
+ }
65
+ }
66
+
67
+ function hashFile(filePath: string): string {
68
+ const content = fs.readFileSync(filePath);
69
+ return crypto.createHash("md5").update(content).digest("hex");
70
+ }
71
+
72
+ export async function duplicateFinder(options: DuplicateFinderOptions): Promise<DuplicateResult> {
73
+ const { dirPath, minSize = 1, maxDepth = 10 } = options;
74
+ const resolvedPath = path.resolve(dirPath);
75
+
76
+ if (!fs.existsSync(resolvedPath)) {
77
+ throw new Error(`Directory not found: ${resolvedPath}`);
78
+ }
79
+
80
+ const allFiles: { path: string; size: number }[] = [];
81
+ collectFiles(resolvedPath, 0, maxDepth, allFiles);
82
+
83
+ // Filter by minimum size
84
+ const filteredFiles = allFiles.filter((f) => f.size >= minSize);
85
+
86
+ // Group by size first (optimization: only hash files with matching sizes)
87
+ const sizeGroups: Record<number, { path: string; size: number }[]> = {};
88
+ for (const file of filteredFiles) {
89
+ if (!sizeGroups[file.size]) {
90
+ sizeGroups[file.size] = [];
91
+ }
92
+ sizeGroups[file.size].push(file);
93
+ }
94
+
95
+ // Hash only files that share a size
96
+ const hashGroups: Record<string, { path: string; size: number }[]> = {};
97
+ for (const [, group] of Object.entries(sizeGroups)) {
98
+ if (group.length < 2) continue;
99
+
100
+ for (const file of group) {
101
+ try {
102
+ const hash = hashFile(file.path);
103
+ if (!hashGroups[hash]) {
104
+ hashGroups[hash] = [];
105
+ }
106
+ hashGroups[hash].push(file);
107
+ } catch {
108
+ // Skip files that can't be read
109
+ }
110
+ }
111
+ }
112
+
113
+ // Build result groups (only actual duplicates)
114
+ const groups: DuplicateGroup[] = [];
115
+ let totalDuplicateFiles = 0;
116
+ let wastedSpaceBytes = 0;
117
+
118
+ for (const [hash, files] of Object.entries(hashGroups)) {
119
+ if (files.length < 2) continue;
120
+
121
+ const sizeBytes = files[0].size;
122
+ groups.push({
123
+ hash,
124
+ size: formatSize(sizeBytes),
125
+ sizeBytes,
126
+ files: files.map((f) => f.path),
127
+ });
128
+
129
+ totalDuplicateFiles += files.length;
130
+ wastedSpaceBytes += sizeBytes * (files.length - 1);
131
+ }
132
+
133
+ // Sort by wasted space descending
134
+ groups.sort((a, b) => b.sizeBytes * (b.files.length - 1) - a.sizeBytes * (a.files.length - 1));
135
+
136
+ return {
137
+ directory: resolvedPath,
138
+ totalFilesScanned: filteredFiles.length,
139
+ duplicateGroups: groups.length,
140
+ totalDuplicateFiles,
141
+ wastedSpace: formatSize(wastedSpaceBytes),
142
+ wastedSpaceBytes,
143
+ groups: groups.slice(0, 50),
144
+ };
145
+ }