npm - @zwa73/dev-utils - Versions diffs - 1.0.87 → 1.0.88 - Mend

@zwa73/dev-utils 1.0.87 → 1.0.88

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/cjs/Command/MapPath.js +34 -25
package/dist/cjs/Command/Release.js +3 -1
package/dist/cjs/Command/ScanDups.js +75 -55
package/dist/mjs/Command/MapPath.js +34 -25
package/dist/mjs/Command/Release.js +3 -1
package/dist/mjs/Command/ScanDups.js +76 -56
package/package.json +1 -1

package/dist/cjs/Command/MapPath.js CHANGED

@@ -7,8 +7,7 @@ exports.CmdMapPath = void 0;
 const utils_1 = require("@zwa73/utils");
 const fs_1 = __importDefault(require("fs"));
 const pathe_1 = __importDefault(require("pathe"));
-const DupMethodList = ["skip", "overwrite", "move"];
-const DupMethodWithoutMove = DupMethodList.filter(t => t != 'move');
+const DupMethodList = ["skip", "overwrite"];
 /**重命名文件或路径 */
 const CmdMapPath = (program) => program
     .command("Map-Path")
@@ -17,12 +16,14 @@ const CmdMapPath = (program) => program
     .argument("<regex>", "要匹配的正则表达式, posix风格路径")
     .argument("<replacement>", "替换字符串")
     .option("-e, --exclude <regex>", "排除文件的正则表达式")
-    .option(`-d, --duplicate-handling <${DupMethodWithoutMove.join('|')}|[path:string]>`, `处理重名文件的方式:
+    .option(`-d, --duplicate-handling <${DupMethodList.join('|')}|[path:string]>`, `处理重名文件的方式:
 skip 不进行处理
 overwrite 覆盖重名
 其他字符串 将重名部分映射到指定目录下的对应位置, 再次重复将会覆盖`, "skip")
     .option("-r, --recursive", "是否处理子目录", false)
     .option("-m, --move", "重命名而不是复制文件", false)
+    .option("-o, --output <path>", "输出到某个绝对路径而非当前目录", '')
+    .option("-i, --input <path>", "扫描某个绝对路径而非当前目录", '')
     .option("-t, --test", "不对文件进行实际操作, 在控制台输出映射结果", false)
     .action(async (regexStr, replacement, options) => {
     const regex = new RegExp(regexStr);
@@ -30,42 +31,50 @@ overwrite 覆盖重名
     if (!DupMethodList.includes(options.duplicateHandling))
         (0, utils_1.throwError)(`${options.duplicateHandling} 不是有效的 duplicate-handling`);
     const duplicateHandling = options.duplicateHandling;
-    const basePath = process.cwd();
+    const absout = options.output.length > 0;
+    const absin = options.input.length > 0;
+    const searchPath = absin ? options.input : process.cwd();
+    const outPath = absout ? options.output : process.cwd();
     // 遍历当前目录下的所有文件
-    const filePaths = (await utils_1.UtilFT.fileSearchRegex(basePath, regex.source, { relative: options.recursive }))
-        .map((filePath) => pathe_1.default.relative(basePath, filePath))
-        .filter((filePath) => excludeRegex ? (!excludeRegex.test(filePath)) : true);
+    const filePaths = (await utils_1.UtilFT.fileSearchRegex(searchPath, regex, { relative: options.recursive }))
+        .map(fp => pathe_1.default.relative(searchPath, fp))
+        .filter(fp => excludeRegex ? (!excludeRegex.test(fp)) : true);
     //对单个路径映射
     const mapPath = async (source, target) => {
-        const dir = pathe_1.default.parse(target).dir;
-        await utils_1.UtilFT.ensurePathExists(dir, { dir: true });
         if (options.test)
             return utils_1.SLogger.info(`${source} -> ${target}`);
+        const dir = pathe_1.default.parse(target).dir;
+        await utils_1.UtilFT.ensurePathExists(dir, { dir: true });
         if (options.move)
-            await fs_1.default.promises.rename(source, target);
-        else
-            await fs_1.default.promises.copyFile(source, target);
+            return fs_1.default.promises.rename(source, target);
+        return fs_1.default.promises.copyFile(source, target);
     };
-    for (const filePath of filePaths) {
+    await Promise.all(filePaths.map(async (rawfilePath) => {
+        const filePath = pathe_1.default.normalize(rawfilePath);
+        const replacedFilePath = filePath.replace(regex, replacement);
         // 重命名文件
-        const newFilePath = filePath.replace(regex, replacement);
+        const oldFilePath = absin
+            ? pathe_1.default.join(searchPath, filePath)
+            : filePath;
+        const newFilePath = absout
+            ? pathe_1.default.join(outPath, replacedFilePath) //如果是绝对路径输出则拼接绝对路径
+            : replacedFilePath;
         // 如果文件名发生了变化
-        if (newFilePath === filePath)
-            continue;
+        if (newFilePath === oldFilePath)
+            return;
         //如果文件已存在
         if (await utils_1.UtilFT.pathExists(newFilePath)) {
-            if (DupMethodWithoutMove.includes(options.duplicateHandling)) {
-                const fixhd = duplicateHandling;
-                await (0, utils_1.match)(fixhd, {
+            //如果是跳过或覆盖
+            if (DupMethodList.includes(options.duplicateHandling)) {
+                return (0, utils_1.match)(duplicateHandling, {
                     'skip': () => utils_1.SLogger.info(`重名文件存在，跳过：${newFilePath}`),
-                    'overwrite': () => mapPath(filePath, newFilePath),
+                    'overwrite': () => mapPath(oldFilePath, newFilePath),
                 });
             }
-            else
-                await mapPath(filePath, pathe_1.default.join(duplicateHandling, newFilePath));
+            //如果是转移位置
+            return mapPath(oldFilePath, pathe_1.default.join(outPath, duplicateHandling, replacedFilePath));
         }
-        else
-            await mapPath(filePath, newFilePath);
-    }
+        return mapPath(oldFilePath, newFilePath);
+    }));
 });
 exports.CmdMapPath = CmdMapPath;

package/dist/cjs/Command/Release.js CHANGED

@@ -34,6 +34,8 @@ function checkVersion(oldVersion, newVersion) {
 async function updateVersion(newVersion) {
     const packagePath = pathe_1.default.join(RouteInterface_1.PROCESS_PATH, "package.json");
     const packageData = await (0, utils_1.memoize)(utils_1.UtilFT.loadJSONFile)(packagePath);
+    if (newVersion == 'current')
+        return packageData.version;
     if (newVersion) {
         checkVersion(packageData.version, newVersion);
         packageData.version = newVersion;
@@ -59,7 +61,7 @@ const CmdRelease = (program) => program
     .command("Release")
     .alias("release")
     .description("更新版本号并发布包")
-    .option("-v, --version <version>", "指定发布的版本号 格式应为 `${number}.${number}.${number}`")
+    .option("-v, --version <version>", "指定发布的版本号, 为 `current` 时不更新版本号, 格式应为 `${number}.${number}.${number}`")
     .option("-a, --access <access>", "npm publish 的 access 参数 默认 public", "public")
     .option("-l, --local <path>", "仅打包到本地对印目录下 如./build/", undefined)
     .action(async (opt) => {

package/dist/cjs/Command/ScanDups.js CHANGED

@@ -5,97 +5,117 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.CmdScanDups = void 0;
 const utils_1 = require("@zwa73/utils");
-const fs_1 = __importDefault(require("fs"));
-const crypto_1 = __importDefault(require("crypto"));
 const pathe_1 = __importDefault(require("pathe"));
 const cli_progress_1 = __importDefault(require("cli-progress"));
+const fs_1 = __importDefault(require("fs"));
 const { tap } = utils_1.UtilFP;
-async function calculateHash(filePath) {
-    return new Promise((resolve, reject) => {
-        const hash = crypto_1.default.createHash('md5');
-        const stream = fs_1.default.createReadStream(filePath);
-        stream.on('data', (data) => hash.update(data));
-        stream.on('end', () => resolve(hash.digest('hex')));
-        stream.on('error', reject);
-    });
-}
-async function calculateSampledHash(filePath, chunkSize = 1024, // 每块大小（例如 1KB）
-chunkCount = 10) {
-    const stats = await fs_1.default.promises.stat(filePath);
-    const totalSize = stats.size;
-    if (totalSize < chunkSize * chunkCount)
-        return calculateHash(filePath);
-    const positions = Array.from({ length: chunkCount }, (_, i) => Math.floor((totalSize / chunkCount) * i));
-    const hash = crypto_1.default.createHash("md5"); // 或其他哈希算法，如 CRC32/BLAKE3
-    for (const position of positions) {
-        const buffer = await new Promise((resolve, reject) => {
-            // 创建一个文件流，限制读取范围
-            const stream = fs_1.default.createReadStream(filePath, {
-                start: position,
-                end: Math.min(position + chunkSize - 1, totalSize - 1), // 确保不会超出文件末尾
-                highWaterMark: chunkSize, // 高效流式块
-            });
-            const chunks = [];
-            stream.on("data", (chunk) => chunks.push(chunk));
-            stream.on("end", () => resolve(Buffer.concat(chunks))); // 合并块数据
-            stream.on("error", reject);
+/**hashlist转为dupmap */
+const reduce2Dupmap = (list) => list.reduce((acc, cur) => {
+    const files = acc[cur.hash] ?? [];
+    acc[cur.hash] = [...files, cur.filePath]; // 分类采样哈希到对应文件路径
+    return acc;
+}, {});
+/**从dupmap提取重复的filepath */
+const reduce2DupFpList = (map) => Object.entries(map).reduce((acc, [hash, files]) => files.length > 1 ? [...acc, ...files] : acc, // 筛选出重复采样哈希的文件路径
+[]);
+/**扫描文件结构 */
+const scanDirStruct = (0, utils_1.memoize)(async (root) => {
+    const list = await fs_1.default.promises.readdir(root, { withFileTypes: true });
+    const stack = await Promise.all(list.map(async (fp) => fp.isDirectory()
+        ? [fp.name, await scanDirStruct(pathe_1.default.join(root, fp.name))]
+        : [fp.name, pathe_1.default.join(root, fp.name)]));
+    return stack.reduce((acc, cur) => ({
+        ...acc, [cur[0]]: cur[1],
+    }), {});
+});
+const hashQueue = new utils_1.PromiseQueue({ concurrency: 8 });
+/**计算文件结构的hash */
+async function struct2hash(struct, fn) {
+    const recursion = (struct) => {
+        return Object.entries(struct).map(async ([k, v]) => {
+            if (typeof v === "string")
+                return [await hashQueue.enqueue(async () => fn(v))];
+            return (await Promise.all(recursion(v))).flat();
         });
-        hash.update(buffer); // 更新哈希计算
-    }
-    return hash.digest("hex"); // 返回最终哈希值
+    };
+    return utils_1.UtilFunc.calcHash((await Promise.all(recursion(struct)))
+        .flat().join('|'), { algorithm: "blake2b512" });
 }
 /**重命名文件或路径 scan_duplicates */
 const CmdScanDups = (program) => program
     .command("Scan-Dups")
     .alias("scandups")
     .description("扫描当前目录下hash重复的文件")
-    .option("-re, --regex <regex>", "文件的正则表达式, 使用posix路径", ".*")
+    .option("-i, --include <regex>", "文件的正则表达式, 使用posix路径", ".*")
     .option("-o, --out <dir|console>", "输出的json文件路径, 默认 scandups.json, 为 \"console\" 时无文件输出", "scandups")
     .option("-r, --recursive", "是否处理子目录, 默认 true", true)
+    .option("-s, --struct", `结构模式
+只扫描 -i 参数目录下的单层目录, 并计算目录是否出现相同的层次结构
+默认 false`, true)
+    .option("-d, --dir <dirs...>", `扫描的根目录, 以空格分隔 -d "a/b c" "d e/f", 默认为命令行当前目录`, [])
     .action(async (options) => {
-    const regex = new RegExp(options.regex);
+    const { out: outtype, recursive, include, struct, dir } = options;
+    const regex = new RegExp(include);
     const basePath = process.cwd();
-    // **添加一个多步进度条**
+    const pathList = dir.length <= 0
+        ? [process.cwd()]
+        : dir.map(d => pathe_1.default.isAbsolute(d) ? d : pathe_1.default.join(process.cwd(), d));
+    // 添加一个多步进度条
     const progressBar = new cli_progress_1.default.MultiBar({
         clearOnComplete: true, hideCursor: true,
         format: " {task} [{bar}] {percentage}% | ETA: {eta}s | {value}/{total} | {status}",
     }, cli_progress_1.default.Presets.shades_classic);
+    // 文件结构进度条
+    const structProgress = struct ? progressBar.create(1, 0, { task: "结构扫描", status: "准备中..." }) : undefined;
     // 采样哈希进度条
     const sampledProgress = progressBar.create(1, 0, { task: "快速扫描", status: "准备中..." });
     // 完整哈希进度条
     const fullHashProgress = progressBar.create(1, 0, { task: "完整扫描", status: "准备中..." });
-    const reduce2Dupmap = (list) => list.reduce((acc, cur) => {
-        const files = acc[cur.hash] ?? [];
-        acc[cur.hash] = [...files, cur.filePath]; // 分类采样哈希到对应文件路径
-        return acc;
-    }, {});
     await (0, utils_1.pipe)(
     // 第一步：文件搜索，获取符合正则的文件路径
-    utils_1.UtilFT.fileSearchRegex(basePath, regex.source, { recursive: options.recursive }), tap(list => void sampledProgress.setTotal(list.length) ??
-        void sampledProgress.update(0, { status: `总计 ${list.length} 个文件` })),
+    Promise.all(pathList.map(async (p) => {
+        if (struct)
+            return (await fs_1.default.promises.readdir(p, { withFileTypes: true }))
+                .filter(fp => fp.isDirectory())
+                .map(fp => pathe_1.default.join(p, fp.name));
+        return await utils_1.UtilFT.fileSearchRegex(p, regex.source, { recursive });
+    })), stacklist => stacklist.flat(), // 扁平化文件路径列表
+    // 如果是结构模式则先筛选相同结构
+    // 扁平化文件路径列表
+    list => struct ? (0, utils_1.pipe)(list, tap(dupFpList => (structProgress.setTotal(dupFpList.length),
+        structProgress.update(0, { status: `结构扫描检出 ${dupFpList.length} 个可能的相等项` }))), list => utils_1.Stream.from(list, 8)
+        .map(async (filePath) => ({
+        filePath,
+        hash: await struct2hash(await scanDirStruct(filePath), cpath => pathe_1.default.relative(filePath, cpath)).then(tap(() => structProgress.increment()))
+    }))
+        .toArray(), reduce2Dupmap, reduce2DupFpList) : list,
     // 第二步：快速扫描，计算采样哈希
-    list => utils_1.Stream.from(list, 8)
+    tap(list => (sampledProgress.setTotal(list.length),
+        sampledProgress.update(0, { status: `总计 ${list.length} 个文件` }))), list => utils_1.Stream.from(list, 8)
         .map(async (filePath) => ({
         filePath,
-        hash: await calculateSampledHash(filePath).then(tap(() => sampledProgress.increment())),
+        hash: struct
+            ? await struct2hash(await scanDirStruct(filePath), (str) => utils_1.UtilFT.calculateHash(str, { sampled: true })).then(tap(() => sampledProgress.increment()))
+            : await utils_1.UtilFT.calculateHash(filePath, { sampled: true }).then(tap(() => sampledProgress.increment())),
     }))
         .toArray(),
-    // 第三步：筛选重复的采样哈希（去掉唯一的采样哈希）
-    reduce2Dupmap, map => Object.entries(map).reduce((acc, [hash, files]) => files.length > 1 ? [...acc, ...files] : acc, // 筛选出重复采样哈希的文件路径
-    []), tap(dupPaths => void fullHashProgress.setTotal(dupPaths.length) ??
-        void fullHashProgress.update(0, { status: `快速扫描检出 ${dupPaths.length} 个可能的相等项` })),
+    // 第三步：筛选重复的采样哈希 (去掉唯一的采样哈希)
+    reduce2Dupmap, reduce2DupFpList,
     // 第四步：对筛选出的重复文件路径并发计算完整哈希
-    dups => utils_1.Stream.from(dups, 8)
+    tap(dupFpList => (fullHashProgress.setTotal(dupFpList.length),
+        fullHashProgress.update(0, { status: `快速扫描检出 ${dupFpList.length} 个可能的相等项` }))), dupFpList => utils_1.Stream.from(dupFpList, 8)
         .map(async (filePath) => ({
         filePath,
-        hash: await calculateHash(filePath).then(tap(() => fullHashProgress.increment())), // 计算完整哈希
+        hash: struct
+            ? await struct2hash(await scanDirStruct(filePath), (str) => utils_1.UtilFT.calculateHash(str)).then(tap(() => sampledProgress.increment()))
+            : await utils_1.UtilFT.calculateHash(filePath).then(tap(() => fullHashProgress.increment())), // 计算完整哈希
     }))
         .toArray(),
-    // 第五步：重新整理完整哈希结果，过滤唯一哈希
+    // 第五步：重新整理完整哈希结果, 过滤唯一哈希
     reduce2Dupmap, map => Object.entries(map).reduce((acc, [hash, files]) => files.length <= 1 ? acc : { ...acc, [hash]: files }, {}),
     // 第六步：输出结果
-    tap(() => progressBar.stop()), out => (0, utils_1.match)(options.out, {
+    tap(() => progressBar.stop()), out => (0, utils_1.match)(outtype, {
         "console": () => utils_1.SLogger.info(out),
-    }, () => utils_1.UtilFT.writeJSONFile(pathe_1.default.join(basePath, options.out), out)));
+    }, () => utils_1.UtilFT.writeJSONFile(pathe_1.default.join(basePath, outtype), out)));
 });
 exports.CmdScanDups = CmdScanDups;

package/dist/mjs/Command/MapPath.js CHANGED

@@ -1,8 +1,7 @@
 import { SLogger, UtilFT, match, throwError } from "@zwa73/utils";
 import fs from "fs";
 import path from "pathe";
-const DupMethodList = ["skip", "overwrite", "move"];
-const DupMethodWithoutMove = DupMethodList.filter(t => t != 'move');
+const DupMethodList = ["skip", "overwrite"];
 /**重命名文件或路径 */
 export const CmdMapPath = (program) => program
     .command("Map-Path")
@@ -11,12 +10,14 @@ export const CmdMapPath = (program) => program
     .argument("<regex>", "要匹配的正则表达式, posix风格路径")
     .argument("<replacement>", "替换字符串")
     .option("-e, --exclude <regex>", "排除文件的正则表达式")
-    .option(`-d, --duplicate-handling <${DupMethodWithoutMove.join('|')}|[path:string]>`, `处理重名文件的方式:
+    .option(`-d, --duplicate-handling <${DupMethodList.join('|')}|[path:string]>`, `处理重名文件的方式:
 skip 不进行处理
 overwrite 覆盖重名
 其他字符串 将重名部分映射到指定目录下的对应位置, 再次重复将会覆盖`, "skip")
     .option("-r, --recursive", "是否处理子目录", false)
     .option("-m, --move", "重命名而不是复制文件", false)
+    .option("-o, --output <path>", "输出到某个绝对路径而非当前目录", '')
+    .option("-i, --input <path>", "扫描某个绝对路径而非当前目录", '')
     .option("-t, --test", "不对文件进行实际操作, 在控制台输出映射结果", false)
     .action(async (regexStr, replacement, options) => {
     const regex = new RegExp(regexStr);
@@ -24,41 +25,49 @@ overwrite 覆盖重名
     if (!DupMethodList.includes(options.duplicateHandling))
         throwError(`${options.duplicateHandling} 不是有效的 duplicate-handling`);
     const duplicateHandling = options.duplicateHandling;
-    const basePath = process.cwd();
+    const absout = options.output.length > 0;
+    const absin = options.input.length > 0;
+    const searchPath = absin ? options.input : process.cwd();
+    const outPath = absout ? options.output : process.cwd();
     // 遍历当前目录下的所有文件
-    const filePaths = (await UtilFT.fileSearchRegex(basePath, regex.source, { relative: options.recursive }))
-        .map((filePath) => path.relative(basePath, filePath))
-        .filter((filePath) => excludeRegex ? (!excludeRegex.test(filePath)) : true);
+    const filePaths = (await UtilFT.fileSearchRegex(searchPath, regex, { relative: options.recursive }))
+        .map(fp => path.relative(searchPath, fp))
+        .filter(fp => excludeRegex ? (!excludeRegex.test(fp)) : true);
     //对单个路径映射
     const mapPath = async (source, target) => {
-        const dir = path.parse(target).dir;
-        await UtilFT.ensurePathExists(dir, { dir: true });
         if (options.test)
             return SLogger.info(`${source} -> ${target}`);
+        const dir = path.parse(target).dir;
+        await UtilFT.ensurePathExists(dir, { dir: true });
         if (options.move)
-            await fs.promises.rename(source, target);
-        else
-            await fs.promises.copyFile(source, target);
+            return fs.promises.rename(source, target);
+        return fs.promises.copyFile(source, target);
     };
-    for (const filePath of filePaths) {
+    await Promise.all(filePaths.map(async (rawfilePath) => {
+        const filePath = path.normalize(rawfilePath);
+        const replacedFilePath = filePath.replace(regex, replacement);
         // 重命名文件
-        const newFilePath = filePath.replace(regex, replacement);
+        const oldFilePath = absin
+            ? path.join(searchPath, filePath)
+            : filePath;
+        const newFilePath = absout
+            ? path.join(outPath, replacedFilePath) //如果是绝对路径输出则拼接绝对路径
+            : replacedFilePath;
         // 如果文件名发生了变化
-        if (newFilePath === filePath)
-            continue;
+        if (newFilePath === oldFilePath)
+            return;
         //如果文件已存在
         if (await UtilFT.pathExists(newFilePath)) {
-            if (DupMethodWithoutMove.includes(options.duplicateHandling)) {
-                const fixhd = duplicateHandling;
-                await match(fixhd, {
+            //如果是跳过或覆盖
+            if (DupMethodList.includes(options.duplicateHandling)) {
+                return match(duplicateHandling, {
                     'skip': () => SLogger.info(`重名文件存在，跳过：${newFilePath}`),
-                    'overwrite': () => mapPath(filePath, newFilePath),
+                    'overwrite': () => mapPath(oldFilePath, newFilePath),
                 });
             }
-            else
-                await mapPath(filePath, path.join(duplicateHandling, newFilePath));
+            //如果是转移位置
+            return mapPath(oldFilePath, path.join(outPath, duplicateHandling, replacedFilePath));
         }
-        else
-            await mapPath(filePath, newFilePath);
-    }
+        return mapPath(oldFilePath, newFilePath);
+    }));
 });

package/dist/mjs/Command/Release.js CHANGED

@@ -28,6 +28,8 @@ function checkVersion(oldVersion, newVersion) {
 async function updateVersion(newVersion) {
     const packagePath = path.join(PROCESS_PATH, "package.json");
     const packageData = await memoize(UtilFT.loadJSONFile)(packagePath);
+    if (newVersion == 'current')
+        return packageData.version;
     if (newVersion) {
         checkVersion(packageData.version, newVersion);
         packageData.version = newVersion;
@@ -53,7 +55,7 @@ export const CmdRelease = (program) => program
     .command("Release")
     .alias("release")
     .description("更新版本号并发布包")
-    .option("-v, --version <version>", "指定发布的版本号 格式应为 `${number}.${number}.${number}`")
+    .option("-v, --version <version>", "指定发布的版本号, 为 `current` 时不更新版本号, 格式应为 `${number}.${number}.${number}`")
     .option("-a, --access <access>", "npm publish 的 access 参数 默认 public", "public")
     .option("-l, --local <path>", "仅打包到本地对印目录下 如./build/", undefined)
     .action(async (opt) => {

package/dist/mjs/Command/ScanDups.js CHANGED

@@ -1,94 +1,114 @@
-import { match, pipe, SLogger, Stream, UtilFP, UtilFT } from "@zwa73/utils";
-import fs from "fs";
-import crypto from 'crypto';
+import { match, memoize, pipe, PromiseQueue, SLogger, Stream, UtilFP, UtilFT, UtilFunc } from "@zwa73/utils";
 import path from "pathe";
 import cliProgress from "cli-progress";
+import fs from 'fs';
 const { tap } = UtilFP;
-async function calculateHash(filePath) {
-    return new Promise((resolve, reject) => {
-        const hash = crypto.createHash('md5');
-        const stream = fs.createReadStream(filePath);
-        stream.on('data', (data) => hash.update(data));
-        stream.on('end', () => resolve(hash.digest('hex')));
-        stream.on('error', reject);
-    });
-}
-async function calculateSampledHash(filePath, chunkSize = 1024, // 每块大小（例如 1KB）
-chunkCount = 10) {
-    const stats = await fs.promises.stat(filePath);
-    const totalSize = stats.size;
-    if (totalSize < chunkSize * chunkCount)
-        return calculateHash(filePath);
-    const positions = Array.from({ length: chunkCount }, (_, i) => Math.floor((totalSize / chunkCount) * i));
-    const hash = crypto.createHash("md5"); // 或其他哈希算法，如 CRC32/BLAKE3
-    for (const position of positions) {
-        const buffer = await new Promise((resolve, reject) => {
-            // 创建一个文件流，限制读取范围
-            const stream = fs.createReadStream(filePath, {
-                start: position,
-                end: Math.min(position + chunkSize - 1, totalSize - 1), // 确保不会超出文件末尾
-                highWaterMark: chunkSize, // 高效流式块
-            });
-            const chunks = [];
-            stream.on("data", (chunk) => chunks.push(chunk));
-            stream.on("end", () => resolve(Buffer.concat(chunks))); // 合并块数据
-            stream.on("error", reject);
+/**hashlist转为dupmap */
+const reduce2Dupmap = (list) => list.reduce((acc, cur) => {
+    const files = acc[cur.hash] ?? [];
+    acc[cur.hash] = [...files, cur.filePath]; // 分类采样哈希到对应文件路径
+    return acc;
+}, {});
+/**从dupmap提取重复的filepath */
+const reduce2DupFpList = (map) => Object.entries(map).reduce((acc, [hash, files]) => files.length > 1 ? [...acc, ...files] : acc, // 筛选出重复采样哈希的文件路径
+[]);
+/**扫描文件结构 */
+const scanDirStruct = memoize(async (root) => {
+    const list = await fs.promises.readdir(root, { withFileTypes: true });
+    const stack = await Promise.all(list.map(async (fp) => fp.isDirectory()
+        ? [fp.name, await scanDirStruct(path.join(root, fp.name))]
+        : [fp.name, path.join(root, fp.name)]));
+    return stack.reduce((acc, cur) => ({
+        ...acc, [cur[0]]: cur[1],
+    }), {});
+});
+const hashQueue = new PromiseQueue({ concurrency: 8 });
+/**计算文件结构的hash */
+async function struct2hash(struct, fn) {
+    const recursion = (struct) => {
+        return Object.entries(struct).map(async ([k, v]) => {
+            if (typeof v === "string")
+                return [await hashQueue.enqueue(async () => fn(v))];
+            return (await Promise.all(recursion(v))).flat();
         });
-        hash.update(buffer); // 更新哈希计算
-    }
-    return hash.digest("hex"); // 返回最终哈希值
+    };
+    return UtilFunc.calcHash((await Promise.all(recursion(struct)))
+        .flat().join('|'), { algorithm: "blake2b512" });
 }
 /**重命名文件或路径 scan_duplicates */
 export const CmdScanDups = (program) => program
     .command("Scan-Dups")
     .alias("scandups")
     .description("扫描当前目录下hash重复的文件")
-    .option("-re, --regex <regex>", "文件的正则表达式, 使用posix路径", ".*")
+    .option("-i, --include <regex>", "文件的正则表达式, 使用posix路径", ".*")
     .option("-o, --out <dir|console>", "输出的json文件路径, 默认 scandups.json, 为 \"console\" 时无文件输出", "scandups")
     .option("-r, --recursive", "是否处理子目录, 默认 true", true)
+    .option("-s, --struct", `结构模式
+只扫描 -i 参数目录下的单层目录, 并计算目录是否出现相同的层次结构
+默认 false`, true)
+    .option("-d, --dir <dirs...>", `扫描的根目录, 以空格分隔 -d "a/b c" "d e/f", 默认为命令行当前目录`, [])
     .action(async (options) => {
-    const regex = new RegExp(options.regex);
+    const { out: outtype, recursive, include, struct, dir } = options;
+    const regex = new RegExp(include);
     const basePath = process.cwd();
-    // **添加一个多步进度条**
+    const pathList = dir.length <= 0
+        ? [process.cwd()]
+        : dir.map(d => path.isAbsolute(d) ? d : path.join(process.cwd(), d));
+    // 添加一个多步进度条
     const progressBar = new cliProgress.MultiBar({
         clearOnComplete: true, hideCursor: true,
         format: " {task} [{bar}] {percentage}% | ETA: {eta}s | {value}/{total} | {status}",
     }, cliProgress.Presets.shades_classic);
+    // 文件结构进度条
+    const structProgress = struct ? progressBar.create(1, 0, { task: "结构扫描", status: "准备中..." }) : undefined;
     // 采样哈希进度条
     const sampledProgress = progressBar.create(1, 0, { task: "快速扫描", status: "准备中..." });
     // 完整哈希进度条
     const fullHashProgress = progressBar.create(1, 0, { task: "完整扫描", status: "准备中..." });
-    const reduce2Dupmap = (list) => list.reduce((acc, cur) => {
-        const files = acc[cur.hash] ?? [];
-        acc[cur.hash] = [...files, cur.filePath]; // 分类采样哈希到对应文件路径
-        return acc;
-    }, {});
     await pipe(
     // 第一步：文件搜索，获取符合正则的文件路径
-    UtilFT.fileSearchRegex(basePath, regex.source, { recursive: options.recursive }), tap(list => void sampledProgress.setTotal(list.length) ??
-        void sampledProgress.update(0, { status: `总计 ${list.length} 个文件` })),
+    Promise.all(pathList.map(async (p) => {
+        if (struct)
+            return (await fs.promises.readdir(p, { withFileTypes: true }))
+                .filter(fp => fp.isDirectory())
+                .map(fp => path.join(p, fp.name));
+        return await UtilFT.fileSearchRegex(p, regex.source, { recursive });
+    })), stacklist => stacklist.flat(), // 扁平化文件路径列表
+    // 如果是结构模式则先筛选相同结构
+    // 扁平化文件路径列表
+    list => struct ? pipe(list, tap(dupFpList => (structProgress.setTotal(dupFpList.length),
+        structProgress.update(0, { status: `结构扫描检出 ${dupFpList.length} 个可能的相等项` }))), list => Stream.from(list, 8)
+        .map(async (filePath) => ({
+        filePath,
+        hash: await struct2hash(await scanDirStruct(filePath), cpath => path.relative(filePath, cpath)).then(tap(() => structProgress.increment()))
+    }))
+        .toArray(), reduce2Dupmap, reduce2DupFpList) : list,
     // 第二步：快速扫描，计算采样哈希
-    list => Stream.from(list, 8)
+    tap(list => (sampledProgress.setTotal(list.length),
+        sampledProgress.update(0, { status: `总计 ${list.length} 个文件` }))), list => Stream.from(list, 8)
         .map(async (filePath) => ({
         filePath,
-        hash: await calculateSampledHash(filePath).then(tap(() => sampledProgress.increment())),
+        hash: struct
+            ? await struct2hash(await scanDirStruct(filePath), (str) => UtilFT.calculateHash(str, { sampled: true })).then(tap(() => sampledProgress.increment()))
+            : await UtilFT.calculateHash(filePath, { sampled: true }).then(tap(() => sampledProgress.increment())),
     }))
         .toArray(),
-    // 第三步：筛选重复的采样哈希（去掉唯一的采样哈希）
-    reduce2Dupmap, map => Object.entries(map).reduce((acc, [hash, files]) => files.length > 1 ? [...acc, ...files] : acc, // 筛选出重复采样哈希的文件路径
-    []), tap(dupPaths => void fullHashProgress.setTotal(dupPaths.length) ??
-        void fullHashProgress.update(0, { status: `快速扫描检出 ${dupPaths.length} 个可能的相等项` })),
+    // 第三步：筛选重复的采样哈希 (去掉唯一的采样哈希)
+    reduce2Dupmap, reduce2DupFpList,
     // 第四步：对筛选出的重复文件路径并发计算完整哈希
-    dups => Stream.from(dups, 8)
+    tap(dupFpList => (fullHashProgress.setTotal(dupFpList.length),
+        fullHashProgress.update(0, { status: `快速扫描检出 ${dupFpList.length} 个可能的相等项` }))), dupFpList => Stream.from(dupFpList, 8)
         .map(async (filePath) => ({
         filePath,
-        hash: await calculateHash(filePath).then(tap(() => fullHashProgress.increment())), // 计算完整哈希
+        hash: struct
+            ? await struct2hash(await scanDirStruct(filePath), (str) => UtilFT.calculateHash(str)).then(tap(() => sampledProgress.increment()))
+            : await UtilFT.calculateHash(filePath).then(tap(() => fullHashProgress.increment())), // 计算完整哈希
     }))
         .toArray(),
-    // 第五步：重新整理完整哈希结果，过滤唯一哈希
+    // 第五步：重新整理完整哈希结果, 过滤唯一哈希
     reduce2Dupmap, map => Object.entries(map).reduce((acc, [hash, files]) => files.length <= 1 ? acc : { ...acc, [hash]: files }, {}),
     // 第六步：输出结果
-    tap(() => progressBar.stop()), out => match(options.out, {
+    tap(() => progressBar.stop()), out => match(outtype, {
         "console": () => SLogger.info(out),
-    }, () => UtilFT.writeJSONFile(path.join(basePath, options.out), out)));
+    }, () => UtilFT.writeJSONFile(path.join(basePath, outtype), out)));
 });

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
 	"name": "@zwa73/dev-utils",
-	"version": "1.0.87",
+	"version": "1.0.88",
 	"description": "编译与调试工具",
 	"exports": {
 		".": {