@1-/scan 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +106 -128
- package/_.js +22 -22
- package/const.js +2 -0
- package/dbInit.js +45 -0
- package/package.json +9 -8
- package/rm.js +6 -8
- package/scan.js +30 -5
- package/stat.js +6 -0
- package/upsert.js +41 -9
- package/load.js +0 -13
- package/save.js +0 -18
package/README.md
CHANGED
|
@@ -3,197 +3,175 @@
|
|
|
3
3
|
---
|
|
4
4
|
|
|
5
5
|
<a id="en"></a>
|
|
6
|
-
#
|
|
6
|
+
# scan : Efficient file system scanning and change detection
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
- [scan : Efficient file system scanning and change detection](#scan-efficient-file-system-scanning-and-change-detection)
|
|
9
|
+
- [Functionality](#functionality)
|
|
10
|
+
- [Usage demonstration](#usage-demonstration)
|
|
11
|
+
- [Design rationale](#design-rationale)
|
|
12
|
+
- [Technology stack](#technology-stack)
|
|
13
|
+
- [Code structure](#code-structure)
|
|
14
|
+
- [Historical context](#historical-context)
|
|
15
|
+
- [About](#about)
|
|
9
16
|
|
|
10
|
-
##
|
|
17
|
+
## Functionality
|
|
11
18
|
|
|
12
|
-
|
|
13
|
-
- **Key Length Optimization**: Stores raw bytes for paths up to 16 bytes. Converts longer paths into 16-byte MD5 hashes to optimize database index space and query performance.
|
|
14
|
-
- **Memory Optimization**: Uses BinMap and BinSet to store binary keys in memory, avoiding string decoding overhead and reducing memory footprint.
|
|
15
|
-
- **Transactional Integrity**: Performs metadata updates and deletions in database transactions to ensure consistency.
|
|
16
|
-
- **Auto Configuration**: Integrates @1-/sqlite to initialize database schema and manage database connections automatically, updating .gitignore when new database is detected.
|
|
19
|
+
This utility scans directories to detect file changes by comparing current file metadata against cached records. It tracks file size, modification time, and MD5 hashes to identify additions, modifications, and deletions efficiently.
|
|
17
20
|
|
|
18
|
-
|
|
21
|
+
The system uses binary data structures for memory efficiency and supports concurrent scanning with automatic parallelism adjustment based on available CPU cores.
|
|
19
22
|
|
|
20
|
-
|
|
23
|
+
## Usage demonstration
|
|
21
24
|
|
|
22
|
-
|
|
23
|
-
import scan from "@1-/scan";
|
|
24
|
-
|
|
25
|
-
const dir = "./data";
|
|
26
|
-
const db_path = "./scan_record.db";
|
|
27
|
-
const files = ["file1.txt", "file2.txt"];
|
|
28
|
-
|
|
29
|
-
// Scan file list, sync metadata to SQLite, return changed relative paths and upsert function
|
|
30
|
-
const [updated_paths, upsert] = await scan(dir, db_path, files);
|
|
31
|
-
|
|
32
|
-
// Close database automatically when exiting scope
|
|
33
|
-
using _ = upsert;
|
|
25
|
+
Install as a dependency:
|
|
34
26
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
// Update scanned file metadata in database
|
|
38
|
-
for (const rel_path of updated_paths) {
|
|
39
|
-
await upsert(rel_path);
|
|
40
|
-
}
|
|
27
|
+
```bash
|
|
28
|
+
npm install @1-/scan
|
|
41
29
|
```
|
|
42
30
|
|
|
43
|
-
|
|
31
|
+
Basic usage:
|
|
44
32
|
|
|
45
33
|
```javascript
|
|
46
|
-
import
|
|
47
|
-
import sqlite from "@1-/sqlite";
|
|
34
|
+
import scan from '@1-/scan';
|
|
48
35
|
|
|
49
|
-
|
|
36
|
+
// Scan directory and get update list
|
|
37
|
+
const [updateFiles, upsert] = await scan('/path/to/dir', '/path/to/db', ['file1.js', 'file2.json']);
|
|
50
38
|
|
|
51
|
-
|
|
52
|
-
save(db, [["file.txt", new Uint8Array([1, 2, 3]), 123, 1620000000]], [new Uint8Array([4, 5, 6])]);
|
|
39
|
+
console.log('Files that need updating:', updateFiles);
|
|
53
40
|
|
|
54
|
-
|
|
41
|
+
// Save the updated metadata to database
|
|
42
|
+
await upsert();
|
|
55
43
|
```
|
|
56
44
|
|
|
57
|
-
##
|
|
45
|
+
## Design rationale
|
|
58
46
|
|
|
59
|
-
|
|
47
|
+
The architecture prioritizes efficiency through several key design decisions:
|
|
60
48
|
|
|
61
|
-
|
|
49
|
+
- Binary data structures (BinSet, BinMap) minimize memory overhead
|
|
50
|
+
- Base64url encoding for path keys enables compact storage
|
|
51
|
+
- Concurrent scanning with dynamic parallelism limits
|
|
52
|
+
- Two-phase comparison: quick metadata check followed by expensive MD5 verification only when needed
|
|
53
|
+
- CSV-based persistent storage for simplicity and portability
|
|
62
54
|
|
|
63
|
-
|
|
64
|
-
2. **Load Records**: `load.js` checks and creates `scanMtimeLen` table. Reads stored hashes, sizes, and modification times to restore memory mappings inside `BinMap`.
|
|
65
|
-
3. **Compare Files**: `scan.js` iterates over input file list, calling `stat.js` for metadata and utilizing `@1-/hash` to map paths to 16-byte binary keys. Adds files with mismatched size or modification time to change list.
|
|
66
|
-
4. **Delete and Return**: `rm.js` deletes absent or unscanned records in transaction. Returns changed paths list and `upsert` function (provided by `upsert.js`) for persistence, supporting automatic resource disposal.
|
|
55
|
+

|
|
67
56
|
|
|
68
|
-
##
|
|
57
|
+
## Technology stack
|
|
69
58
|
|
|
70
|
-
-
|
|
71
|
-
-
|
|
72
|
-
-
|
|
73
|
-
-
|
|
74
|
-
-
|
|
59
|
+
- Node.js runtime with modern ES modules
|
|
60
|
+
- Binary data structures: `@3-/binset`, `@3-/binmap`
|
|
61
|
+
- Base64url encoding: `@3-/base64url`
|
|
62
|
+
- File hashing: `@1-/md5`
|
|
63
|
+
- CSV processing: `@1-/csv`
|
|
64
|
+
- Gitignore management: `@1-/upsert_gitignore`
|
|
65
|
+
- Concurrency control: `@3-/plimit`
|
|
75
66
|
|
|
76
|
-
##
|
|
67
|
+
## Code structure
|
|
77
68
|
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
├──
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
└── tests # Unit tests
|
|
69
|
+
```
|
|
70
|
+
src/
|
|
71
|
+
├── _.js # Main entry point and exports
|
|
72
|
+
├── const.js # Constants (database filenames)
|
|
73
|
+
├── dbInit.js # Database initialization and loading
|
|
74
|
+
├── rm.js # File removal from database
|
|
75
|
+
├── scan.js # Core scanning logic
|
|
76
|
+
├── stat.js # File system statistics collection
|
|
77
|
+
├── upsert.js # Database persistence logic
|
|
78
|
+
└── test/ # Test files
|
|
89
79
|
```
|
|
90
80
|
|
|
91
|
-
##
|
|
92
|
-
|
|
93
|
-
SQLite was created by D. Richard Hipp in 2000 while designing board software for guided-missile destroyers. The system originally depended on commercial database that required constant database administration; connection loss could stall the entire damage control application. Hipp designed serverless, zero-configuration embedded database that directly reads and writes local files, marking the birth of SQLite.
|
|
81
|
+
## Historical context
|
|
94
82
|
|
|
95
|
-
|
|
83
|
+
File scanning utilities trace their origins to early Unix tools like `find` and `diff`. Modern implementations face new challenges with massive file systems and cloud storage. This implementation draws inspiration from incremental backup systems developed in the 1990s, which pioneered the two-phase comparison approach (quick metadata check followed by content verification) to balance speed and accuracy. The use of binary data structures reflects contemporary optimizations for memory-constrained environments and high-performance computing scenarios.
|
|
96
84
|
## About
|
|
97
85
|
|
|
98
86
|
This library is developed by [WebC.site](https://webc.site).
|
|
99
87
|
|
|
100
88
|
[WebC.site](https://webc.site): A new paradigm of web development for AI
|
|
101
89
|
|
|
90
|
+
|
|
102
91
|
---
|
|
103
92
|
|
|
104
93
|
<a id="zh"></a>
|
|
105
|
-
#
|
|
94
|
+
# scan : 高效的文件系统扫描与变更检测
|
|
106
95
|
|
|
107
|
-
|
|
96
|
+
- [scan : 高效的文件系统扫描与变更检测](#scan-高效的文件系统扫描与变更检测)
|
|
97
|
+
- [功能介绍](#功能介绍)
|
|
98
|
+
- [使用演示](#使用演示)
|
|
99
|
+
- [设计思路](#设计思路)
|
|
100
|
+
- [技术栈](#技术栈)
|
|
101
|
+
- [代码结构](#代码结构)
|
|
102
|
+
- [历史故事](#历史故事)
|
|
103
|
+
- [关于](#关于)
|
|
108
104
|
|
|
109
|
-
##
|
|
105
|
+
## 功能介绍
|
|
110
106
|
|
|
111
|
-
|
|
112
|
-
- **键长优化**:路径长度不大于 16 字节时存储原始字节,超出 16 字节转换为 16 字节 MD5 值,优化索引空间与查询性能。
|
|
113
|
-
- **内存优化**:使用 BinMap 与 BinSet 存储二进制键,避免字符串解码,降低内存占用。
|
|
114
|
-
- **事务保障**:元数据变更与删除操作合并在数据库事务中执行,确保数据一致性。
|
|
115
|
-
- **自动配置**:集成 @1-/sqlite,自动初始化数据库表结构,并在检测到新数据库时自动更新 .gitignore。
|
|
107
|
+
本工具通过比对当前文件元数据与缓存记录,扫描目录以检测文件变更。系统跟踪文件大小、修改时间及MD5哈希值,高效识别新增、修改和删除操作。
|
|
116
108
|
|
|
117
|
-
|
|
109
|
+
采用二进制数据结构实现内存效率优化,并支持基于可用CPU核心数自动调整的并发扫描。
|
|
118
110
|
|
|
119
|
-
|
|
111
|
+
## 使用演示
|
|
120
112
|
|
|
121
|
-
|
|
122
|
-
import scan from "@1-/scan";
|
|
123
|
-
|
|
124
|
-
const dir = "./data";
|
|
125
|
-
const db_path = "./scan_record.db";
|
|
126
|
-
const files = ["file1.txt", "file2.txt"];
|
|
127
|
-
|
|
128
|
-
// 扫描文件列表并同步至 SQLite,返回已变更的相对路径列表与更新函数
|
|
129
|
-
const [updated_paths, upsert] = await scan(dir, db_path, files);
|
|
130
|
-
|
|
131
|
-
// 退出作用域自动关闭数据库
|
|
132
|
-
using _ = upsert;
|
|
113
|
+
安装为依赖项:
|
|
133
114
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
// 更新已处理文件的元数据至数据库
|
|
137
|
-
for (const rel_path of updated_paths) {
|
|
138
|
-
await upsert(rel_path);
|
|
139
|
-
}
|
|
115
|
+
```bash
|
|
116
|
+
npm install @1-/scan
|
|
140
117
|
```
|
|
141
118
|
|
|
142
|
-
|
|
119
|
+
基础用法:
|
|
143
120
|
|
|
144
121
|
```javascript
|
|
145
|
-
import
|
|
146
|
-
import sqlite from "@1-/sqlite";
|
|
122
|
+
import scan from '@1-/scan';
|
|
147
123
|
|
|
148
|
-
|
|
124
|
+
// 扫描目录并获取更新列表
|
|
125
|
+
const [updateFiles, upsert] = await scan('/path/to/dir', '/path/to/db', ['file1.js', 'file2.json']);
|
|
149
126
|
|
|
150
|
-
|
|
151
|
-
save(db, [["file.txt", new Uint8Array([1, 2, 3]), 123, 1620000000]], [new Uint8Array([4, 5, 6])]);
|
|
127
|
+
console.log('需要更新的文件:', updateFiles);
|
|
152
128
|
|
|
153
|
-
|
|
129
|
+
// 将更新后的元数据保存至数据库
|
|
130
|
+
await upsert();
|
|
154
131
|
```
|
|
155
132
|
|
|
156
|
-
##
|
|
133
|
+
## 设计思路
|
|
157
134
|
|
|
158
|
-
|
|
135
|
+
架构设计优先考虑效率,关键决策包括:
|
|
159
136
|
|
|
160
|
-
|
|
137
|
+
- 二进制数据结构(BinSet、BinMap)最小化内存开销
|
|
138
|
+
- 路径键使用base64url编码实现紧凑存储
|
|
139
|
+
- 并发扫描配合动态并行度限制
|
|
140
|
+
- 两阶段比对:快速元数据检查后仅在必要时执行耗时MD5验证
|
|
141
|
+
- 基于CSV的持久化存储确保简单性和可移植性
|
|
161
142
|
|
|
162
|
-
|
|
163
|
-
2. **加载记录**:`load.js` 检查并创建 `scanMtimeLen` 表。读取已记录的哈希、大小及修改时间,恢复至内存映射 `BinMap` 中。
|
|
164
|
-
3. **比对文件**:`scan.js` 遍历输入文件列表,调用 `stat.js` 获取元数据,并利用 `@1-/hash` 将路径映射为 16 字节二进制键。比对大小或修改时间,差异项归入变更列表。
|
|
165
|
-
4. **删除与返回**:`rm.js` 在事务中批量删除物理移除或不再扫描的记录。返回变更路径列表与 `upsert` 函数(`upsert.js` 提供),用以更新数据库,支持自动释放。
|
|
143
|
+

|
|
166
144
|
|
|
167
|
-
##
|
|
145
|
+
## 技术栈
|
|
168
146
|
|
|
169
|
-
-
|
|
170
|
-
-
|
|
171
|
-
-
|
|
172
|
-
-
|
|
173
|
-
-
|
|
147
|
+
- Node.js运行时,支持现代ES模块
|
|
148
|
+
- 二进制数据结构:`@3-/binset`、`@3-/binmap`
|
|
149
|
+
- Base64url编码:`@3-/base64url`
|
|
150
|
+
- 文件哈希:`@1-/md5`
|
|
151
|
+
- CSV处理:`@1-/csv`
|
|
152
|
+
- Gitignore管理:`@1-/upsert_gitignore`
|
|
153
|
+
- 并发控制:`@3-/plimit`
|
|
174
154
|
|
|
175
|
-
##
|
|
155
|
+
## 代码结构
|
|
176
156
|
|
|
177
|
-
```
|
|
178
|
-
|
|
179
|
-
├──
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
└── tests # 单元测试
|
|
157
|
+
```
|
|
158
|
+
src/
|
|
159
|
+
├── _.js # 主入口点及导出
|
|
160
|
+
├── const.js # 常量定义(数据库文件名)
|
|
161
|
+
├── dbInit.js # 数据库初始化与加载
|
|
162
|
+
├── rm.js # 从数据库移除文件
|
|
163
|
+
├── scan.js # 核心扫描逻辑
|
|
164
|
+
├── stat.js # 文件系统统计信息收集
|
|
165
|
+
├── upsert.js # 数据库持久化逻辑
|
|
166
|
+
└── test/ # 测试文件
|
|
188
167
|
```
|
|
189
168
|
|
|
190
|
-
##
|
|
191
|
-
|
|
192
|
-
SQLite 的诞生源自导弹驱逐舰板载损害控制软件项目。2000 年,D. Richard Hipp 为美国海军设计该系统时,遭遇商业数据库因配置复杂、无法承受断连和崩溃之痛点。Hipp 随后设计出免服务器配置、直接读写本地文件之嵌入式数据库,即 SQLite。
|
|
169
|
+
## 历史故事
|
|
193
170
|
|
|
194
|
-
|
|
171
|
+
文件扫描工具源于早期Unix命令如`find`和`diff`。现代实现面临海量文件系统和云存储的新挑战。本实现借鉴了20世纪90年代开发的增量备份系统,该系统首创两阶段比对方法(先快速元数据检查,再按需内容验证),在速度与准确性间取得平衡。二进制数据结构的采用反映了当代针对内存受限环境和高性能计算场景的优化趋势。
|
|
195
172
|
## 关于
|
|
196
173
|
|
|
197
174
|
本库由 [WebC.site](https://webc.site) 开发。
|
|
198
175
|
|
|
199
176
|
[WebC.site](https://webc.site) : 面向人工智能的网站开发新范式
|
|
177
|
+
|
package/_.js
CHANGED
|
@@ -1,31 +1,31 @@
|
|
|
1
|
-
import sqlite from "@1-/sqlite";
|
|
2
|
-
import { BinMap } from "@3-/binmap";
|
|
3
|
-
import vbE from "@3-/vb/vbE.js";
|
|
4
1
|
import { availableParallelism } from "node:os";
|
|
5
2
|
import pLimit from "@3-/plimit";
|
|
6
|
-
import
|
|
7
|
-
import { existsSync } from "node:fs";
|
|
8
|
-
import { join, dirname, basename } from "node:path";
|
|
9
|
-
import load from "./load.js";
|
|
3
|
+
import dbInit from "./dbInit.js";
|
|
10
4
|
import scan from "./scan.js";
|
|
11
5
|
import rm from "./rm.js";
|
|
12
6
|
import upsert from "./upsert.js";
|
|
13
7
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
8
|
+
/*
|
|
9
|
+
扫描指定目录下文件列表,比对缓存并做清理,返回更新列表和 upsert 存储函数
|
|
10
|
+
dir: 扫描的目标目录
|
|
11
|
+
db_dir: 数据库存放目录
|
|
12
|
+
files: 待扫描的文件列表
|
|
13
|
+
返回值: [update, upsert]
|
|
14
|
+
update: 发生变动需要更新的相对路径列表
|
|
15
|
+
upsert: 用于将新扫描记录保存至数据库的 dispose 异步函数
|
|
16
|
+
*/
|
|
17
|
+
export default async (dir, db_dir, files) => {
|
|
18
|
+
const [db_mtime, db_md5] = await dbInit(db_dir),
|
|
19
|
+
existing_mtime = db_mtime,
|
|
20
|
+
existing_md5 = db_md5,
|
|
21
|
+
limit = pLimit(availableParallelism()),
|
|
22
|
+
[scanned, update] = await scan(dir, files, existing_mtime, existing_md5, limit, db_mtime),
|
|
23
|
+
rm_paths = (existing) => [...existing.keys()].filter((path) => !scanned.has(path));
|
|
22
24
|
|
|
23
|
-
|
|
25
|
+
[
|
|
26
|
+
[db_mtime, existing_mtime],
|
|
27
|
+
[db_md5, existing_md5],
|
|
28
|
+
].forEach(([db, existing]) => rm(db, rm_paths(existing)));
|
|
24
29
|
|
|
25
|
-
|
|
26
|
-
rm_hashes = db_rows.filter(({ hash }) => !scanned.has(hash)).map(({ hash }) => hash);
|
|
27
|
-
|
|
28
|
-
rm(db, rm_hashes);
|
|
29
|
-
|
|
30
|
-
return [update, upsert(db, dir)];
|
|
30
|
+
return [update, upsert(db_mtime, db_md5, dir, db_dir)];
|
|
31
31
|
};
|
package/const.js
ADDED
package/dbInit.js
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { BinMap } from "@3-/binmap";
|
|
2
|
+
import load from "@1-/csv/load.js";
|
|
3
|
+
import upsertGitignore from "@1-/upsert_gitignore";
|
|
4
|
+
import vbE from "@3-/vb/vbE.js";
|
|
5
|
+
import b64Uint8 from "@3-/base64url/b64Uint8.js";
|
|
6
|
+
import { existsSync } from "node:fs";
|
|
7
|
+
import { join, basename } from "node:path";
|
|
8
|
+
import { MTIME, MD5 } from "./const.js";
|
|
9
|
+
|
|
10
|
+
const loadDb = async (path, db, parseVal) => {
|
|
11
|
+
if (existsSync(path)) {
|
|
12
|
+
const rows = await load(path);
|
|
13
|
+
rows.forEach(([path_str, ...vals]) => {
|
|
14
|
+
db.set(b64Uint8(path_str), parseVal(vals));
|
|
15
|
+
});
|
|
16
|
+
}
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
/*
|
|
20
|
+
初始化/打开修改时间和 md5 的 csv 数据库,并将数据库文件加入 gitignore
|
|
21
|
+
db_dir: 数据库存放目录
|
|
22
|
+
返回值: [db_mtime, db_md5]
|
|
23
|
+
*/
|
|
24
|
+
export default async (db_dir) => {
|
|
25
|
+
const li = [MTIME, MD5],
|
|
26
|
+
path_li = li.map((x) => join(db_dir, x + ".csv"));
|
|
27
|
+
|
|
28
|
+
if (path_li.some((x) => !existsSync(x))) {
|
|
29
|
+
upsertGitignore(
|
|
30
|
+
join(db_dir, ".gitignore"),
|
|
31
|
+
path_li.map((x) => basename(x)),
|
|
32
|
+
);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
const db_mtime = new BinMap(),
|
|
36
|
+
db_md5 = new BinMap(),
|
|
37
|
+
[mtime_path, md5_path] = path_li;
|
|
38
|
+
|
|
39
|
+
await Promise.all([
|
|
40
|
+
loadDb(mtime_path, db_mtime, ([size, mtime]) => vbE([Number(size), Number(mtime)])),
|
|
41
|
+
loadDb(md5_path, db_md5, ([md5_str]) => b64Uint8(md5_str)),
|
|
42
|
+
]);
|
|
43
|
+
|
|
44
|
+
return [db_mtime, db_md5];
|
|
45
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@1-/scan",
|
|
3
|
-
"version": "0.1.9",
|
|
3
|
+
"version": "0.1.11",
|
|
4
4
|
"description": "Incrementally scan directory files and track metadata in SQLite / 增量扫描目录文件并使用 SQLite 记录元数据",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"directory",
|
|
@@ -21,16 +21,17 @@
|
|
|
21
21
|
".": "./_.js",
|
|
22
22
|
"./*": "./*"
|
|
23
23
|
},
|
|
24
|
-
"
|
|
24
|
+
"dependencies": {
|
|
25
|
+
"@1-/csv": "^0.1.4",
|
|
25
26
|
"@1-/hash": "^0.1.0",
|
|
26
|
-
"@1-/
|
|
27
|
-
"@
|
|
28
|
-
"@3-/
|
|
27
|
+
"@1-/md5": "^0.1.1",
|
|
28
|
+
"@1-/upsert_gitignore": "^0.1.7",
|
|
29
|
+
"@3-/base64url": "^0.1.4",
|
|
30
|
+
"@3-/binmap": "^0.1.22",
|
|
31
|
+
"@3-/binset": "^0.1.8",
|
|
29
32
|
"@3-/int": "^0.1.1",
|
|
30
33
|
"@3-/plimit": "^0.1.3",
|
|
31
34
|
"@3-/u8": "^0.1.2",
|
|
32
|
-
"@3-/
|
|
33
|
-
"@3-/vb": "^0.1.6",
|
|
34
|
-
"@1-/upsert_gitignore": "^0.1.3"
|
|
35
|
+
"@3-/vb": "^0.1.6"
|
|
35
36
|
}
|
|
36
37
|
}
|
package/rm.js
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
/*
|
|
2
|
+
批量删除记录
|
|
3
|
+
db: BinMap 实例
|
|
4
|
+
rm: 待删除的路径 path 数组
|
|
5
|
+
*/
|
|
3
6
|
export default (db, rm) => {
|
|
4
|
-
|
|
5
|
-
tx(db, () => {
|
|
6
|
-
const del = db.prepare("DELETE FROM scanMtimeLen WHERE hash=?");
|
|
7
|
-
rm.forEach((hash) => del.run(hash));
|
|
8
|
-
});
|
|
9
|
-
}
|
|
7
|
+
rm.forEach((path) => db.delete(path));
|
|
10
8
|
};
|
package/scan.js
CHANGED
|
@@ -1,21 +1,46 @@
|
|
|
1
1
|
import { BinSet } from "@3-/binset";
|
|
2
2
|
import u8eq from "@3-/u8/u8eq.js";
|
|
3
3
|
import vbE from "@3-/vb/vbE.js";
|
|
4
|
+
import { join } from "node:path";
|
|
4
5
|
import stat from "./stat.js";
|
|
6
|
+
import pathMd5 from "@1-/md5/pathMd5.js";
|
|
5
7
|
|
|
6
|
-
|
|
8
|
+
/*
|
|
9
|
+
扫描指定目录下的文件,并与已有记录进行对比
|
|
10
|
+
dir: 扫描目录
|
|
11
|
+
files: 相对路径列表
|
|
12
|
+
existing: 已有的元数据 Map
|
|
13
|
+
existing_md5: 已有的 md5 Map
|
|
14
|
+
limit: 并发限制器
|
|
15
|
+
db_mtime: 修改时间数据库实例
|
|
16
|
+
返回值: [scanned, update]
|
|
17
|
+
scanned: 已扫描文件的 path 集合
|
|
18
|
+
update: 需要更新或新增的文件相对路径列表
|
|
19
|
+
*/
|
|
20
|
+
export default async (dir, files, existing, existing_md5, limit, db_mtime) => {
|
|
7
21
|
const scanned = new BinSet(),
|
|
8
22
|
update = [];
|
|
23
|
+
|
|
9
24
|
await Promise.all(
|
|
10
25
|
files.map((rel_path) =>
|
|
11
26
|
limit(async () => {
|
|
12
27
|
try {
|
|
13
|
-
const [size, mtime,
|
|
14
|
-
val = existing.get(
|
|
28
|
+
const [size, mtime, path] = await stat(dir, rel_path),
|
|
29
|
+
val = existing.get(path);
|
|
15
30
|
|
|
16
|
-
scanned.add(
|
|
31
|
+
scanned.add(path);
|
|
17
32
|
|
|
18
|
-
if (!val
|
|
33
|
+
if (!val) {
|
|
34
|
+
update.push(rel_path);
|
|
35
|
+
} else if (!u8eq(val, vbE([size, mtime]))) {
|
|
36
|
+
const old_md5 = existing_md5.get(path);
|
|
37
|
+
if (old_md5) {
|
|
38
|
+
const cur_md5 = await pathMd5(join(dir, rel_path));
|
|
39
|
+
if (u8eq(old_md5, cur_md5)) {
|
|
40
|
+
db_mtime.set(path, vbE([size, mtime]));
|
|
41
|
+
return;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
19
44
|
update.push(rel_path);
|
|
20
45
|
}
|
|
21
46
|
} catch {}
|
package/stat.js
CHANGED
|
@@ -3,6 +3,12 @@ import { join } from "node:path";
|
|
|
3
3
|
import int from "@3-/int";
|
|
4
4
|
import strmd5 from "@1-/hash/strmd5.js";
|
|
5
5
|
|
|
6
|
+
/*
|
|
7
|
+
获取文件的大小、修改时间(秒)以及路径哈希
|
|
8
|
+
dir: 基础目录
|
|
9
|
+
rel_path: 相对路径
|
|
10
|
+
返回值: [大小, 修改时间, 路径哈希]
|
|
11
|
+
*/
|
|
6
12
|
export default async (dir, rel_path) => {
|
|
7
13
|
const { size, mtimeMs: mtime_ms } = await fsStat(join(dir, rel_path));
|
|
8
14
|
return [size, int(mtime_ms), strmd5(rel_path)];
|
package/upsert.js
CHANGED
|
@@ -1,13 +1,45 @@
|
|
|
1
|
+
import { writeFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import dumps from "@1-/csv/dumps.js";
|
|
4
|
+
import vbD from "@3-/vb/vbD.js";
|
|
5
|
+
import vbE from "@3-/vb/vbE.js";
|
|
6
|
+
import uint8B64 from "@3-/base64url/uint8B64.js";
|
|
7
|
+
import pathMd5 from "@1-/md5/pathMd5.js";
|
|
1
8
|
import stat from "./stat.js";
|
|
9
|
+
import { MTIME, MD5 } from "./const.js";
|
|
10
|
+
|
|
11
|
+
const saveDb = (db_dir, db, name, toRow) => {
|
|
12
|
+
const li = [];
|
|
13
|
+
for (const [key, val] of db.entries()) {
|
|
14
|
+
li.push(toRow(key, val));
|
|
15
|
+
}
|
|
16
|
+
writeFileSync(join(db_dir, name + ".csv"), dumps(li), "utf8");
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
/*
|
|
20
|
+
创建用于插入/更新文件元数据(大小、修改时间、md5)的函数,并在资源释放时同步写入 CSV 文件
|
|
21
|
+
db_mtime: 修改时间 BinMap 实例
|
|
22
|
+
db_md5: md5 BinMap 实例
|
|
23
|
+
dir: 扫描目录
|
|
24
|
+
db_dir: 数据库(CSV)存放目录
|
|
25
|
+
返回值: upsert 异步函数 (带有 [Symbol.dispose] 方法)
|
|
26
|
+
*/
|
|
27
|
+
export default (db_mtime, db_md5, dir, db_dir) => {
|
|
28
|
+
const upsert = async (rel_path) => {
|
|
29
|
+
try {
|
|
30
|
+
const [size, mtime, path] = await stat(dir, rel_path),
|
|
31
|
+
md5 = await pathMd5(join(dir, rel_path));
|
|
32
|
+
db_mtime.set(path, vbE([size, mtime]));
|
|
33
|
+
db_md5.set(path, md5);
|
|
34
|
+
} catch {}
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
upsert[Symbol.dispose] = () => {
|
|
38
|
+
saveDb(db_dir, db_mtime, MTIME, (key, val) => [uint8B64(key), ...vbD(val)]);
|
|
39
|
+
saveDb(db_dir, db_md5, MD5, (key, val) => [uint8B64(key), uint8B64(val)]);
|
|
40
|
+
db_mtime[Symbol.dispose]();
|
|
41
|
+
db_md5[Symbol.dispose]();
|
|
42
|
+
};
|
|
2
43
|
|
|
3
|
-
export default (db, dir) => {
|
|
4
|
-
const insert = db.prepare("INSERT OR REPLACE INTO scanMtimeLen(hash,size,mtime)VALUES(?,?,?)"),
|
|
5
|
-
upsert = async (rel_path) => {
|
|
6
|
-
try {
|
|
7
|
-
const [size, mtime, hash] = await stat(dir, rel_path);
|
|
8
|
-
insert.run(hash, size, mtime);
|
|
9
|
-
} catch {}
|
|
10
|
-
};
|
|
11
|
-
upsert[Symbol.dispose] = () => db.close();
|
|
12
44
|
return upsert;
|
|
13
45
|
};
|
package/load.js
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
const SQLITE_ERROR = 1;
|
|
2
|
-
|
|
3
|
-
export default (db) => {
|
|
4
|
-
try {
|
|
5
|
-
return db.prepare("SELECT hash,size,mtime FROM scanMtimeLen").all();
|
|
6
|
-
} catch (err) {
|
|
7
|
-
if (err.errno === SQLITE_ERROR) {
|
|
8
|
-
db.exec("CREATE TABLE scanMtimeLen(hash PRIMARY KEY,size INT UNSIGNED,mtime INT UNSIGNED)");
|
|
9
|
-
return [];
|
|
10
|
-
}
|
|
11
|
-
throw err;
|
|
12
|
-
}
|
|
13
|
-
};
|
package/save.js
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
import tx from "@1-/sqlite/tx.js";
|
|
2
|
-
|
|
3
|
-
export default (db, update, rm) => {
|
|
4
|
-
if (update.length > 0 || rm.length > 0) {
|
|
5
|
-
tx(db, () => {
|
|
6
|
-
if (update.length > 0) {
|
|
7
|
-
const insert = db.prepare(
|
|
8
|
-
"INSERT OR REPLACE INTO scanMtimeLen(hash,size,mtime)VALUES(?,?,?)",
|
|
9
|
-
);
|
|
10
|
-
update.forEach(([_, h, size, mtime]) => insert.run(h, size, mtime));
|
|
11
|
-
}
|
|
12
|
-
if (rm.length > 0) {
|
|
13
|
-
const del = db.prepare("DELETE FROM scanMtimeLen WHERE hash=?");
|
|
14
|
-
rm.forEach((h) => del.run(h));
|
|
15
|
-
}
|
|
16
|
-
});
|
|
17
|
-
}
|
|
18
|
-
};
|