@1-/scan 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -28
- package/_.js +2 -2
- package/load.js +2 -2
- package/package.json +1 -1
- package/save.js +4 -2
package/README.md
CHANGED
|
@@ -9,11 +9,11 @@ Incrementally scans directory files, compares file sizes and modification times
|
|
|
9
9
|
|
|
10
10
|
## Features
|
|
11
11
|
|
|
12
|
-
- **Incremental Scanning**:
|
|
12
|
+
- **Incremental Scanning**: Processes only new, modified, or deleted files, avoiding redundant file system operations.
|
|
13
13
|
- **Key Optimization**: Stores relative paths of 16 bytes or fewer directly as raw bytes; hashes longer paths to 16-byte MD5 digests to optimize database index space and query performance.
|
|
14
14
|
- **Metadata Compression**: Compresses file sizes and modification times using Varint (variable-length byte) encoding.
|
|
15
|
-
- **Transactional Integrity**: Packages updates and deletions in
|
|
16
|
-
- **
|
|
15
|
+
- **Transactional Integrity**: Packages updates and deletions in database transactions to guarantee consistency.
|
|
16
|
+
- **File Filtering**: Supports custom ignore callback functions to filter files and directories.
|
|
17
17
|
- **Native Database**: Integrates Bun's native `bun:sqlite` module, eliminating external database driver dependencies.
|
|
18
18
|
|
|
19
19
|
## Usage
|
|
@@ -63,6 +63,28 @@ for (const rel_path of updated_paths) {
|
|
|
63
63
|
}
|
|
64
64
|
```
|
|
65
65
|
|
|
66
|
+
### Bulk Storage Module Usage
|
|
67
|
+
|
|
68
|
+
```javascript
|
|
69
|
+
import save from "@1-/scan/save.js";
|
|
70
|
+
import sqlite from "@1-/scan/sqlite.js";
|
|
71
|
+
|
|
72
|
+
const db = sqlite("./scan_record.db");
|
|
73
|
+
|
|
74
|
+
// Bulk update and delete metadata
|
|
75
|
+
save(
|
|
76
|
+
db,
|
|
77
|
+
[
|
|
78
|
+
["file.txt", new Uint8Array([1, 2, 3]), 123, 1620000000]
|
|
79
|
+
],
|
|
80
|
+
[
|
|
81
|
+
new Uint8Array([4, 5, 6])
|
|
82
|
+
]
|
|
83
|
+
);
|
|
84
|
+
|
|
85
|
+
db.close();
|
|
86
|
+
```
|
|
87
|
+
|
|
66
88
|
## Design Ideas
|
|
67
89
|
|
|
68
90
|
The main entry orchestrates independent modules to execute the incremental scanning and synchronization flow.
|
|
@@ -79,15 +101,15 @@ graph TD
|
|
|
79
101
|
```
|
|
80
102
|
|
|
81
103
|
1. **Initialize Connection (`sqlite.js`)**: Opens the SQLite database connection and configures automatic connection disposal.
|
|
82
|
-
2. **Load Records (`load.js`)**: Automatically creates
|
|
104
|
+
2. **Load Records (`load.js`)**: Automatically creates the `scanMtimeLen` table if it is missing, retrieves existing file hashes, sizes, and modification times, and reconstructs the reference set in memory.
|
|
83
105
|
3. **Walk & Compare (`dirWalk.js`)**: Traverses directory structure recursively. Paths are transformed into 16-byte keys via `hash.js`. File attributes are encoded using `@3-/vb` and compared against database records to identify additions and modifications.
|
|
84
106
|
4. **Delete & Return Upsert**: Uses `trans.js` to execute transaction-safe deletions for deleted files, and returns the modified relative paths together with an `upsert` function so that the caller can update database records.
|
|
85
|
-
5. **Independent Sync Helper (`save.js`)**: Exported independent module to execute bulk
|
|
107
|
+
5. **Independent Sync Helper (`save.js`)**: Exported independent module to execute bulk updates and deletions in transactions.
|
|
86
108
|
|
|
87
109
|
## Tech Stack
|
|
88
110
|
|
|
89
111
|
- **Bun**: Runtime environment and test framework.
|
|
90
|
-
- **Bun SQLite**: Native
|
|
112
|
+
- **Bun SQLite**: Native SQLite engine built into Bun.
|
|
91
113
|
- **@1-/walk**: Directory walker with ignore support.
|
|
92
114
|
- **@3-/vb**: Variable-length byte (Varint) encoder and decoder.
|
|
93
115
|
- **@3-/binmap / @3-/binset**: Memory-efficient collections designed for binary keys.
|
|
@@ -104,7 +126,7 @@ graph TD
|
|
|
104
126
|
│ ├── save.js # Independent helper executing bulk updates and deletions
|
|
105
127
|
│ ├── sqlite.js # Connection manager instantiating SQLite database
|
|
106
128
|
│ └── trans.js # Transaction wrapper providing rollback mechanism
|
|
107
|
-
└── tests # Test
|
|
129
|
+
└── tests # Test directory
|
|
108
130
|
```
|
|
109
131
|
|
|
110
132
|
## History
|
|
@@ -119,16 +141,16 @@ To conserve disk space and reduce I/O overhead, SQLite utilizes Varint (variable
|
|
|
119
141
|
<a id="zh"></a>
|
|
120
142
|
# @1-/scan : 增量扫描目录文件并使用 SQLite 记录元数据
|
|
121
143
|
|
|
122
|
-
|
|
144
|
+
增量扫描目录文件,比对大小与修改时间以检测变更,同步元数据至 SQLite 数据库,返回发生变更之相对路径列表。
|
|
123
145
|
|
|
124
146
|
## 功能介绍
|
|
125
147
|
|
|
126
|
-
-
|
|
127
|
-
-
|
|
128
|
-
- **元数据压缩**:使用 Varint
|
|
129
|
-
-
|
|
130
|
-
-
|
|
131
|
-
- **原生依赖**:基于 Bun 内置 `bun:sqlite`
|
|
148
|
+
- **增量扫描**:处理新增、修改或删除之文件,避免冗余文件系统读写,提升同步效率。
|
|
149
|
+
- **路径压缩**:相对路径长度不大于 16 字节时保留原始字节;超出 16 字节则转换为 16 字节 MD5 值作为主键,优化索引空间与查询性能。
|
|
150
|
+
- **元数据压缩**:使用 Varint(可变字节整型)编码方式压缩存储文件大小与修改时间。
|
|
151
|
+
- **事务安全**:将更新与删除操作合并在数据库事务中执行,确保数据一致性。
|
|
152
|
+
- **文件过滤**:支持自定义过滤函数以排除特定文件与目录。
|
|
153
|
+
- **原生依赖**:基于 Bun 内置 `bun:sqlite` 模块,免去安装与编译数据库驱动步骤。
|
|
132
154
|
|
|
133
155
|
## 使用演示
|
|
134
156
|
|
|
@@ -140,7 +162,7 @@ import scan from "@1-/scan";
|
|
|
140
162
|
const dir = "./data";
|
|
141
163
|
const db_path = "./scan_record.db";
|
|
142
164
|
|
|
143
|
-
// 扫描目录并同步至 SQLite
|
|
165
|
+
// 扫描目录并同步至 SQLite,返回发生变更之相对路径列表与更新函数
|
|
144
166
|
const [updated_paths, upsert] = await scan(dir, db_path);
|
|
145
167
|
|
|
146
168
|
// 退出作用域时自动关闭数据库
|
|
@@ -148,13 +170,13 @@ using _upsert = upsert;
|
|
|
148
170
|
|
|
149
171
|
console.log("更新文件列表:", updated_paths);
|
|
150
172
|
|
|
151
|
-
//
|
|
173
|
+
// 更新已处理文件元数据至数据库
|
|
152
174
|
for (const rel_path of updated_paths) {
|
|
153
175
|
await upsert(rel_path);
|
|
154
176
|
}
|
|
155
177
|
```
|
|
156
178
|
|
|
157
|
-
###
|
|
179
|
+
### 过滤规则扫描
|
|
158
180
|
|
|
159
181
|
```javascript
|
|
160
182
|
import scan from "@1-/scan";
|
|
@@ -162,7 +184,7 @@ import scan from "@1-/scan";
|
|
|
162
184
|
const dir = "./data";
|
|
163
185
|
const db_path = "./scan_record.db";
|
|
164
186
|
|
|
165
|
-
//
|
|
187
|
+
// 过滤临时文件与特定配置
|
|
166
188
|
const ignore = (kind, rel_path) => {
|
|
167
189
|
return rel_path.startsWith("temp/") || rel_path === "config.json";
|
|
168
190
|
};
|
|
@@ -177,9 +199,31 @@ for (const rel_path of updated_paths) {
|
|
|
177
199
|
}
|
|
178
200
|
```
|
|
179
201
|
|
|
202
|
+
### 批量存储模块使用
|
|
203
|
+
|
|
204
|
+
```javascript
|
|
205
|
+
import save from "@1-/scan/save.js";
|
|
206
|
+
import sqlite from "@1-/scan/sqlite.js";
|
|
207
|
+
|
|
208
|
+
const db = sqlite("./scan_record.db");
|
|
209
|
+
|
|
210
|
+
// 批量更新与删除元数据
|
|
211
|
+
save(
|
|
212
|
+
db,
|
|
213
|
+
[
|
|
214
|
+
["file.txt", new Uint8Array([1, 2, 3]), 123, 1620000000]
|
|
215
|
+
],
|
|
216
|
+
[
|
|
217
|
+
new Uint8Array([4, 5, 6])
|
|
218
|
+
]
|
|
219
|
+
);
|
|
220
|
+
|
|
221
|
+
db.close();
|
|
222
|
+
```
|
|
223
|
+
|
|
180
224
|
## 设计思路
|
|
181
225
|
|
|
182
|
-
|
|
226
|
+
系统主入口调度各独立模块完成增量扫描与数据同步。
|
|
183
227
|
|
|
184
228
|
```mermaid
|
|
185
229
|
graph TD
|
|
@@ -189,19 +233,19 @@ graph TD
|
|
|
189
233
|
DirWalk -->|调用| Walk["@1-/walk/walkRelIgnore"]
|
|
190
234
|
DirWalk -->|处理路径键| Hash["hash.js"]
|
|
191
235
|
Entry -->|4. 删除失效记录并返回更新函数| Trans["trans.js"]
|
|
192
|
-
Save["save.js (
|
|
236
|
+
Save["save.js (独立批量存储模块)"] -->|事务保障| Trans
|
|
193
237
|
```
|
|
194
238
|
|
|
195
239
|
1. **初始化连接 (`sqlite.js`)**:打开 SQLite 数据库,并配置自动释放连接机制。
|
|
196
|
-
2. **加载记录 (`load.js`)
|
|
240
|
+
2. **加载记录 (`load.js`)**:若数据表 `scanMtimeLen` 不存在则自动创建,读取已记录的文件哈希、大小及修改时间,在内存中还原比对集合。
|
|
197
241
|
3. **文件系统扫描 (`dirWalk.js`)**:递归遍历目录,利用 `hash.js` 将路径映射为 16 字节键。对比当前文件与数据库元数据(利用 `@3-/vb` 进行压缩状态对比),筛选出新增和修改的文件。
|
|
198
|
-
4. **删除与返回更新函数**:使用 `trans.js`
|
|
199
|
-
5.
|
|
242
|
+
4. **删除与返回更新函数**:使用 `trans.js` 开启事务,批量删除已被移除的记录,并返回变更的相对路径列表与 `upsert` 函数,供调用者持久化数据。
|
|
243
|
+
5. **独立批量存储模块 (`save.js`)**:供外部调用的独立工具模块,用于在事务中批量写入与删除。
|
|
200
244
|
|
|
201
245
|
## 技术栈
|
|
202
246
|
|
|
203
|
-
- **Bun**:JavaScript
|
|
204
|
-
- **Bun SQLite
|
|
247
|
+
- **Bun**:JavaScript 运行时与测试框架。
|
|
248
|
+
- **Bun SQLite**:内置 SQLite 实现。
|
|
205
249
|
- **@1-/walk**:支持过滤规则的目录递归遍历工具。
|
|
206
250
|
- **@3-/vb**:Varint(可变字节)编码与解码器。
|
|
207
251
|
- **@3-/binmap / @3-/binset**:针对二进制键优化的 Map 和 Set 容器。
|
|
@@ -218,12 +262,12 @@ graph TD
|
|
|
218
262
|
│ ├── save.js # 独立导出的批量持久化与删除辅助函数
|
|
219
263
|
│ ├── sqlite.js # 创建并配置 SQLite 数据库实例
|
|
220
264
|
│ └── trans.js # 封装 SQLite 事务,提供异常回滚机制
|
|
221
|
-
└── tests #
|
|
265
|
+
└── tests # 单元测试目录
|
|
222
266
|
```
|
|
223
267
|
|
|
224
268
|
## 历史故事
|
|
225
269
|
|
|
226
|
-
SQLite
|
|
270
|
+
SQLite 的诞生源自海军军工项目。2000 年,D. Richard Hipp 为美国海军设计导弹驱逐舰板载损害控制软件时,遭遇商业数据库因配置复杂、日常维护繁琐且连接丢失即导致系统瘫痪之痛点。Hipp 随后设计出免服务器配置、直接读写本地文件之嵌入式数据库,即 SQLite。
|
|
227
271
|
|
|
228
|
-
|
|
272
|
+
为了节省磁盘空间与降低读写延迟,SQLite 广泛应用了 Varint(可变字节整型)编码。在这种编码下,数值较小的整数仅占用 1 字节,只有大数值才会占用更多字节。本项目中对文件大小和修改时间采用同样的压缩设计,秉承了 SQLite 节省空间与高效之设计哲学。
|
|
229
273
|
../doc/zh/about.md
|
package/_.js
CHANGED
|
@@ -21,12 +21,12 @@ export default async (dir, db_path, ignore) => {
|
|
|
21
21
|
|
|
22
22
|
if (to_delete.length > 0) {
|
|
23
23
|
trans(db, () => {
|
|
24
|
-
const del = db.prepare("DELETE FROM
|
|
24
|
+
const del = db.prepare("DELETE FROM scanMtimeLen WHERE hash=?");
|
|
25
25
|
to_delete.forEach((h) => del.run(h));
|
|
26
26
|
});
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
const insert = db.prepare("INSERT OR REPLACE INTO
|
|
29
|
+
const insert = db.prepare("INSERT OR REPLACE INTO scanMtimeLen(hash,size,mtime)VALUES(?,?,?)"),
|
|
30
30
|
upsert = async (rel_path) => {
|
|
31
31
|
const fp = join(dir, rel_path),
|
|
32
32
|
{ size, mtimeMs } = await stat(fp),
|
package/load.js
CHANGED
|
@@ -2,10 +2,10 @@ const SQLITE_ERROR = 1;
|
|
|
2
2
|
|
|
3
3
|
export default (db) => {
|
|
4
4
|
try {
|
|
5
|
-
return db.prepare("SELECT hash,size,mtime FROM
|
|
5
|
+
return db.prepare("SELECT hash,size,mtime FROM scanMtimeLen").all();
|
|
6
6
|
} catch (err) {
|
|
7
7
|
if (err.errno === SQLITE_ERROR) {
|
|
8
|
-
db.exec("CREATE TABLE
|
|
8
|
+
db.exec("CREATE TABLE scanMtimeLen(hash PRIMARY KEY,size INT UNSIGNED,mtime INT UNSIGNED)");
|
|
9
9
|
return [];
|
|
10
10
|
}
|
|
11
11
|
throw err;
|
package/package.json
CHANGED
package/save.js
CHANGED
|
@@ -4,11 +4,13 @@ export default (db, to_update, to_delete) => {
|
|
|
4
4
|
if (to_update.length > 0 || to_delete.length > 0) {
|
|
5
5
|
trans(db, () => {
|
|
6
6
|
if (to_update.length > 0) {
|
|
7
|
-
const insert = db.prepare(
|
|
7
|
+
const insert = db.prepare(
|
|
8
|
+
"INSERT OR REPLACE INTO scanMtimeLen(hash,size,mtime)VALUES(?,?,?)",
|
|
9
|
+
);
|
|
8
10
|
to_update.forEach(([_, h, size, mtime]) => insert.run(h, size, mtime));
|
|
9
11
|
}
|
|
10
12
|
if (to_delete.length > 0) {
|
|
11
|
-
const del = db.prepare("DELETE FROM
|
|
13
|
+
const del = db.prepare("DELETE FROM scanMtimeLen WHERE hash=?");
|
|
12
14
|
to_delete.forEach((h) => del.run(h));
|
|
13
15
|
}
|
|
14
16
|
});
|