@1-/scan 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -32
- package/_.js +52 -37
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -3,17 +3,17 @@
|
|
|
3
3
|
---
|
|
4
4
|
|
|
5
5
|
<a id="en"></a>
|
|
6
|
-
# @1-/scan :
|
|
6
|
+
# @1-/scan : SQLite-backed incremental directory scanner
|
|
7
7
|
|
|
8
|
-
Incrementally scans directory files, compares file sizes and modification times to detect changes, synchronizes metadata to SQLite database, and returns
|
|
8
|
+
Incrementally scans directory files, compares file sizes and modification times to detect changes, synchronizes metadata to a SQLite database, and returns the list of changed relative paths.
|
|
9
9
|
|
|
10
10
|
## 1. Features
|
|
11
11
|
|
|
12
|
-
- **Incremental
|
|
13
|
-
- **Key Optimization**: Stores
|
|
14
|
-
- **Memory
|
|
15
|
-
- **Transactional Integrity**: Performs updates and deletions
|
|
16
|
-
- **Configuration
|
|
12
|
+
- **Incremental Scan**: Compares size and modification time, filtering unchanged files to reduce disk I/O.
|
|
13
|
+
- **Key Length Optimization**: Stores raw bytes for paths up to 16 bytes. Converts longer paths into 16-byte MD5 hashes to optimize database index space and query performance.
|
|
14
|
+
- **Memory Optimization**: Uses BinMap and BinSet to store binary keys in memory, avoiding string decoding overhead and reducing memory footprint.
|
|
15
|
+
- **Transactional Integrity**: Performs metadata updates and deletions in database transactions to ensure consistency.
|
|
16
|
+
- **Zero Configuration**: Integrates @1-/sqlite to initialize database schema and manage database connections automatically.
|
|
17
17
|
|
|
18
18
|
## 2. Usage
|
|
19
19
|
|
|
@@ -26,10 +26,10 @@ const dir = "./data";
|
|
|
26
26
|
const db_path = "./scan_record.db";
|
|
27
27
|
const files = ["file1.txt", "file2.txt"];
|
|
28
28
|
|
|
29
|
-
// Scan file list
|
|
29
|
+
// Scan file list, sync metadata to SQLite, return changed relative paths and upsert function
|
|
30
30
|
const [updated_paths, upsert] = await scan(dir, db_path, files);
|
|
31
31
|
|
|
32
|
-
//
|
|
32
|
+
// Close database automatically when exiting scope
|
|
33
33
|
using _ = upsert;
|
|
34
34
|
|
|
35
35
|
console.log("Updated files:", updated_paths);
|
|
@@ -54,16 +54,16 @@ save(db, [["file.txt", new Uint8Array([1, 2, 3]), 123, 1620000000]], [new Uint8A
|
|
|
54
54
|
db.close();
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
## 3. Design
|
|
57
|
+
## 3. Design
|
|
58
58
|
|
|
59
|
-
|
|
59
|
+
The main entry point orchestrates the modules to scan directories and synchronize metadata.
|
|
60
60
|
|
|
61
|
-

|
|
62
62
|
|
|
63
|
-
1. **Initialize Connection**:
|
|
64
|
-
2. **Load Records**: `load.js` checks
|
|
65
|
-
3. **Compare
|
|
66
|
-
4. **Delete and Return**: Deletes absent records in
|
|
63
|
+
1. **Initialize Connection**: Calls `@1-/sqlite` to open the SQLite database.
|
|
64
|
+
2. **Load Records**: `load.js` checks for the `scanMtimeLen` table and creates it if missing, then reads the stored hashes, sizes, and modification times to restore the in-memory mappings.
|
|
65
|
+
3. **Compare Files**: Iterates over the input file list. Maps paths to 16-byte binary keys via `@1-/hash`. Adds files with a mismatched size or modification time to the update list.
|
|
66
|
+
4. **Delete and Return**: Deletes absent or unscanned records in a transaction. Returns the list of changed paths and an `upsert` function for persistence.
|
|
67
67
|
|
|
68
68
|
## 4. Tech Stack
|
|
69
69
|
|
|
@@ -86,7 +86,7 @@ The entry point orchestrates independent modules to execute the incremental scan
|
|
|
86
86
|
|
|
87
87
|
## 6. History
|
|
88
88
|
|
|
89
|
-
SQLite was created by D. Richard Hipp in 2000 while designing board software for guided-missile destroyers. The system originally depended on
|
|
89
|
+
SQLite was created by D. Richard Hipp in 2000 while designing onboard software for guided-missile destroyers. The system originally depended on a commercial database that required constant database administration; connection loss could stall the entire damage control application. Hipp designed a serverless, zero-configuration embedded database that directly reads and writes local files, marking the birth of SQLite.
|
|
90
90
|
|
|
91
91
|
To conserve space and reduce latency, SQLite utilizes Varint (variable-length integer) encoding for metadata storage. Under this scheme, small integers consume only 1 byte, while larger numbers scale dynamically. This library inherits that design philosophy, compressing file metadata into varints for memory storage to ensure minimal footprint and high synchronization performance.
|
|
92
92
|
## About
|
|
@@ -98,17 +98,17 @@ This library is developed by [WebC.site](https://webc.site).
|
|
|
98
98
|
---
|
|
99
99
|
|
|
100
100
|
<a id="zh"></a>
|
|
101
|
-
# @1-/scan :
|
|
101
|
+
# @1-/scan : 基于 SQLite 的目录增量扫描工具
|
|
102
102
|
|
|
103
|
-
|
|
103
|
+
增量扫描目录文件,比对文件大小与修改时间检测变更,同步元数据至 SQLite 数据库,返回已变更相对路径列表。
|
|
104
104
|
|
|
105
105
|
## 1. 功能介绍
|
|
106
106
|
|
|
107
107
|
- **增量扫描**:比对大小与修改时间,过滤未变更文件,减少磁盘读写。
|
|
108
|
-
-
|
|
109
|
-
-
|
|
110
|
-
-
|
|
111
|
-
-
|
|
108
|
+
- **键长优化**:路径长度不大于 16 字节存储原始字节,超出 16 字节转换为 16 字节 MD5 值,优化索引空间与查询性能。
|
|
109
|
+
- **内存优化**:使用 BinMap 与 BinSet 存储二进制键,避免字符串解码,降低内存占用。
|
|
110
|
+
- **事务保障**:元数据变更与删除操作合并在数据库事务中执行,确保数据一致性。
|
|
111
|
+
- **零配置**:集成 @1-/sqlite,自动初始化数据库表结构,管理数据库连接。
|
|
112
112
|
|
|
113
113
|
## 2. 使用演示
|
|
114
114
|
|
|
@@ -121,15 +121,15 @@ const dir = "./data";
|
|
|
121
121
|
const db_path = "./scan_record.db";
|
|
122
122
|
const files = ["file1.txt", "file2.txt"];
|
|
123
123
|
|
|
124
|
-
// 扫描文件列表并同步至 SQLite
|
|
124
|
+
// 扫描文件列表并同步至 SQLite,返回已变更的相对路径列表与更新函数
|
|
125
125
|
const [updated_paths, upsert] = await scan(dir, db_path, files);
|
|
126
126
|
|
|
127
|
-
//
|
|
127
|
+
// 退出作用域自动关闭数据库
|
|
128
128
|
using _ = upsert;
|
|
129
129
|
|
|
130
130
|
console.log("更新文件列表:", updated_paths);
|
|
131
131
|
|
|
132
|
-
//
|
|
132
|
+
// 更新已处理文件的元数据至数据库
|
|
133
133
|
for (const rel_path of updated_paths) {
|
|
134
134
|
await upsert(rel_path);
|
|
135
135
|
}
|
|
@@ -151,14 +151,14 @@ db.close();
|
|
|
151
151
|
|
|
152
152
|
## 3. 设计思路
|
|
153
153
|
|
|
154
|
-
|
|
154
|
+
主入口调度各模块,协作完成目录扫描与数据同步。
|
|
155
155
|
|
|
156
|
-

|
|
157
157
|
|
|
158
158
|
1. **初始化连接**:调用 `@1-/sqlite` 打开 SQLite 数据库。
|
|
159
|
-
2. **加载记录**:`load.js` 检查 `scanMtimeLen`
|
|
160
|
-
3.
|
|
161
|
-
4.
|
|
159
|
+
2. **加载记录**:`load.js` 检查 `scanMtimeLen` 表,表不存在则自动创建。读取已记录哈希、大小及修改时间,恢复内存映射。
|
|
160
|
+
3. **比对文件**:遍历输入文件列表,利用 `@1-/hash` 将路径映射为 16 字节二进制键。若大小或修改时间不一致,则加入变更列表。
|
|
161
|
+
4. **删除与返回**:在事务中批量删除已被物理移除或不再扫描记录。返回变更路径列表与 `upsert` 函数,供外部持久化。
|
|
162
162
|
|
|
163
163
|
## 4. 技术栈
|
|
164
164
|
|
|
@@ -168,7 +168,7 @@ db.close();
|
|
|
168
168
|
- **@3-/vb**:Varint 变长整型编码与解码器。
|
|
169
169
|
- **@3-/binmap / @3-/binset**:基于 Rust 与 WebAssembly 的高效二进制键容器。
|
|
170
170
|
|
|
171
|
-
## 5.
|
|
171
|
+
## 5. 代码结构
|
|
172
172
|
|
|
173
173
|
```text
|
|
174
174
|
.
|
package/_.js
CHANGED
|
@@ -9,51 +9,66 @@ import int from "@3-/int";
|
|
|
9
9
|
import strmd5 from "@1-/hash/strmd5.js";
|
|
10
10
|
import { BinSet } from "@3-/binset";
|
|
11
11
|
import u8eq from "@3-/u8/u8eq.js";
|
|
12
|
+
import { availableParallelism } from "node:os";
|
|
13
|
+
import pLimit from "@3-/plimit";
|
|
12
14
|
|
|
13
15
|
const stat = async (dir, rel_path) => {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
}
|
|
16
|
+
const { size, mtimeMs: mtime_ms } = await fsStat(join(dir, rel_path));
|
|
17
|
+
return [size, int(mtime_ms), strmd5(rel_path)];
|
|
18
|
+
},
|
|
19
|
+
scanFiles = async (dir, files, existing, limit) => {
|
|
20
|
+
const scanned = new BinSet(),
|
|
21
|
+
update = [];
|
|
22
|
+
await Promise.all(
|
|
23
|
+
files.map((rel_path) =>
|
|
24
|
+
limit(async () => {
|
|
25
|
+
try {
|
|
26
|
+
const [size, mtime, hash] = await stat(dir, rel_path),
|
|
27
|
+
val = existing.get(hash);
|
|
28
|
+
|
|
29
|
+
scanned.add(hash);
|
|
30
|
+
|
|
31
|
+
if (!val || !u8eq(val, vbE([size, mtime]))) {
|
|
32
|
+
update.push(rel_path);
|
|
33
|
+
}
|
|
34
|
+
} catch {}
|
|
35
|
+
}),
|
|
36
|
+
),
|
|
37
|
+
);
|
|
38
|
+
return [scanned, update];
|
|
39
|
+
},
|
|
40
|
+
rmHashes = (db, rm) => {
|
|
41
|
+
if (rm.length > 0) {
|
|
42
|
+
tx(db, () => {
|
|
43
|
+
const del = db.prepare("DELETE FROM scanMtimeLen WHERE hash=?");
|
|
44
|
+
rm.forEach((hash) => del.run(hash));
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
},
|
|
48
|
+
newUpsert = (db, dir) => {
|
|
49
|
+
const insert = db.prepare("INSERT OR REPLACE INTO scanMtimeLen(hash,size,mtime)VALUES(?,?,?)"),
|
|
50
|
+
upsert = async (rel_path) => {
|
|
51
|
+
try {
|
|
52
|
+
const [size, mtime, hash] = await stat(dir, rel_path);
|
|
53
|
+
insert.run(hash, size, mtime);
|
|
54
|
+
} catch {}
|
|
55
|
+
};
|
|
56
|
+
upsert[Symbol.dispose] = () => db.close();
|
|
57
|
+
return upsert;
|
|
58
|
+
};
|
|
17
59
|
|
|
18
60
|
export default async (dir, db_path, files) => {
|
|
19
61
|
const db = sqlite(db_path),
|
|
20
62
|
existing = new BinMap(),
|
|
21
63
|
db_rows = load(db),
|
|
22
|
-
|
|
23
|
-
update = [];
|
|
64
|
+
limit = pLimit(availableParallelism());
|
|
24
65
|
|
|
25
66
|
db_rows.forEach(({ hash, size, mtime }) => existing.set(hash, vbE([size, mtime])));
|
|
26
67
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
if (!val || !u8eq(val, vbE([size, mtime]))) {
|
|
35
|
-
update.push(rel_path);
|
|
36
|
-
}
|
|
37
|
-
} catch {}
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
const rm = db_rows.filter(({ hash }) => !scanned.has(hash)).map(({ hash }) => hash),
|
|
41
|
-
insert = db.prepare("INSERT OR REPLACE INTO scanMtimeLen(hash,size,mtime)VALUES(?,?,?)"),
|
|
42
|
-
upsert = async (rel_path) => {
|
|
43
|
-
try {
|
|
44
|
-
const [size, mtime, hash] = await stat(dir, rel_path);
|
|
45
|
-
insert.run(hash, size, mtime);
|
|
46
|
-
} catch {}
|
|
47
|
-
};
|
|
48
|
-
|
|
49
|
-
if (rm.length > 0) {
|
|
50
|
-
tx(db, () => {
|
|
51
|
-
const del = db.prepare("DELETE FROM scanMtimeLen WHERE hash=?");
|
|
52
|
-
rm.forEach((hash) => del.run(hash));
|
|
53
|
-
});
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
upsert[Symbol.dispose] = () => db.close();
|
|
57
|
-
|
|
58
|
-
return [update, upsert];
|
|
68
|
+
const [scanned, update] = await scanFiles(dir, files, existing, limit),
|
|
69
|
+
rm = db_rows.filter(({ hash }) => !scanned.has(hash)).map(({ hash }) => hash);
|
|
70
|
+
|
|
71
|
+
rmHashes(db, rm);
|
|
72
|
+
|
|
73
|
+
return [update, newUpsert(db, dir)];
|
|
59
74
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@1-/scan",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.8",
|
|
4
4
|
"description": "Incrementally scan directory files and track metadata in SQLite / 增量扫描目录文件并使用 SQLite 记录元数据",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"directory",
|
|
@@ -23,10 +23,11 @@
|
|
|
23
23
|
},
|
|
24
24
|
"peerDependencies": {
|
|
25
25
|
"@1-/hash": "^0.1.0",
|
|
26
|
-
"@1-/sqlite": "^0.1.
|
|
26
|
+
"@1-/sqlite": "^0.1.1",
|
|
27
27
|
"@3-/binmap": "^0.1.20",
|
|
28
28
|
"@3-/binset": "^0.1.6",
|
|
29
29
|
"@3-/int": "^0.1.1",
|
|
30
|
+
"@3-/plimit": "^0.1.3",
|
|
30
31
|
"@3-/u8": "^0.1.2",
|
|
31
32
|
"@3-/utf8": "^0.1.1",
|
|
32
33
|
"@3-/vb": "^0.1.6"
|