@1-/scan 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -36
- package/_.js +16 -44
- package/package.json +5 -3
- package/rm.js +10 -0
- package/scan.js +26 -0
- package/stat.js +9 -0
- package/upsert.js +13 -0
package/README.md
CHANGED
|
@@ -3,17 +3,17 @@
|
|
|
3
3
|
---
|
|
4
4
|
|
|
5
5
|
<a id="en"></a>
|
|
6
|
-
# @1-/scan :
|
|
6
|
+
# @1-/scan : SQLite-backed incremental directory file scanner
|
|
7
7
|
|
|
8
|
-
Incrementally scans directory files, compares file sizes and modification times to detect changes, synchronizes metadata to SQLite database, and returns
|
|
8
|
+
Incrementally scans directory files, compares file sizes and modification times to detect changes, synchronizes metadata to a SQLite database, and returns the list of changed relative paths.
|
|
9
9
|
|
|
10
10
|
## 1. Features
|
|
11
11
|
|
|
12
|
-
- **Incremental
|
|
13
|
-
- **Key Optimization**: Stores
|
|
14
|
-
- **Memory
|
|
15
|
-
- **Transactional Integrity**: Performs updates and deletions
|
|
16
|
-
- **Configuration
|
|
12
|
+
- **Incremental Scan**: Compares size and modification time, filtering unchanged files to reduce disk I/O.
|
|
13
|
+
- **Key Length Optimization**: Stores raw bytes for paths up to 16 bytes. Converts longer paths into 16-byte MD5 hashes to optimize database index space and query performance.
|
|
14
|
+
- **Memory Optimization**: Uses BinMap and BinSet to store binary keys in memory, avoiding string decoding overhead and reducing memory footprint.
|
|
15
|
+
- **Transactional Integrity**: Performs metadata updates and deletions in database transactions to ensure consistency.
|
|
16
|
+
- **Auto Configuration**: Integrates @1-/sqlite to initialize the database schema and manage database connections automatically, updating .gitignore when a new database is detected.
|
|
17
17
|
|
|
18
18
|
## 2. Usage
|
|
19
19
|
|
|
@@ -26,10 +26,10 @@ const dir = "./data";
|
|
|
26
26
|
const db_path = "./scan_record.db";
|
|
27
27
|
const files = ["file1.txt", "file2.txt"];
|
|
28
28
|
|
|
29
|
-
// Scan file list
|
|
29
|
+
// Scan file list, sync metadata to SQLite, return changed relative paths and upsert function
|
|
30
30
|
const [updated_paths, upsert] = await scan(dir, db_path, files);
|
|
31
31
|
|
|
32
|
-
//
|
|
32
|
+
// Close database automatically when exiting scope
|
|
33
33
|
using _ = upsert;
|
|
34
34
|
|
|
35
35
|
console.log("Updated files:", updated_paths);
|
|
@@ -54,16 +54,16 @@ save(db, [["file.txt", new Uint8Array([1, 2, 3]), 123, 1620000000]], [new Uint8A
|
|
|
54
54
|
db.close();
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
## 3. Design
|
|
57
|
+
## 3. Design
|
|
58
58
|
|
|
59
|
-
|
|
59
|
+
The main entry point orchestrates the modules to scan directories and synchronize metadata.
|
|
60
60
|
|
|
61
|
-

|
|
62
62
|
|
|
63
|
-
1. **Initialize Connection**:
|
|
64
|
-
2. **Load Records**: `load.js` checks
|
|
65
|
-
3. **Compare
|
|
66
|
-
4. **Delete and Return**:
|
|
63
|
+
1. **Initialize Connection**: Calls `@1-/sqlite` to open the SQLite database. If the database is newly created, updates `.gitignore` in the database directory so that the database file is not tracked.
|
|
64
|
+
2. **Load Records**: `load.js` checks for and creates the `scanMtimeLen` table. Reads the stored hashes, sizes, and modification times to restore the in-memory mappings inside `BinMap`.
|
|
65
|
+
3. **Compare Files**: `scan.js` iterates over the input file list, calling `stat.js` for metadata and using `@1-/hash` to map paths to 16-byte binary keys. Files with a mismatched size or modification time are added to the change list.
|
|
66
|
+
4. **Delete and Return**: `rm.js` deletes absent or unscanned records in a transaction. Returns the list of changed paths and the `upsert` function (provided by `upsert.js`) for persistence, which supports automatic resource disposal.
|
|
67
67
|
|
|
68
68
|
## 4. Tech Stack
|
|
69
69
|
|
|
@@ -78,15 +78,19 @@ The entry point orchestrates independent modules to execute the incremental scan
|
|
|
78
78
|
```text
|
|
79
79
|
.
|
|
80
80
|
├── src
|
|
81
|
-
│ ├── _.js # Core flow
|
|
82
|
-
│ ├── load.js # Table
|
|
83
|
-
│
|
|
81
|
+
│ ├── _.js # Core controller flow
|
|
82
|
+
│ ├── load.js # Table initialization and loading
|
|
83
|
+
│ ├── rm.js # Batch deletion of metadata
|
|
84
|
+
│ ├── save.js # Batch storage and updates
|
|
85
|
+
│ ├── scan.js # Scans and compares files
|
|
86
|
+
│ ├── stat.js # Retrieves file metadata and path hash
|
|
87
|
+
│ └── upsert.js # Single-record updates and auto-dispose
|
|
84
88
|
└── tests # Unit tests
|
|
85
89
|
```
|
|
86
90
|
|
|
87
91
|
## 6. History
|
|
88
92
|
|
|
89
|
-
SQLite was created by D. Richard Hipp in 2000 while designing board software for guided-missile destroyers. The system originally depended on
|
|
93
|
+
SQLite was created by D. Richard Hipp in 2000 while designing board software for guided-missile destroyers. The system originally depended on a commercial database that required constant database administration; connection loss could stall the entire damage control application. Hipp designed a serverless, zero-configuration embedded database that directly reads and writes local files, marking the birth of SQLite.
|
|
90
94
|
|
|
91
95
|
To conserve space and reduce latency, SQLite utilizes Varint (variable-length integer) encoding for metadata storage. Under this scheme, small integers consume only 1 byte, while larger numbers scale dynamically. This library inherits that design philosophy, compressing file metadata into varints for memory storage to ensure minimal footprint and high synchronization performance.
|
|
92
96
|
## About
|
|
@@ -98,17 +102,17 @@ This library is developed by [WebC.site](https://webc.site).
|
|
|
98
102
|
---
|
|
99
103
|
|
|
100
104
|
<a id="zh"></a>
|
|
101
|
-
# @1-/scan :
|
|
105
|
+
# @1-/scan : 基于 SQLite 的目录文件增量扫描器
|
|
102
106
|
|
|
103
|
-
|
|
107
|
+
增量扫描目录文件,比对文件大小与修改时间检测变更,同步元数据至 SQLite 数据库,返回已变更相对路径列表。
|
|
104
108
|
|
|
105
109
|
## 1. 功能介绍
|
|
106
110
|
|
|
107
111
|
- **增量扫描**:比对大小与修改时间,过滤未变更文件,减少磁盘读写。
|
|
108
|
-
-
|
|
109
|
-
-
|
|
110
|
-
-
|
|
111
|
-
-
|
|
112
|
+
- **键长优化**:路径长度不大于 16 字节时存储原始字节,超出 16 字节转换为 16 字节 MD5 值,优化索引空间与查询性能。
|
|
113
|
+
- **内存优化**:使用 BinMap 与 BinSet 存储二进制键,避免字符串解码,降低内存占用。
|
|
114
|
+
- **事务保障**:元数据变更与删除操作合并在数据库事务中执行,确保数据一致性。
|
|
115
|
+
- **自动配置**:集成 @1-/sqlite,自动初始化数据库表结构,并在检测到新数据库时自动更新 .gitignore。
|
|
112
116
|
|
|
113
117
|
## 2. 使用演示
|
|
114
118
|
|
|
@@ -121,15 +125,15 @@ const dir = "./data";
|
|
|
121
125
|
const db_path = "./scan_record.db";
|
|
122
126
|
const files = ["file1.txt", "file2.txt"];
|
|
123
127
|
|
|
124
|
-
// 扫描文件列表并同步至 SQLite
|
|
128
|
+
// 扫描文件列表并同步至 SQLite,返回已变更的相对路径列表与更新函数
|
|
125
129
|
const [updated_paths, upsert] = await scan(dir, db_path, files);
|
|
126
130
|
|
|
127
|
-
//
|
|
131
|
+
// 退出作用域自动关闭数据库
|
|
128
132
|
using _ = upsert;
|
|
129
133
|
|
|
130
134
|
console.log("更新文件列表:", updated_paths);
|
|
131
135
|
|
|
132
|
-
//
|
|
136
|
+
// 更新已处理文件的元数据至数据库
|
|
133
137
|
for (const rel_path of updated_paths) {
|
|
134
138
|
await upsert(rel_path);
|
|
135
139
|
}
|
|
@@ -151,14 +155,14 @@ db.close();
|
|
|
151
155
|
|
|
152
156
|
## 3. 设计思路
|
|
153
157
|
|
|
154
|
-
|
|
158
|
+
主入口调度各模块,协作完成目录扫描与数据同步。
|
|
155
159
|
|
|
156
|
-

|
|
157
161
|
|
|
158
|
-
1. **初始化连接**:调用 `@1-/sqlite` 打开 SQLite
|
|
159
|
-
2. **加载记录**:`load.js`
|
|
160
|
-
3.
|
|
161
|
-
4.
|
|
162
|
+
1. **初始化连接**:调用 `@1-/sqlite` 打开 SQLite 数据库。若数据库为新创建,自动更新所在目录的 `.gitignore` 阻断提交。
|
|
163
|
+
2. **加载记录**:`load.js` 检查并创建 `scanMtimeLen` 表。读取已记录的哈希、大小及修改时间,恢复至内存映射 `BinMap` 中。
|
|
164
|
+
3. **比对文件**:`scan.js` 遍历输入文件列表,调用 `stat.js` 获取元数据,并利用 `@1-/hash` 将路径映射为 16 字节二进制键。比对大小或修改时间,差异项归入变更列表。
|
|
165
|
+
4. **删除与返回**:`rm.js` 在事务中批量删除物理移除或不再扫描的记录。返回变更路径列表与 `upsert` 函数(`upsert.js` 提供),用以更新数据库,支持自动释放。
|
|
162
166
|
|
|
163
167
|
## 4. 技术栈
|
|
164
168
|
|
|
@@ -175,7 +179,11 @@ db.close();
|
|
|
175
179
|
├── src
|
|
176
180
|
│ ├── _.js # 核心控制流程
|
|
177
181
|
│ ├── load.js # 元数据表初始化与加载
|
|
178
|
-
│
|
|
182
|
+
│ ├── rm.js # 批量删除元数据
|
|
183
|
+
│ ├── save.js # 批量存储元数据
|
|
184
|
+
│ ├── scan.js # 扫描与比对文件
|
|
185
|
+
│ ├── stat.js # 获取文件元数据及路径哈希
|
|
186
|
+
│ └── upsert.js # 逐个更新与自动关闭
|
|
179
187
|
└── tests # 单元测试
|
|
180
188
|
```
|
|
181
189
|
|
package/_.js
CHANGED
|
@@ -1,59 +1,31 @@
|
|
|
1
1
|
import sqlite from "@1-/sqlite";
|
|
2
|
-
import tx from "@1-/sqlite/tx.js";
|
|
3
2
|
import { BinMap } from "@3-/binmap";
|
|
4
3
|
import vbE from "@3-/vb/vbE.js";
|
|
4
|
+
import { availableParallelism } from "node:os";
|
|
5
|
+
import pLimit from "@3-/plimit";
|
|
6
|
+
import upsertGitignore from "@1-/upsert_gitignore";
|
|
7
|
+
import { existsSync } from "node:fs";
|
|
8
|
+
import { join, dirname, basename } from "node:path";
|
|
5
9
|
import load from "./load.js";
|
|
6
|
-
import
|
|
7
|
-
import
|
|
8
|
-
import
|
|
9
|
-
import strmd5 from "@1-/hash/strmd5.js";
|
|
10
|
-
import { BinSet } from "@3-/binset";
|
|
11
|
-
import u8eq from "@3-/u8/u8eq.js";
|
|
12
|
-
|
|
13
|
-
const stat = async (dir, rel_path) => {
|
|
14
|
-
const { size, mtimeMs: mtime_ms } = await fsStat(join(dir, rel_path));
|
|
15
|
-
return [size, int(mtime_ms), strmd5(rel_path)];
|
|
16
|
-
};
|
|
10
|
+
import scan from "./scan.js";
|
|
11
|
+
import rm from "./rm.js";
|
|
12
|
+
import upsert from "./upsert.js";
|
|
17
13
|
|
|
18
14
|
export default async (dir, db_path, files) => {
|
|
15
|
+
if (!existsSync(db_path)) {
|
|
16
|
+
upsertGitignore(join(dirname(db_path), ".gitignore"), basename(db_path));
|
|
17
|
+
}
|
|
19
18
|
const db = sqlite(db_path),
|
|
20
19
|
existing = new BinMap(),
|
|
21
20
|
db_rows = load(db),
|
|
22
|
-
|
|
23
|
-
update = [];
|
|
21
|
+
limit = pLimit(availableParallelism());
|
|
24
22
|
|
|
25
23
|
db_rows.forEach(({ hash, size, mtime }) => existing.set(hash, vbE([size, mtime])));
|
|
26
24
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
const [size, mtime, hash] = await stat(dir, rel_path),
|
|
30
|
-
val = existing.get(hash);
|
|
31
|
-
|
|
32
|
-
scanned.add(hash);
|
|
33
|
-
|
|
34
|
-
if (!val || !u8eq(val, vbE([size, mtime]))) {
|
|
35
|
-
update.push(rel_path);
|
|
36
|
-
}
|
|
37
|
-
} catch {}
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
const rm = db_rows.filter(({ hash }) => !scanned.has(hash)).map(({ hash }) => hash),
|
|
41
|
-
insert = db.prepare("INSERT OR REPLACE INTO scanMtimeLen(hash,size,mtime)VALUES(?,?,?)"),
|
|
42
|
-
upsert = async (rel_path) => {
|
|
43
|
-
try {
|
|
44
|
-
const [size, mtime, hash] = await stat(dir, rel_path);
|
|
45
|
-
insert.run(hash, size, mtime);
|
|
46
|
-
} catch {}
|
|
47
|
-
};
|
|
48
|
-
|
|
49
|
-
if (rm.length > 0) {
|
|
50
|
-
tx(db, () => {
|
|
51
|
-
const del = db.prepare("DELETE FROM scanMtimeLen WHERE hash=?");
|
|
52
|
-
rm.forEach((hash) => del.run(hash));
|
|
53
|
-
});
|
|
54
|
-
}
|
|
25
|
+
const [scanned, update] = await scan(dir, files, existing, limit),
|
|
26
|
+
rm_hashes = db_rows.filter(({ hash }) => !scanned.has(hash)).map(({ hash }) => hash);
|
|
55
27
|
|
|
56
|
-
|
|
28
|
+
rm(db, rm_hashes);
|
|
57
29
|
|
|
58
|
-
return [update, upsert];
|
|
30
|
+
return [update, upsert(db, dir)];
|
|
59
31
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@1-/scan",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.9",
|
|
4
4
|
"description": "Incrementally scan directory files and track metadata in SQLite / 增量扫描目录文件并使用 SQLite 记录元数据",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"directory",
|
|
@@ -23,12 +23,14 @@
|
|
|
23
23
|
},
|
|
24
24
|
"peerDependencies": {
|
|
25
25
|
"@1-/hash": "^0.1.0",
|
|
26
|
-
"@1-/sqlite": "^0.1.
|
|
26
|
+
"@1-/sqlite": "^0.1.1",
|
|
27
27
|
"@3-/binmap": "^0.1.20",
|
|
28
28
|
"@3-/binset": "^0.1.6",
|
|
29
29
|
"@3-/int": "^0.1.1",
|
|
30
|
+
"@3-/plimit": "^0.1.3",
|
|
30
31
|
"@3-/u8": "^0.1.2",
|
|
31
32
|
"@3-/utf8": "^0.1.1",
|
|
32
|
-
"@3-/vb": "^0.1.6"
|
|
33
|
+
"@3-/vb": "^0.1.6",
|
|
34
|
+
"@1-/upsert_gitignore": "^0.1.3"
|
|
33
35
|
}
|
|
34
36
|
}
|
package/rm.js
ADDED
package/scan.js
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { BinSet } from "@3-/binset";
|
|
2
|
+
import u8eq from "@3-/u8/u8eq.js";
|
|
3
|
+
import vbE from "@3-/vb/vbE.js";
|
|
4
|
+
import stat from "./stat.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Stats every path in `files` through `limit` and compares the result with
 * the metadata previously recorded in `existing`.
 *
 * @param {string} dir - Base directory handed to `stat` together with each relative path.
 * @param {string[]} files - Relative paths to examine.
 * @param {{ get: Function }} existing - Lookup from path key to the stored
 *   `vbE([size, mtime])` bytes; only `get(hash)` is used here.
 * @param {Function} limit - Scheduler that is called with an async task for each file.
 * @returns {Promise<[BinSet, string[]]>} The keys of every path that was
 *   stat'ed successfully, and the relative paths that have no stored entry or
 *   whose size/mtime bytes differ. The paths are listed in the same order as
 *   `files`, regardless of the order in which the concurrent tasks finish.
 */
export default async (dir, files, existing, limit) => {
  const scanned = new BinSet();
  // One flag per input index. Pushing into a shared array from concurrent
  // tasks would order the result by completion time, which varies run to run.
  const changed = new Array(files.length).fill(false);

  await Promise.all(
    files.map((rel_path, index) =>
      limit(async () => {
        try {
          const [size, mtime, hash] = await stat(dir, rel_path);
          const val = existing.get(hash);

          scanned.add(hash);

          if (!val || !u8eq(val, vbE([size, mtime]))) {
            changed[index] = true;
          }
        } catch {
          // Best effort: a path that cannot be processed is left out of both
          // `scanned` and the change list instead of aborting the whole scan.
        }
      }),
    ),
  );

  return [scanned, files.filter((_, index) => changed[index])];
};
|
package/stat.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { stat as fsStat } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import int from "@3-/int";
|
|
4
|
+
import strmd5 from "@1-/hash/strmd5.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Reads the file-system metadata of one file below `dir`.
 *
 * @param {string} dir - Base directory.
 * @param {string} rel_path - Path relative to `dir`.
 * @returns {Promise<Array>} `[size, int(mtimeMs), strmd5(rel_path)]`: the size
 *   reported by `fs.stat`, its millisecond mtime passed through `int`, and the
 *   key derived from the relative path (not from the joined path).
 */
export default async (dir, rel_path) => {
  const info = await fsStat(join(dir, rel_path));
  return [info.size, int(info.mtimeMs), strmd5(rel_path)];
};
|
package/upsert.js
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import stat from "./stat.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Creates the function used to persist the current metadata of a single file.
 *
 * @param {object} db - Database handle; `prepare` and `close` are called on it.
 * @param {string} dir - Base directory handed to `stat` with each relative path.
 * @returns {Function} Async `upsert(rel_path)` that stats the file and writes
 *   its row into `scanMtimeLen`. The function also carries a `Symbol.dispose`
 *   method that closes `db`.
 */
export default (db, dir) => {
  // Prepared once and reused for every call of the returned function.
  const statement = db.prepare("INSERT OR REPLACE INTO scanMtimeLen(hash,size,mtime)VALUES(?,?,?)");

  const upsert = async (rel_path) => {
    try {
      const [byte_len, modified, key] = await stat(dir, rel_path);
      statement.run(key, byte_len, modified);
    } catch {
      // Failures of either the stat or the insert are ignored on purpose.
    }
  };

  upsert[Symbol.dispose] = () => db.close();

  return upsert;
};
|