@1-/scan 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +117 -67
- package/_.js +2 -2
- package/dirWalk.js +4 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -5,73 +5,98 @@
|
|
|
5
5
|
<a id="en"></a>
|
|
6
6
|
# @1-/scan : Incrementally scan directory files and track metadata in SQLite
|
|
7
7
|
|
|
8
|
-
Incrementally scans directory files,
|
|
8
|
+
Incrementally scans directory files, compares file sizes and modification times to detect changes, synchronizes metadata to an SQLite database, and returns updated relative paths.
|
|
9
9
|
|
|
10
10
|
## Features
|
|
11
11
|
|
|
12
|
-
- Incremental Scanning
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
17
|
-
- Native
|
|
12
|
+
- **Incremental Scanning**: Detects and processes only new, modified, or deleted files, avoiding redundant file system operations.
|
|
13
|
+
- **Key Optimization**: Stores relative paths of 16 bytes or fewer directly as raw bytes; hashes longer paths to 16-byte MD5 digests to optimize database index space and query performance.
|
|
14
|
+
- **Metadata Compression**: Compresses file sizes and modification times using Varint (variable-length integer) encoding.
|
|
15
|
+
- **Transactional Integrity**: Packages updates and deletions in a single database transaction to guarantee consistency.
|
|
16
|
+
- **Flexible Filtering**: Supports custom ignore callback functions to filter specific files and directories.
|
|
17
|
+
- **Native Database**: Integrates Bun's native `bun:sqlite` module, eliminating external database driver dependencies.
|
|
18
18
|
|
|
19
19
|
## Usage
|
|
20
20
|
|
|
21
|
+
### Basic Incremental Scan
|
|
22
|
+
|
|
21
23
|
```javascript
|
|
22
24
|
import scan from "@1-/scan";
|
|
23
25
|
|
|
24
|
-
const dir = "./
|
|
25
|
-
const dbPath = "./
|
|
26
|
+
const dir = "./data";
|
|
27
|
+
const dbPath = "./scan_record.db";
|
|
26
28
|
|
|
27
|
-
// Scan directory and sync
|
|
29
|
+
// Scan directory and sync metadata to SQLite, returning modified relative paths
|
|
28
30
|
const updatedPaths = await scan(dir, dbPath);
|
|
29
|
-
console.log(updatedPaths);
|
|
31
|
+
console.log("Updated files:", updatedPaths);
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Scan with Ignore Filter
|
|
35
|
+
|
|
36
|
+
```javascript
|
|
37
|
+
import { FILE } from "@1-/walk";
|
|
38
|
+
import scan from "@1-/scan";
|
|
39
|
+
|
|
40
|
+
const dir = "./data";
|
|
41
|
+
const dbPath = "./scan_record.db";
|
|
42
|
+
|
|
43
|
+
// Ignore temporary files and specific configurations
|
|
44
|
+
const ignore = (kind, relPath) => {
|
|
45
|
+
return relPath.startsWith("temp/") || relPath === "config.json";
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
const updatedPaths = await scan(dir, dbPath, ignore);
|
|
49
|
+
console.log("Synced. Updated files:", updatedPaths);
|
|
30
50
|
```
|
|
31
51
|
|
|
32
52
|
## Design Ideas
|
|
33
53
|
|
|
34
|
-
|
|
54
|
+
The main entry orchestrates independent modules to execute the incremental scanning and synchronization flow.
|
|
35
55
|
|
|
36
56
|
```mermaid
|
|
37
57
|
graph TD
|
|
38
|
-
Entry["_.js (
|
|
39
|
-
Entry -->|Load
|
|
40
|
-
Entry -->|Walk
|
|
41
|
-
DirWalk -->|
|
|
42
|
-
DirWalk -->|
|
|
43
|
-
Entry -->|
|
|
44
|
-
Save -->|
|
|
58
|
+
Entry["_.js (Entry Point)"] -->|1. Initialize Connection| Sqlite[sqlite.js]
|
|
59
|
+
Entry -->|2. Load Existing Records| Load[load.js]
|
|
60
|
+
Entry -->|3. Walk & Compare Files| DirWalk[dirWalk.js]
|
|
61
|
+
DirWalk -->|Invoke| Walk["@1-/walk/walkRelIgnore"]
|
|
62
|
+
DirWalk -->|Process Path Keys| Hash[hash.js]
|
|
63
|
+
Entry -->|4. Persist Changes| Save[save.js]
|
|
64
|
+
Save -->|Transaction Wrapper| Trans[trans.js]
|
|
45
65
|
```
|
|
46
66
|
|
|
67
|
+
1. **Initialize Connection (`sqlite.js`)**: Opens the SQLite database connection and configures automatic connection disposal.
|
|
68
|
+
2. **Load Records (`load.js`)**: Automatically creates the schema if missing, retrieves existing file hashes, sizes, and modification times, and reconstructs the reference set in memory.
|
|
69
|
+
3. **Walk & Compare (`dirWalk.js`)**: Traverses the directory structure recursively. Paths are transformed into 16-byte keys via `hash.js`. File attributes are encoded using `@3-/vb` and compared against database records to identify additions and modifications.
|
|
70
|
+
4. **Persist Changes (`save.js`)**: Executes bulk inserts and deletions in a single transaction via `trans.js` to update database state.
|
|
71
|
+
|
|
47
72
|
## Tech Stack
|
|
48
73
|
|
|
49
|
-
- Bun
|
|
50
|
-
- Bun SQLite
|
|
51
|
-
-
|
|
52
|
-
-
|
|
53
|
-
-
|
|
74
|
+
- **Bun**: Runtime environment and test framework.
|
|
75
|
+
- **Bun SQLite**: Native high-performance SQLite engine built into Bun.
|
|
76
|
+
- **@1-/walk**: Directory walker with ignore support.
|
|
77
|
+
- **@3-/vb**: Variable-length byte (Varint) encoder and decoder.
|
|
78
|
+
- **@3-/binmap / @3-/binset**: Memory-efficient collections designed for binary keys.
|
|
54
79
|
|
|
55
80
|
## Directory Structure
|
|
56
81
|
|
|
57
82
|
```
|
|
58
83
|
.
|
|
59
84
|
├── src
|
|
60
|
-
│ ├── _.js # Entry point
|
|
61
|
-
│ ├── dirWalk.js #
|
|
62
|
-
│ ├──
|
|
63
|
-
│ ├──
|
|
64
|
-
│ ├──
|
|
65
|
-
│ ├── sqlite.js #
|
|
66
|
-
│ └── trans.js #
|
|
85
|
+
│ ├── _.js # Entry point coordinating scanning and synchronization
|
|
86
|
+
│ ├── dirWalk.js # Directory traverser comparing file metadata
|
|
87
|
+
│ ├── hash.js # Hashing helper mapping paths to 16-byte keys
|
|
88
|
+
│ ├── load.js # Database loader initializing schema and loading records
|
|
89
|
+
│ ├── save.js # Writer executing bulk updates and deletions
|
|
90
|
+
│ ├── sqlite.js # Connection manager instantiating SQLite database
|
|
91
|
+
│ └── trans.js # Transaction wrapper providing rollback mechanism
|
|
67
92
|
└── tests # Test suites
|
|
68
93
|
```
|
|
69
94
|
|
|
70
95
|
## History
|
|
71
96
|
|
|
72
|
-
SQLite was
|
|
97
|
+
SQLite was created by D. Richard Hipp in 2000 while designing onboard damage-control software for US Navy guided-missile destroyers. The system originally depended on a commercial database that required constant database administration; a connection loss could stall the entire damage control application. To resolve this vulnerability, Hipp designed a serverless, zero-configuration embedded database that directly reads and writes local files—marking the birth of SQLite.
|
|
73
98
|
|
|
74
|
-
To
|
|
99
|
+
To conserve disk space and reduce I/O overhead, SQLite utilizes Varint (variable-length integer) encoding for metadata storage. Under this scheme, small integers consume only 1 byte, while larger numbers scale dynamically. This library inherits that design philosophy, compressing file metadata into varints before storing it, ensuring minimal footprint and high sync performance.
|
|
75
100
|
../doc/en/about.md
|
|
76
101
|
|
|
77
102
|
---
|
|
@@ -79,71 +104,96 @@ To optimize space inside the database file, SQLite internally uses variable-leng
|
|
|
79
104
|
<a id="zh"></a>
|
|
80
105
|
# @1-/scan : 增量扫描目录文件并使用 SQLite 记录元数据
|
|
81
106
|
|
|
82
|
-
|
|
107
|
+
增量扫描目录文件,通过比对文件大小和修改时间检测变更,并同步至 SQLite 数据库中,最终返回有更新的相对路径列表。
|
|
83
108
|
|
|
84
109
|
## 功能介绍
|
|
85
110
|
|
|
86
|
-
-
|
|
87
|
-
-
|
|
88
|
-
-
|
|
89
|
-
-
|
|
90
|
-
-
|
|
91
|
-
-
|
|
111
|
+
- **增量扫描**:仅处理新增、修改或删除的文件,避免冗余的文件系统读写,提升同步速度。
|
|
112
|
+
- **路径压缩**:当相对路径长度小于等于 16 字节时保留原始字节;超出 16 字节则转换为 16 字节 MD5 值作为数据库主键,优化索引空间与查询性能。
|
|
113
|
+
- **元数据压缩**:使用 Varint(可变字节整型)编码方式压缩存储文件大小和修改时间。
|
|
114
|
+
- **事务安全**:将更新与删除操作合并在单个数据库事务中执行,确保数据一致性。
|
|
115
|
+
- **灵活过滤**:支持通过自定义回调函数过滤指定类型的文件与目录。
|
|
116
|
+
- **原生依赖**:基于 Bun 内置 `bun:sqlite` 模块,无需额外安装或编译数据库驱动。
|
|
92
117
|
|
|
93
118
|
## 使用演示
|
|
94
119
|
|
|
120
|
+
### 基础增量扫描
|
|
121
|
+
|
|
95
122
|
```javascript
|
|
96
123
|
import scan from "@1-/scan";
|
|
97
124
|
|
|
98
|
-
const dir = "./
|
|
99
|
-
const dbPath = "./
|
|
125
|
+
const dir = "./data";
|
|
126
|
+
const dbPath = "./scan_record.db";
|
|
100
127
|
|
|
101
|
-
// 扫描目录并同步至 SQLite
|
|
128
|
+
// 扫描目录并同步至 SQLite,返回发生变更的相对路径列表
|
|
102
129
|
const updatedPaths = await scan(dir, dbPath);
|
|
103
|
-
console.log(updatedPaths);
|
|
130
|
+
console.log("更新文件列表:", updatedPaths);
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### 带有忽略规则的扫描
|
|
134
|
+
|
|
135
|
+
```javascript
|
|
136
|
+
import { FILE } from "@1-/walk";
|
|
137
|
+
import scan from "@1-/scan";
|
|
138
|
+
|
|
139
|
+
const dir = "./data";
|
|
140
|
+
const dbPath = "./scan_record.db";
|
|
141
|
+
|
|
142
|
+
// 忽略特定文件或目录
|
|
143
|
+
const ignore = (kind, relPath) => {
|
|
144
|
+
return relPath.startsWith("temp/") || relPath === "config.json";
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
const updatedPaths = await scan(dir, dbPath, ignore);
|
|
148
|
+
console.log("已同步,更新列表:", updatedPaths);
|
|
104
149
|
```
|
|
105
150
|
|
|
106
151
|
## 设计思路
|
|
107
152
|
|
|
108
|
-
|
|
153
|
+
系统主入口调用各个独立模块完成增量扫描与数据同步流程。
|
|
109
154
|
|
|
110
155
|
```mermaid
|
|
111
156
|
graph TD
|
|
112
|
-
Entry["_.js (主入口)"]
|
|
113
|
-
Entry
|
|
114
|
-
Entry
|
|
115
|
-
DirWalk
|
|
116
|
-
DirWalk
|
|
117
|
-
Entry
|
|
118
|
-
Save
|
|
157
|
+
Entry["_.js (主入口)"] -->|1. 初始化连接| Sqlite[sqlite.js]
|
|
158
|
+
Entry -->|2. 加载已有记录| Load[load.js]
|
|
159
|
+
Entry -->|3. 扫描文件系统并对比| DirWalk[dirWalk.js]
|
|
160
|
+
DirWalk -->|调用| Walk["@1-/walk/walkRelIgnore"]
|
|
161
|
+
DirWalk -->|处理路径键| Hash[hash.js]
|
|
162
|
+
Entry -->|4. 持久化数据变更| Save[save.js]
|
|
163
|
+
Save -->|事务保障| Trans[trans.js]
|
|
119
164
|
```
|
|
120
165
|
|
|
166
|
+
1. **初始化连接 (`sqlite.js`)**:打开 SQLite 数据库,并配置自动释放连接机制。
|
|
167
|
+
2. **加载记录 (`load.js`)**:若表不存在则自动创建,读取已记录的文件哈希、大小及修改时间,在内存中还原比对集合。
|
|
168
|
+
3. **文件系统扫描 (`dirWalk.js`)**:递归遍历目录,利用 `hash.js` 将路径映射为 16 字节键。对比当前文件与数据库元数据(利用 `@3-/vb` 进行压缩状态对比),筛选出新增和修改的文件。
|
|
169
|
+
4. **数据存储 (`save.js`)**:使用 `trans.js` 开启事务,将需要删除的无效记录及需要更新的元数据批量写入 SQLite 数据库。
|
|
170
|
+
|
|
121
171
|
## 技术栈
|
|
122
172
|
|
|
123
|
-
- Bun
|
|
124
|
-
- Bun SQLite
|
|
125
|
-
-
|
|
126
|
-
-
|
|
127
|
-
-
|
|
173
|
+
- **Bun**:JavaScript 运行时及测试框架。
|
|
174
|
+
- **Bun SQLite**:内置的轻量级、高性能 SQLite 实现。
|
|
175
|
+
- **@1-/walk**:支持过滤规则的目录递归遍历工具。
|
|
176
|
+
- **@3-/vb**:Varint(可变字节)编码与解码器。
|
|
177
|
+
- **@3-/binmap / @3-/binset**:针对二进制键优化的 Map 和 Set 容器。
|
|
128
178
|
|
|
129
179
|
## 目录结构
|
|
130
180
|
|
|
131
181
|
```
|
|
132
182
|
.
|
|
133
183
|
├── src
|
|
134
|
-
│ ├── _.js #
|
|
135
|
-
│ ├── dirWalk.js #
|
|
136
|
-
│ ├──
|
|
137
|
-
│ ├──
|
|
138
|
-
│ ├──
|
|
139
|
-
│ ├── sqlite.js #
|
|
140
|
-
│ └── trans.js #
|
|
141
|
-
└── tests #
|
|
184
|
+
│ ├── _.js # 核心流程控制器,调度各模块完成增量同步
|
|
185
|
+
│ ├── dirWalk.js # 遍历目录并比对元数据,输出变更队列
|
|
186
|
+
│ ├── hash.js # 将文件相对路径编码或计算为固定 16 字节键
|
|
187
|
+
│ ├── load.js # 查询数据库现有记录,若数据表缺失则执行初始化
|
|
188
|
+
│ ├── save.js # 执行批量写入与删除操作
|
|
189
|
+
│ ├── sqlite.js # 创建并配置 SQLite 数据库实例
|
|
190
|
+
│ └── trans.js # 封装 SQLite 事务,提供异常回滚机制
|
|
191
|
+
└── tests # 单元测试模块
|
|
142
192
|
```
|
|
143
193
|
|
|
144
194
|
## 历史故事
|
|
145
195
|
|
|
146
|
-
SQLite
|
|
196
|
+
SQLite 的诞生与军事应用密切相关。2000 年,D. Richard Hipp 在为美国海军设计导弹驱逐舰板载损害控制系统软件时,遇到商业数据库由于配置复杂、日常需要专业维护且一旦连接丢失便会导致整个软件瘫痪的问题。Hipp 随即着手设计了一套无需任何独立服务器、零配置且直接对本地文件进行读写的嵌入式数据库,这便是 SQLite。
|
|
147
197
|
|
|
148
|
-
|
|
198
|
+
为极限节约磁盘空间和降低读写延迟,SQLite 广泛应用了 Varint(可变字节整型)编码。在这种编码下,数值较小的整数(如常见的文件大小、序列号)仅占用 1 个字节,只有大数值才会占用更多字节。本项目中对文件大小和修改时间采用同样的压缩设计,从而秉承了 SQLite 极致节约空间与高效率的系统设计哲学。
|
|
149
199
|
../doc/zh/about.md
|
package/_.js
CHANGED
|
@@ -5,7 +5,7 @@ import load from "./load.js";
|
|
|
5
5
|
import dirWalk from "./dirWalk.js";
|
|
6
6
|
import save from "./save.js";
|
|
7
7
|
|
|
8
|
-
export default async (dir, db_path) => {
|
|
8
|
+
export default async (dir, db_path, ignore) => {
|
|
9
9
|
using db = sqlite(db_path);
|
|
10
10
|
const existing = new BinMap(),
|
|
11
11
|
db_rows = load(db);
|
|
@@ -14,7 +14,7 @@ export default async (dir, db_path) => {
|
|
|
14
14
|
existing.set(row.hash, vbE([row.size, row.mtime]));
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
const [scanned, to_update] = await dirWalk(dir, existing),
|
|
17
|
+
const [scanned, to_update] = await dirWalk(dir, existing, ignore),
|
|
18
18
|
to_delete = [];
|
|
19
19
|
for (const row of db_rows) {
|
|
20
20
|
if (!scanned.has(row.hash)) {
|
package/dirWalk.js
CHANGED
|
@@ -8,11 +8,14 @@ import vbE from "@3-/vb/vbE.js";
|
|
|
8
8
|
import int from "@3-/int";
|
|
9
9
|
import hash from "./hash.js";
|
|
10
10
|
|
|
11
|
-
export default async (dir, existing) => {
|
|
11
|
+
export default async (dir, existing, ignore) => {
|
|
12
12
|
const scanned = new BinSet(),
|
|
13
13
|
to_update = [];
|
|
14
14
|
|
|
15
15
|
await walkRelIgnore(dir, async (kind, rel_path) => {
|
|
16
|
+
if (ignore && ignore(kind, rel_path)) {
|
|
17
|
+
return false;
|
|
18
|
+
}
|
|
16
19
|
if (kind === FILE) {
|
|
17
20
|
const { size, mtimeMs } = await stat(join(dir, rel_path)),
|
|
18
21
|
mtime = int(mtimeMs),
|