@1-/scan 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,73 +5,113 @@
5
5
  <a id="en"></a>
6
6
  # @1-/scan : Incrementally scan directory files and track metadata in SQLite
7
7
 
8
- Incrementally scans directory files, tracks file sizes and modification times, and synchronizes status into an SQLite database using Bun's native SQLite driver (`bun:sqlite`), returning an array of updated relative paths.
8
+ Incrementally scans directory files, compares file sizes and modification times to detect changes, synchronizes metadata to SQLite database, and returns updated relative paths.
9
9
 
10
10
  ## Features
11
11
 
12
- - Incremental Scanning: Detects and updates only new, modified, or deleted files, avoiding redundant file system operations.
13
- - Space-Efficient Storage: Employs Varint compression to serialize and compare file sizes and modification times.
14
- - Smart Path Key: Stores relative paths not exceeding 16 bytes as raw binary to preserve readability, while hashing longer paths to 16-byte MD5 digests to optimize index performance.
15
- - Database Synchronization: Synchronizes updates and deletions in a single atomic transaction.
16
- - Ignore Pattern Support: Integrates ignore rules dynamically during traversal.
17
- - Native SQLite: Leverages Bun's native, high-performance `bun:sqlite` engine, eliminating external build dependencies.
12
+ - **Incremental Scanning**: Detects and processes only new, modified, or deleted files, avoiding redundant file system operations.
13
+ - **Key Optimization**: Stores relative paths within 16 bytes directly as raw bytes; hashes longer paths to 16-byte MD5 digests to optimize database index space and query performance.
14
+ - **Metadata Compression**: Compresses file sizes and modification times using Varint (variable-length byte) encoding.
15
+ - **Transactional Integrity**: Packages updates and deletions in a single database transaction to guarantee consistency.
16
+ - **Flexible Filtering**: Supports custom ignore callback functions to filter specific files and directories.
17
+ - **Native Database**: Integrates Bun native `bun:sqlite` module, eliminating external database driver dependencies.
18
18
 
19
19
  ## Usage
20
20
 
21
+ ### Basic Incremental Scan
22
+
21
23
  ```javascript
22
24
  import scan from "@1-/scan";
23
25
 
24
- const dir = "./src";
25
- const dbPath = "./files.db";
26
+ const dir = "./data";
27
+ const db_path = "./scan_record.db";
28
+
29
+ // Scan directory and sync metadata to SQLite, returning modified relative paths and upsert function
30
+ const [updated_paths, upsert] = await scan(dir, db_path);
31
+
32
+ // Auto-close database when exiting scope
33
+ using _upsert = upsert;
34
+
35
+ console.log("Updated files:", updated_paths);
26
36
 
27
- // Scan directory and sync records into SQLite, returning an array of updated relative paths
28
- const updatedPaths = await scan(dir, dbPath);
29
- console.log(updatedPaths);
37
+ // Update scanned file metadata in database
38
+ for (const rel_path of updated_paths) {
39
+ await upsert(rel_path);
40
+ }
41
+ ```
42
+
43
+ ### Scan with Ignore Filter
44
+
45
+ ```javascript
46
+ import scan from "@1-/scan";
47
+
48
+ const dir = "./data";
49
+ const db_path = "./scan_record.db";
50
+
51
+ // Ignore temporary files and specific configurations
52
+ const ignore = (kind, rel_path) => {
53
+ return rel_path.startsWith("temp/") || rel_path === "config.json";
54
+ };
55
+
56
+ const [updated_paths, upsert] = await scan(dir, db_path, ignore);
57
+ using _upsert = upsert;
58
+
59
+ console.log("Synced. Updated files:", updated_paths);
60
+
61
+ for (const rel_path of updated_paths) {
62
+ await upsert(rel_path);
63
+ }
30
64
  ```
31
65
 
32
66
  ## Design Ideas
33
67
 
34
- Execution flow of modules:
68
+ The main entry orchestrates independent modules to execute the incremental scanning and synchronization flow.
35
69
 
36
70
  ```mermaid
37
71
  graph TD
38
- Entry["_.js (Main)"] -->|Open database| Sqlite[sqlite.js]
39
- Entry -->|Load existing records| Load[load.js]
40
- Entry -->|Walk and compare| DirWalk[dirWalk.js]
41
- DirWalk -->|Traverse files| Walk["@1-/walk/walkRelIgnore"]
42
- DirWalk -->|Optimize keys| Hash[hash.js]
43
- Entry -->|Apply modifications| Save[save.js]
44
- Save -->|Wrap transaction| Trans[trans.js]
72
+ Entry["_.js (Entry Point)"] -->|1. Initialize Connection| Sqlite["sqlite.js"]
73
+ Entry -->|2. Load Existing Records| Load["load.js"]
74
+ Entry -->|3. Walk & Compare Files| DirWalk["dirWalk.js"]
75
+ DirWalk -->|Invoke| Walk["@1-/walk/walkRelIgnore"]
76
+ DirWalk -->|Process Path Keys| Hash["hash.js"]
77
+ Entry -->|4. Delete Absent & Return Upsert| Trans["trans.js"]
78
+ Save["save.js (Independent Sync Helper)"] -->|Transaction Wrapper| Trans
45
79
  ```
46
80
 
81
+ 1. **Initialize Connection (`sqlite.js`)**: Opens SQLite database connection and configures automatic connection disposal.
82
+ 2. **Load Records (`load.js`)**: Automatically creates schema if missing, retrieves existing file hashes, sizes, and modification times, and reconstructs reference set in memory.
83
+ 3. **Walk & Compare (`dirWalk.js`)**: Traverses directory structure recursively. Paths are transformed into 16-byte keys via `hash.js`. File attributes are encoded using `@3-/vb` and compared against database records to identify additions and modifications.
84
+ 4. **Delete & Return Upsert**: Uses `trans.js` to delete, inside a single transaction, the records of files that no longer exist, and returns the modified relative paths together with an `upsert` function so that the caller can update database records.
85
+ 5. **Independent Sync Helper (`save.js`)**: Exported independent module to execute bulk inserts and deletions in a single transaction.
86
+
47
87
  ## Tech Stack
48
88
 
49
- - Bun: Runtime and test runner
50
- - Bun SQLite: Bun's built-in high-performance SQLite engine
51
- - `@1-/walk`: Directory walker with ignore support
52
- - `@3-/vb`: Variable-length byte encoder
53
- - `@3-/binmap` / `@3-/binset`: Efficient binary collection structures
89
+ - **Bun**: Runtime environment and test framework.
90
+ - **Bun SQLite**: Native high-performance SQLite engine built into Bun.
91
+ - **@1-/walk**: Directory walker with ignore support.
92
+ - **@3-/vb**: Variable-length byte (Varint) encoder and decoder.
93
+ - **@3-/binmap / @3-/binset**: Memory-efficient collections designed for binary keys.
54
94
 
55
95
  ## Directory Structure
56
96
 
57
97
  ```
58
98
  .
59
99
  ├── src
60
- │ ├── _.js # Entry point orchestrating the scanning and sync process
61
- │ ├── dirWalk.js # Recursively scans files and filters modified ones
62
- │ ├── load.js # Retrieves database records and initializes schema
63
- │ ├── save.js # Performs bulk database inserts and deletes
64
- │ ├── hash.js # Processes path keys into raw bytes or MD5 digests
65
- │ ├── sqlite.js # Manages SQLite database connection and disposal
66
- │ └── trans.js # Wraps operations inside an SQL transaction
100
+ │ ├── _.js # Entry point coordinating scanning and returning upsert helper
101
+ │ ├── dirWalk.js # Directory traverser comparing file metadata
102
+ │ ├── hash.js # Hashing helper mapping paths to 16-byte keys
103
+ │ ├── load.js # Database loader initializing schema and loading records
104
+ │ ├── save.js # Independent helper executing bulk updates and deletions
105
+ │ ├── sqlite.js # Connection manager instantiating SQLite database
106
+ │ └── trans.js # Transaction wrapper providing rollback mechanism
67
107
  └── tests # Test suites
68
108
  ```
69
109
 
70
110
  ## History
71
111
 
72
- SQLite was designed in 2000 by D. Richard Hipp while working on a US Navy damage control system. The application originally relied on an Informix database, which required extensive database administration. Hipp designed SQLite to be a serverless, self-contained library requiring zero configuration, allowing the software to function reliably even when database services were unavailable.
112
+ SQLite was created by D. Richard Hipp in 2000 while designing onboard software for US Navy guided-missile destroyers. The system originally depended on a commercial database that required constant database administration; a connection loss could stall the entire damage control application. To resolve this vulnerability, Hipp designed a serverless, zero-configuration embedded database that directly reads and writes local files—marking the birth of SQLite.
73
113
 
74
- To optimize space inside the database file, SQLite internally uses variable-length integers (Varints) to compress metadata. This project adopts similar techniques—compressing file size and modification time into varints before storage—inheriting the SQLite philosophy of minimalism and space efficiency for local file synchronization.
114
+ To conserve disk space and reduce I/O overhead, SQLite utilizes Varint (variable-length integer) encoding for metadata storage. Under this scheme, small integers consume only 1 byte, while larger numbers scale dynamically. This library inherits that design philosophy, compressing file metadata into varints before storing it, ensuring minimal footprint and high sync performance.
75
115
  ../doc/en/about.md
76
116
 
77
117
  ---
@@ -79,71 +119,111 @@ To optimize space inside the database file, SQLite internally uses variable-leng
79
119
  <a id="zh"></a>
80
120
  # @1-/scan : 增量扫描目录文件并使用 SQLite 记录元数据
81
121
 
82
- 基于 Bun 原生的高性能内置 SQLite 数据库(`bun:sqlite`)增量扫描目录,比对并同步文件大小与修改时间,并返回有变更的相对路径数组。
122
+ 增量扫描目录文件,通过比对文件大小和修改时间检测变更,并同步至 SQLite 数据库中,最终返回有更新的相对路径列表。
83
123
 
84
124
  ## 功能介绍
85
125
 
86
- - 增量扫描:仅处理新增、修改或删除的文件,避免冗余文件操作。
87
- - 紧凑存储:使用可变字节码(Varint)压缩技术比对并保存文件大小和修改时间。
88
- - 路径映射:相对路径长度不大于 16 字节时保留原始字节,大于 16 字节时计算为 16 字节 MD5,优化数据库索引。
89
- - 事务同步:更新与删除操作合并至单次数据库事务,确保一致性。
90
- - 规则过滤:基于 `@1-/walk` 的忽略规则过滤特定文件与目录。
91
- - 原生数据库:使用 Bun 内置的 `bun:sqlite`,性能优异且无需安装或编译外部依赖。
126
+ - **增量扫描**:仅处理新增、修改或删除的文件,避免冗余的文件系统读写,提升同步速度。
127
+ - **路径压缩**:当相对路径长度小于等于 16 字节时保留原始字节;超出 16 字节则转换为 16 字节 MD5 值作为数据库主键,优化索引空间与查询性能。
128
+ - **元数据压缩**:使用 Varint(可变字节整型)编码方式压缩存储文件大小和修改时间。
129
+ - **事务安全**:将更新与删除操作合并在单个数据库事务中执行,确保数据一致性。
130
+ - **灵活过滤**:支持通过自定义回调函数过滤指定类型的文件与目录。
131
+ - **原生依赖**:基于 Bun 内置 `bun:sqlite` 模块,无需额外安装或编译数据库驱动。
92
132
 
93
133
  ## 使用演示
94
134
 
135
+ ### 基础增量扫描
136
+
95
137
  ```javascript
96
138
  import scan from "@1-/scan";
97
139
 
98
- const dir = "./src";
99
- const dbPath = "./files.db";
140
+ const dir = "./data";
141
+ const db_path = "./scan_record.db";
142
+
143
+ // 扫描目录并同步至 SQLite,返回发生变更的相对路径列表与更新函数
144
+ const [updated_paths, upsert] = await scan(dir, db_path);
145
+
146
+ // 退出作用域时自动关闭数据库
147
+ using _upsert = upsert;
148
+
149
+ console.log("更新文件列表:", updated_paths);
100
150
 
101
- // 扫描目录并同步至 SQLite 数据库,返回有变更的相对路径数组
102
- const updatedPaths = await scan(dir, dbPath);
103
- console.log(updatedPaths);
151
+ // 更新已处理文件的元数据至数据库
152
+ for (const rel_path of updated_paths) {
153
+ await upsert(rel_path);
154
+ }
155
+ ```
156
+
157
+ ### 带有忽略规则的扫描
158
+
159
+ ```javascript
160
+ import scan from "@1-/scan";
161
+
162
+ const dir = "./data";
163
+ const db_path = "./scan_record.db";
164
+
165
+ // 忽略特定文件或目录
166
+ const ignore = (kind, rel_path) => {
167
+ return rel_path.startsWith("temp/") || rel_path === "config.json";
168
+ };
169
+
170
+ const [updated_paths, upsert] = await scan(dir, db_path, ignore);
171
+ using _upsert = upsert;
172
+
173
+ console.log("已同步,更新列表:", updated_paths);
174
+
175
+ for (const rel_path of updated_paths) {
176
+ await upsert(rel_path);
177
+ }
104
178
  ```
105
179
 
106
180
  ## 设计思路
107
181
 
108
- 模块调用流程:
182
+ 系统主入口调用各个独立模块完成增量扫描与数据同步流程。
109
183
 
110
184
  ```mermaid
111
185
  graph TD
112
- Entry["_.js (主入口)"] -->|打开数据库| Sqlite[sqlite.js]
113
- Entry -->|载入已有记录| Load[load.js]
114
- Entry -->|遍历并比对| DirWalk[dirWalk.js]
115
- DirWalk -->|扫描文件系统| Walk["@1-/walk/walkRelIgnore"]
116
- DirWalk -->|计算路径哈希| Hash[hash.js]
117
- Entry -->|写入变更数据| Save[save.js]
118
- Save -->|执行事务控制| Trans[trans.js]
186
+ Entry["_.js (主入口)"] -->|1. 初始化连接| Sqlite["sqlite.js"]
187
+ Entry -->|2. 加载已有记录| Load["load.js"]
188
+ Entry -->|3. 扫描文件系统并对比| DirWalk["dirWalk.js"]
189
+ DirWalk -->|调用| Walk["@1-/walk/walkRelIgnore"]
190
+ DirWalk -->|处理路径键| Hash["hash.js"]
191
+ Entry -->|4. 删除失效记录并返回更新函数| Trans["trans.js"]
192
+ Save["save.js (独立批量存储辅助模块)"] -->|事务保障| Trans
119
193
  ```
120
194
 
195
+ 1. **初始化连接 (`sqlite.js`)**:打开 SQLite 数据库,并配置自动释放连接机制。
196
+ 2. **加载记录 (`load.js`)**:若表不存在则自动创建,读取已记录的文件哈希、大小及修改时间,在内存中还原比对集合。
197
+ 3. **文件系统扫描 (`dirWalk.js`)**:递归遍历目录,利用 `hash.js` 将路径映射为 16 字节键。对比当前文件与数据库元数据(利用 `@3-/vb` 进行压缩状态对比),筛选出新增和修改的文件。
198
+ 4. **删除与返回更新函数**:使用 `trans.js` 开启事务,批量删除已被移除的无效记录,并返回变更的相对路径列表与 `upsert` 函数,供调用者按需持久化数据。
199
+ 5. **独立批量存储辅助模块 (`save.js`)**:导出的独立工具模块,用于在单个事务中一次性批量写入与删除。
200
+
121
201
  ## 技术栈
122
202
 
123
- - Bun:运行环境与测试工具
124
- - Bun SQLite:Bun 内置的高性能 SQLite 模块
125
- - `@1-/walk`:支持忽略规则的目录遍历工具
126
- - `@3-/vb`:可变长度整型编码器
127
- - `@3-/binmap` / `@3-/binset`:二进制哈希键容器
203
+ - **Bun**:JavaScript 运行时及测试框架。
204
+ - **Bun SQLite**:内置的轻量级、高性能 SQLite 实现。
205
+ - **@1-/walk**:支持过滤规则的目录递归遍历工具。
206
+ - **@3-/vb**:Varint(可变字节)编码与解码器。
207
+ - **@3-/binmap / @3-/binset**:针对二进制键优化的 Map 和 Set 容器。
128
208
 
129
209
  ## 目录结构
130
210
 
131
211
  ```
132
212
  .
133
213
  ├── src
134
- │ ├── _.js # 主入口,统筹扫描与同步逻辑
135
- │ ├── dirWalk.js # 递归遍历目录,比对筛选出变更文件
136
- │ ├── load.js # 读取数据库中全部记录,初始化数据表
137
- │ ├── save.js # 事务内执行批量插入与删除
138
- │ ├── hash.js # 计算相对路径哈希值或保留原始字节
139
- │ ├── sqlite.js # 管理 SQLite 数据库连接及资源释放
140
- │ └── trans.js # 封装数据库事务控制
141
- └── tests # 测试目录
214
+ │ ├── _.js # 核心流程控制器,调度各模块并返回变更及更新函数
215
+ │ ├── dirWalk.js # 遍历目录并比对元数据,输出变更队列
216
+ │ ├── hash.js # 将文件相对路径编码或计算为固定 16 字节键
217
+ │ ├── load.js # 查询数据库现有记录,若数据表缺失则执行初始化
218
+ │ ├── save.js # 独立导出的批量持久化与删除辅助函数
219
+ │ ├── sqlite.js # 创建并配置 SQLite 数据库实例
220
+ │ └── trans.js # 封装 SQLite 事务,提供异常回滚机制
221
+ └── tests # 单元测试模块
142
222
  ```
143
223
 
144
224
  ## 历史故事
145
225
 
146
- SQLite D. Richard Hipp 于 2000 年为驱逐舰控制系统编写。当时系统采用的商业数据库需要繁琐的管理,且一旦故障系统便无法运行。Hipp 设计出无服务器、零配置且直接读写单文件的 SQLite。
226
+ SQLite 的诞生与军事应用密切相关。2000 年,D. Richard Hipp 在为美国海军设计导弹驱逐舰板载损害控制系统软件时,遇到商业数据库由于配置复杂、日常需要专业维护且一旦连接丢失便会导致整个软件瘫痪的问题。Hipp 随即着手设计了一套无需任何独立服务器、零配置且直接对本地文件进行读写的嵌入式数据库,这便是 SQLite。
147
227
 
148
- 为节约存储空间,SQLite 内部采用可变长度整数(Varint)编码。本项目同样引入 Varint 压缩算法,对文件大小与修改时间做编码后再比对存储,延续了 SQLite 追求性能与紧凑空间的优良传统。
228
+ 为极限节约磁盘空间并降低读写延迟,SQLite 广泛应用了 Varint(可变字节整型)编码。在这种编码下,数值较小的整数(如常见的文件大小、序列号)仅占用 1 个字节,只有大数值才会占用更多字节。本项目中对文件大小和修改时间采用同样的压缩设计,从而秉承了 SQLite 极致节约空间与高效率的系统设计哲学。
149
229
  ../doc/zh/about.md
package/_.js CHANGED
@@ -3,25 +3,39 @@ import vbE from "@3-/vb/vbE.js";
3
3
  import sqlite from "./sqlite.js";
4
4
  import load from "./load.js";
5
5
  import dirWalk from "./dirWalk.js";
6
- import save from "./save.js";
6
+ import { stat } from "node:fs/promises";
7
+ import { join } from "node:path";
8
+ import int from "@3-/int";
9
+ import hash from "./hash.js";
10
+ import trans from "./trans.js";
7
11
 
8
- export default async (dir, db_path) => {
9
- using db = sqlite(db_path);
10
- const existing = new BinMap(),
12
+ export default async (dir, db_path, ignore) => {
13
+ const db = sqlite(db_path),
14
+ existing = new BinMap(),
11
15
  db_rows = load(db);
12
16
 
13
- for (const row of db_rows) {
14
- existing.set(row.hash, vbE([row.size, row.mtime]));
15
- }
17
+ db_rows.forEach(({ hash, size, mtime }) => existing.set(hash, vbE([size, mtime])));
18
+
19
+ const [scanned, to_update] = await dirWalk(dir, existing, ignore),
20
+ to_delete = db_rows.filter(({ hash }) => !scanned.has(hash)).map(({ hash }) => hash);
16
21
 
17
- const [scanned, to_update] = await dirWalk(dir, existing),
18
- to_delete = [];
19
- for (const row of db_rows) {
20
- if (!scanned.has(row.hash)) {
21
- to_delete.push(row.hash);
22
- }
22
+ if (to_delete.length > 0) {
23
+ trans(db, () => {
24
+ const del = db.prepare("DELETE FROM file WHERE hash=?");
25
+ to_delete.forEach((h) => del.run(h));
26
+ });
23
27
  }
24
28
 
25
- save(db, to_update, to_delete);
26
- return to_update.map(([rel_path]) => rel_path);
29
+ const insert = db.prepare("INSERT OR REPLACE INTO file(hash,size,mtime)VALUES(?,?,?)"),
30
+ upsert = async (rel_path) => {
31
+ const fp = join(dir, rel_path),
32
+ { size, mtimeMs } = await stat(fp),
33
+ mtime = int(mtimeMs),
34
+ h = hash(rel_path);
35
+ insert.run(h, size, mtime);
36
+ };
37
+
38
+ upsert[Symbol.dispose] = () => db.close();
39
+
40
+ return [to_update.map(([rel_path]) => rel_path), upsert];
27
41
  };
package/dirWalk.js CHANGED
@@ -8,11 +8,14 @@ import vbE from "@3-/vb/vbE.js";
8
8
  import int from "@3-/int";
9
9
  import hash from "./hash.js";
10
10
 
11
- export default async (dir, existing) => {
11
+ export default async (dir, existing, ignore) => {
12
12
  const scanned = new BinSet(),
13
13
  to_update = [];
14
14
 
15
15
  await walkRelIgnore(dir, async (kind, rel_path) => {
16
+ if (ignore && ignore(kind, rel_path) === false) {
17
+ return false;
18
+ }
16
19
  if (kind === FILE) {
17
20
  const { size, mtimeMs } = await stat(join(dir, rel_path)),
18
21
  mtime = int(mtimeMs),
package/hash.js CHANGED
@@ -3,5 +3,5 @@ import utf8e from "@3-/utf8/utf8e.js";
3
3
 
4
4
  export default (str) => {
5
5
  const buf = utf8e(str);
6
- return buf.length <= 16 ? buf : createHash("md5").update(buf).digest();
6
+ return buf.length <= 16 ? buf : new Uint8Array(createHash("md5").update(buf).digest());
7
7
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@1-/scan",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "description": "Incrementally scan directory files and track metadata in SQLite / 增量扫描目录文件并使用 SQLite 记录元数据",
5
5
  "keywords": [
6
6
  "scan",
package/save.js CHANGED
@@ -5,15 +5,11 @@ export default (db, to_update, to_delete) => {
5
5
  trans(db, () => {
6
6
  if (to_update.length > 0) {
7
7
  const insert = db.prepare("INSERT OR REPLACE INTO file(hash,size,mtime)VALUES(?,?,?)");
8
- for (const [_, h, size, mtime] of to_update) {
9
- insert.run(h, size, mtime);
10
- }
8
+ to_update.forEach(([_, h, size, mtime]) => insert.run(h, size, mtime));
11
9
  }
12
10
  if (to_delete.length > 0) {
13
11
  const del = db.prepare("DELETE FROM file WHERE hash=?");
14
- for (const h of to_delete) {
15
- del.run(h);
16
- }
12
+ to_delete.forEach((h) => del.run(h));
17
13
  }
18
14
  });
19
15
  }