@1-/scan 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
  <a id="en"></a>
6
6
  # @1-/scan : Incrementally scan directory files and track metadata in SQLite
7
7
 
8
- Incrementally scans directory files, compares file sizes and modification times to detect changes, synchronizes metadata to an SQLite database, and returns updated relative paths.
8
+ Incrementally scans directory files, compares file sizes and modification times to detect changes, synchronizes metadata to an SQLite database, and returns updated relative paths.
9
9
 
10
10
  ## Features
11
11
 
@@ -14,7 +14,7 @@ Incrementally scans directory files, compares file sizes and modification times
14
14
  - **Metadata Compression**: Compresses file sizes and modification times using Varint (variable-length byte) encoding.
15
15
  - **Transactional Integrity**: Packages updates and deletions in a single database transaction to guarantee consistency.
16
16
  - **Flexible Filtering**: Supports custom ignore callback functions to filter specific files and directories.
17
- - **Native Database**: Integrates Bun's native `bun:sqlite` module, eliminating external database driver dependencies.
17
+ - **Native Database**: Integrates Bun's native `bun:sqlite` module, eliminating external database driver dependencies.
18
18
 
19
19
  ## Usage
20
20
 
@@ -24,29 +24,43 @@ Incrementally scans directory files, compares file sizes and modification times
24
24
  import scan from "@1-/scan";
25
25
 
26
26
  const dir = "./data";
27
- const dbPath = "./scan_record.db";
27
+ const db_path = "./scan_record.db";
28
28
 
29
- // Scan directory and sync metadata to SQLite, returning modified relative paths
30
- const updatedPaths = await scan(dir, dbPath);
31
- console.log("Updated files:", updatedPaths);
29
+ // Scan directory and sync metadata to SQLite, returning modified relative paths and upsert function
30
+ const [updated_paths, upsert] = await scan(dir, db_path);
31
+
32
+ // Auto-close database when exiting scope
33
+ using _upsert = upsert;
34
+
35
+ console.log("Updated files:", updated_paths);
36
+
37
+ // Update scanned file metadata in database
38
+ for (const rel_path of updated_paths) {
39
+ await upsert(rel_path);
40
+ }
32
41
  ```
33
42
 
34
43
  ### Scan with Ignore Filter
35
44
 
36
45
  ```javascript
37
- import { FILE } from "@1-/walk";
38
46
  import scan from "@1-/scan";
39
47
 
40
48
  const dir = "./data";
41
- const dbPath = "./scan_record.db";
49
+ const db_path = "./scan_record.db";
42
50
 
43
51
  // Ignore temporary files and specific configurations
44
- const ignore = (kind, relPath) => {
45
- return relPath.startsWith("temp/") || relPath === "config.json";
52
+ const ignore = (kind, rel_path) => {
53
+ return rel_path.startsWith("temp/") || rel_path === "config.json";
46
54
  };
47
55
 
48
- const updatedPaths = await scan(dir, dbPath, ignore);
49
- console.log("Synced. Updated files:", updatedPaths);
56
+ const [updated_paths, upsert] = await scan(dir, db_path, ignore);
57
+ using _upsert = upsert;
58
+
59
+ console.log("Synced. Updated files:", updated_paths);
60
+
61
+ for (const rel_path of updated_paths) {
62
+ await upsert(rel_path);
63
+ }
50
64
  ```
51
65
 
52
66
  ## Design Ideas
@@ -55,19 +69,20 @@ The main entry orchestrates independent modules to execute the incremental scann
55
69
 
56
70
  ```mermaid
57
71
  graph TD
58
- Entry["_.js (Entry Point)"] -->|1. Initialize Connection| Sqlite[sqlite.js]
59
- Entry -->|2. Load Existing Records| Load[load.js]
60
- Entry -->|3. Walk & Compare Files| DirWalk[dirWalk.js]
72
+ Entry["_.js (Entry Point)"] -->|1. Initialize Connection| Sqlite["sqlite.js"]
73
+ Entry -->|2. Load Existing Records| Load["load.js"]
74
+ Entry -->|3. Walk & Compare Files| DirWalk["dirWalk.js"]
61
75
  DirWalk -->|Invoke| Walk["@1-/walk/walkRelIgnore"]
62
- DirWalk -->|Process Path Keys| Hash[hash.js]
63
- Entry -->|4. Persist Changes| Save[save.js]
64
- Save -->|Transaction Wrapper| Trans[trans.js]
76
+ DirWalk -->|Process Path Keys| Hash["hash.js"]
77
+ Entry -->|4. Delete Absent & Return Upsert| Trans["trans.js"]
78
+ Save["save.js (Independent Sync Helper)"] -->|Transaction Wrapper| Trans
65
79
  ```
66
80
 
67
- 1. **Initialize Connection (`sqlite.js`)**: Opens the SQLite database connection and configures automatic connection disposal.
68
- 2. **Load Records (`load.js`)**: Automatically creates the schema if missing, retrieves existing file hashes, sizes, and modification times, and reconstructs the reference set in memory.
69
- 3. **Walk & Compare (`dirWalk.js`)**: Traverses the directory structure recursively. Paths are transformed into 16-byte keys via `hash.js`. File attributes are encoded using `@3-/vb` and compared against database records to identify additions and modifications.
70
- 4. **Persist Changes (`save.js`)**: Executes bulk inserts and deletions in a single transaction via `trans.js` to update database state.
81
+ 1. **Initialize Connection (`sqlite.js`)**: Opens the SQLite database connection and configures automatic connection disposal.
82
+ 2. **Load Records (`load.js`)**: Automatically creates the schema if missing, retrieves existing file hashes, sizes, and modification times, and reconstructs the reference set in memory.
83
+ 3. **Walk & Compare (`dirWalk.js`)**: Traverses the directory structure recursively. Paths are transformed into 16-byte keys via `hash.js`. File attributes are encoded using `@3-/vb` and compared against database records to identify additions and modifications.
84
+ 4. **Delete & Return Upsert**: Uses `trans.js` to delete, within a single transaction, the records of files that no longer exist, then returns the modified relative paths together with an `upsert` function so that the caller can update database records.
85
+ 5. **Independent Sync Helper (`save.js`)**: A standalone exported module that executes bulk inserts and deletions in a single transaction.
71
86
 
72
87
  ## Tech Stack
73
88
 
@@ -82,11 +97,11 @@ graph TD
82
97
  ```
83
98
  .
84
99
  ├── src
85
- │ ├── _.js # Entry point coordinating scanning and synchronization
100
+ │ ├── _.js # Entry point coordinating scanning and returning upsert helper
86
101
  │ ├── dirWalk.js # Directory traverser comparing file metadata
87
102
  │ ├── hash.js # Hashing helper mapping paths to 16-byte keys
88
103
  │ ├── load.js # Database loader initializing schema and loading records
89
- │ ├── save.js # Writer executing bulk updates and deletions
104
+ │ ├── save.js # Independent helper executing bulk updates and deletions
90
105
  │ ├── sqlite.js # Connection manager instantiating SQLite database
91
106
  │ └── trans.js # Transaction wrapper providing rollback mechanism
92
107
  └── tests # Test suites
@@ -123,29 +138,43 @@ To conserve disk space and reduce I/O overhead, SQLite utilizes Varint (variable
123
138
  import scan from "@1-/scan";
124
139
 
125
140
  const dir = "./data";
126
- const dbPath = "./scan_record.db";
141
+ const db_path = "./scan_record.db";
127
142
 
128
- // 扫描目录并同步至 SQLite,返回发生变更的相对路径列表
129
- const updatedPaths = await scan(dir, dbPath);
130
- console.log("更新文件列表:", updatedPaths);
143
+ // 扫描目录并同步至 SQLite,返回发生变更的相对路径列表与更新函数
144
+ const [updated_paths, upsert] = await scan(dir, db_path);
145
+
146
+ // 退出作用域时自动关闭数据库
147
+ using _upsert = upsert;
148
+
149
+ console.log("更新文件列表:", updated_paths);
150
+
151
+ // 更新已处理文件的元数据至数据库
152
+ for (const rel_path of updated_paths) {
153
+ await upsert(rel_path);
154
+ }
131
155
  ```
132
156
 
133
157
  ### 带有忽略规则的扫描
134
158
 
135
159
  ```javascript
136
- import { FILE } from "@1-/walk";
137
160
  import scan from "@1-/scan";
138
161
 
139
162
  const dir = "./data";
140
- const dbPath = "./scan_record.db";
163
+ const db_path = "./scan_record.db";
141
164
 
142
165
  // 忽略特定文件或目录
143
- const ignore = (kind, relPath) => {
144
- return relPath.startsWith("temp/") || relPath === "config.json";
166
+ const ignore = (kind, rel_path) => {
167
+ return rel_path.startsWith("temp/") || rel_path === "config.json";
145
168
  };
146
169
 
147
- const updatedPaths = await scan(dir, dbPath, ignore);
148
- console.log("已同步,更新列表:", updatedPaths);
170
+ const [updated_paths, upsert] = await scan(dir, db_path, ignore);
171
+ using _upsert = upsert;
172
+
173
+ console.log("已同步,更新列表:", updated_paths);
174
+
175
+ for (const rel_path of updated_paths) {
176
+ await upsert(rel_path);
177
+ }
149
178
  ```
150
179
 
151
180
  ## 设计思路
@@ -154,19 +183,20 @@ console.log("已同步,更新列表:", updatedPaths);
154
183
 
155
184
  ```mermaid
156
185
  graph TD
157
- Entry["_.js (主入口)"] -->|1. 初始化连接| Sqlite[sqlite.js]
158
- Entry -->|2. 加载已有记录| Load[load.js]
159
- Entry -->|3. 扫描文件系统并对比| DirWalk[dirWalk.js]
186
+ Entry["_.js (主入口)"] -->|1. 初始化连接| Sqlite["sqlite.js"]
187
+ Entry -->|2. 加载已有记录| Load["load.js"]
188
+ Entry -->|3. 扫描文件系统并对比| DirWalk["dirWalk.js"]
160
189
  DirWalk -->|调用| Walk["@1-/walk/walkRelIgnore"]
161
- DirWalk -->|处理路径键| Hash[hash.js]
162
- Entry -->|4. 持久化数据变更| Save[save.js]
163
- Save -->|事务保障| Trans[trans.js]
190
+ DirWalk -->|处理路径键| Hash["hash.js"]
191
+ Entry -->|4. 删除失效记录并返回更新函数| Trans["trans.js"]
192
+ Save["save.js (独立批量存储辅助模块)"] -->|事务保障| Trans
164
193
  ```
165
194
 
166
195
  1. **初始化连接 (`sqlite.js`)**:打开 SQLite 数据库,并配置自动释放连接机制。
167
196
  2. **加载记录 (`load.js`)**:若表不存在则自动创建,读取已记录的文件哈希、大小及修改时间,在内存中还原比对集合。
168
197
  3. **文件系统扫描 (`dirWalk.js`)**:递归遍历目录,利用 `hash.js` 将路径映射为 16 字节键。对比当前文件与数据库元数据(利用 `@3-/vb` 进行压缩状态对比),筛选出新增和修改的文件。
169
- 4. **数据存储 (`save.js`)**:使用 `trans.js` 开启事务,将需要删除的无效记录及需要更新的元数据批量写入 SQLite 数据库。
198
+ 4. **删除与返回更新函数**:使用 `trans.js` 开启事务,批量删除已被移除的无效记录,并返回变更的相对路径列表与 `upsert` 函数,供调用者按需持久化数据。
199
+ 5. **独立批量存储辅助模块 (`save.js`)**:导出的独立工具模块,用于在单个事务中一次性批量写入与删除。
170
200
 
171
201
  ## 技术栈
172
202
 
@@ -181,11 +211,11 @@ graph TD
181
211
  ```
182
212
  .
183
213
  ├── src
184
- │ ├── _.js # 核心流程控制器,调度各模块完成增量同步
214
+ │ ├── _.js # 核心流程控制器,调度各模块并返回变更及更新函数
185
215
  │ ├── dirWalk.js # 遍历目录并比对元数据,输出变更队列
186
216
  │ ├── hash.js # 将文件相对路径编码或计算为固定 16 字节键
187
217
  │ ├── load.js # 查询数据库现有记录,若数据表缺失则执行初始化
188
- │ ├── save.js # 执行批量写入与删除操作
218
+ │ ├── save.js # 独立导出的批量持久化与删除辅助函数
189
219
  │ ├── sqlite.js # 创建并配置 SQLite 数据库实例
190
220
  │ └── trans.js # 封装 SQLite 事务,提供异常回滚机制
191
221
  └── tests # 单元测试模块
@@ -195,5 +225,5 @@ graph TD
195
225
 
196
226
  SQLite 的诞生与军事应用密切相关。2000 年,D. Richard Hipp 在为美国海军陆战队设计导弹驱逐舰板载损害控制系统软件时,遇到商业数据库由于配置复杂、日常需要专业维护且一旦连接丢失便会导致整个软件瘫痪的问题。Hipp 随即着手设计了一套无需任何独立服务器、零配置且直接对本地文件进行读写的嵌入式数据库,这便是 SQLite。
197
227
 
198
- 为极限节约磁盘空间和降低读写延迟,SQLite 广泛应用了 Varint(可变字节整型)编码。在这种编码下,数值较小的整数(如常见的文件大小、序列号)仅占用 1 个字节,只有大数值才会占用更多字节。本项目中对文件大小和修改时间采用同样的压缩设计,从而秉承了 SQLite 极致节约空间与高效率的系统设计哲学。
228
+ 为极限节约磁盘空间和降低读写延迟,SQLite 广泛应用了 Varint(可变字节整型)编码。在这种编码下,数值较小的整数(如常见的文件大小、序列号)仅占用 1 个字节,只有大数值才会占用更多字节。本项目中对文件大小和修改时间采用同样的压缩设计,从而秉承了 SQLite 极致节约空间与高效率的系统设计哲学。
199
229
  ../doc/zh/about.md
package/_.js CHANGED
@@ -3,25 +3,39 @@ import vbE from "@3-/vb/vbE.js";
3
3
  import sqlite from "./sqlite.js";
4
4
  import load from "./load.js";
5
5
  import dirWalk from "./dirWalk.js";
6
- import save from "./save.js";
6
+ import { stat } from "node:fs/promises";
7
+ import { join } from "node:path";
8
+ import int from "@3-/int";
9
+ import hash from "./hash.js";
10
+ import trans from "./trans.js";
7
11
 
8
12
  export default async (dir, db_path, ignore) => {
9
- using db = sqlite(db_path);
10
- const existing = new BinMap(),
13
+ const db = sqlite(db_path),
14
+ existing = new BinMap(),
11
15
  db_rows = load(db);
12
16
 
13
- for (const row of db_rows) {
14
- existing.set(row.hash, vbE([row.size, row.mtime]));
15
- }
17
+ db_rows.forEach(({ hash, size, mtime }) => existing.set(hash, vbE([size, mtime])));
16
18
 
17
19
  const [scanned, to_update] = await dirWalk(dir, existing, ignore),
18
- to_delete = [];
19
- for (const row of db_rows) {
20
- if (!scanned.has(row.hash)) {
21
- to_delete.push(row.hash);
22
- }
20
+ to_delete = db_rows.filter(({ hash }) => !scanned.has(hash)).map(({ hash }) => hash);
21
+
22
+ if (to_delete.length > 0) {
23
+ trans(db, () => {
24
+ const del = db.prepare("DELETE FROM file WHERE hash=?");
25
+ to_delete.forEach((h) => del.run(h));
26
+ });
23
27
  }
24
28
 
25
- save(db, to_update, to_delete);
26
- return to_update.map(([rel_path]) => rel_path);
29
+ const insert = db.prepare("INSERT OR REPLACE INTO file(hash,size,mtime)VALUES(?,?,?)"),
30
+ upsert = async (rel_path) => {
31
+ const fp = join(dir, rel_path),
32
+ { size, mtimeMs } = await stat(fp),
33
+ mtime = int(mtimeMs),
34
+ h = hash(rel_path);
35
+ insert.run(h, size, mtime);
36
+ };
37
+
38
+ upsert[Symbol.dispose] = () => db.close();
39
+
40
+ return [to_update.map(([rel_path]) => rel_path), upsert];
27
41
  };
package/dirWalk.js CHANGED
@@ -13,7 +13,7 @@ export default async (dir, existing, ignore) => {
13
13
  to_update = [];
14
14
 
15
15
  await walkRelIgnore(dir, async (kind, rel_path) => {
16
- if (ignore && ignore(kind, rel_path)) {
16
+ if (ignore && ignore(kind, rel_path) === false) {
17
17
  return false;
18
18
  }
19
19
  if (kind === FILE) {
package/hash.js CHANGED
@@ -3,5 +3,5 @@ import utf8e from "@3-/utf8/utf8e.js";
3
3
 
4
4
  export default (str) => {
5
5
  const buf = utf8e(str);
6
- return buf.length <= 16 ? buf : createHash("md5").update(buf).digest();
6
+ return buf.length <= 16 ? buf : new Uint8Array(createHash("md5").update(buf).digest());
7
7
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@1-/scan",
3
- "version": "0.1.3",
3
+ "version": "0.1.4",
4
4
  "description": "Incrementally scan directory files and track metadata in SQLite / 增量扫描目录文件并使用 SQLite 记录元数据",
5
5
  "keywords": [
6
6
  "scan",
package/save.js CHANGED
@@ -5,15 +5,11 @@ export default (db, to_update, to_delete) => {
5
5
  trans(db, () => {
6
6
  if (to_update.length > 0) {
7
7
  const insert = db.prepare("INSERT OR REPLACE INTO file(hash,size,mtime)VALUES(?,?,?)");
8
- for (const [_, h, size, mtime] of to_update) {
9
- insert.run(h, size, mtime);
10
- }
8
+ to_update.forEach(([_, h, size, mtime]) => insert.run(h, size, mtime));
11
9
  }
12
10
  if (to_delete.length > 0) {
13
11
  const del = db.prepare("DELETE FROM file WHERE hash=?");
14
- for (const h of to_delete) {
15
- del.run(h);
16
- }
12
+ to_delete.forEach((h) => del.run(h));
17
13
  }
18
14
  });
19
15
  }