sftp-push-sync 2.5.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,15 +1,19 @@
1
1
  # Changelog
2
2
 
3
- ## [2.4.0] - 2026-03-04
3
+ ## [3.0.0] - 2026-03-04
4
+
5
+ - Switched from a JSON-file based hash cache to an NDJSON-based cache implementation.
6
+ - Entries are held in in-memory Maps at runtime and streamed to disk as NDJSON on save
7
+ - Scales to 100,000+ files without memory issues
8
+ - Auto-persist (no explicit saving required)
9
+ - Auto-migration: an existing JSON cache (.sync-cache.prod.json) is automatically migrated to NDJSON (.sync-cache.prod.ndjson)
10
+
11
+ ## [2.5.0] - 2026-03-04
4
12
 
5
13
  - Parallel remote walker walkers.mjs: scans 8 directories simultaneously
6
14
  - Batch analysis with concurrency compare.mjs: 8 file comparisons in parallel
7
15
  - Parallel hash calculation: local + remote hash simultaneously
8
- - Keep-alive: SftpPushSyncApp.mjs prevents server disconnection
9
-
10
- ## [2.3.0] - 2026-03-04
11
-
12
- - Keep-Alive enabled - a Keep-Alive packet is sent every 10 seconds.
16
+ - Keep-alive: SftpPushSyncApp.mjs prevents server disconnection. A Keep-Alive packet is sent every 10 seconds.
13
17
 
14
18
  ## [2.1.0] - 2025-11-19
15
19
 
package/README.md CHANGED
@@ -26,6 +26,11 @@ Features:
26
26
 
27
27
  The file `sftp-push-sync.mjs` is pure JavaScript (ESM), not TypeScript. Node.js can execute it directly as long as "type": "module" is specified in package.json or the file has the extension .mjs.
28
28
 
29
+ ## Breaking changes in 3.0.0
30
+
31
+ - New Cache Mechanism: NDJSON instead of JSON.
32
+ - The cache can now handle any number of files.
33
+
29
34
  ## Breaking changes in 2.0.0
30
35
 
31
36
  - The flags `--upload-list` / `--download-list` have been replaced by
@@ -253,12 +258,12 @@ However, it should also manage directories:
253
258
 
254
259
  ## Which files are created?
255
260
 
256
- - The cache files: `.sync-cache.*.json`
261
+ - The cache files: `.sync-cache.*.ndjson`. The old ones can be deleted: `.sync-cache.*.json`
257
262
  - The log file: `.sftp-push-sync.{target}.log` (Optional, overwritten with each run)
258
263
 
259
264
  You can safely delete the local cache at any time. The first analysis will then take longer, because remote hashes will be streamed again. After that, everything will run fast.
260
265
 
261
- Note 1: The first run always takes a while, especially with lots of media – so be patient! Once the cache is full, it will be faster.
266
+ Note 1: The first run always takes a while, especially with lots of media – so be patient! Once the cache is full, it will be faster.
262
267
  Note 2: Reliability and accuracy are more important to me than speed.
263
268
 
264
269
  ## Example Output
@@ -80,6 +80,7 @@ let DRY_RUN = false;
80
80
  let RUN_UPLOAD_LIST = false;
81
81
  let RUN_DOWNLOAD_LIST = false;
82
82
  let SKIP_SYNC = false;
83
+ let SIZE_ONLY = false;
83
84
  let cliLogLevel = null;
84
85
  let configPath = undefined;
85
86
 
@@ -92,6 +93,9 @@ for (let i = 0; i < rest.length; i += 1) {
92
93
  case "--dry-run":
93
94
  DRY_RUN = true;
94
95
  break;
96
+ case "--size-only":
97
+ SIZE_ONLY = true;
98
+ break;
95
99
  case "--sidecar-upload":
96
100
  RUN_UPLOAD_LIST = true;
97
101
  break;
@@ -196,4 +200,4 @@ main().catch((err) => {
196
200
  console.error(err);
197
201
  }
198
202
  process.exit(1);
199
- });
203
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sftp-push-sync",
3
- "version": "2.5.0",
3
+ "version": "3.0.0",
4
4
  "description": "SFTP sync tool for Hugo projects (local to remote, with hash cache)",
5
5
  "type": "module",
6
6
  "bin": {
@@ -17,7 +17,7 @@ import { SyncLogger } from "./SyncLogger.mjs";
17
17
  import { ScanProgressController } from "./ScanProgressController.mjs";
18
18
 
19
19
  import { toPosix, shortenPathForProgress } from "../helpers/directory.mjs";
20
- import { createHashCache } from "../helpers/hashing.mjs";
20
+ import { createHashCacheNDJSON, migrateFromJsonCache } from "../helpers/hash-cache-ndjson.mjs";
21
21
  import { walkLocal, walkRemote } from "../helpers/walkers.mjs";
22
22
  import {
23
23
  analyseDifferences,
@@ -188,6 +188,50 @@ export class SftpPushSyncApp {
188
188
  this._consoleAndLog("", ...msg);
189
189
  }
190
190
 
191
+ // ---------------------------------------------------------
192
+ // SFTP Connection Helpers
193
+ // ---------------------------------------------------------
194
+
195
+ /**
196
+ * Check if SFTP connection is still alive
197
+ */
198
+ async _isConnected(sftp) {
199
+ try {
200
+ // Try a minimal operation to check connection
201
+ await sftp.cwd();
202
+ return true;
203
+ } catch {
204
+ return false;
205
+ }
206
+ }
207
+
208
+ /**
209
+ * Reconnect to SFTP server
210
+ */
211
+ async _reconnect(sftp) {
212
+ try {
213
+ await sftp.end();
214
+ } catch {
215
+ // Ignore errors when closing dead connection
216
+ }
217
+
218
+ await sftp.connect({
219
+ host: this.connection.host,
220
+ port: this.connection.port,
221
+ username: this.connection.user,
222
+ password: this.connection.password,
223
+ keepaliveInterval: 10000,
224
+ keepaliveCountMax: 10,
225
+ readyTimeout: 30000,
226
+ });
227
+
228
+ if (sftp.client) {
229
+ sftp.client.setMaxListeners(50);
230
+ }
231
+
232
+ this.log(`${TAB_A}${pc.green("✔ Reconnected to SFTP.")}`);
233
+ }
234
+
191
235
  // ---------------------------------------------------------
192
236
  // Pattern-Helper
193
237
  // ---------------------------------------------------------
@@ -650,14 +694,20 @@ export class SftpPushSyncApp {
650
694
  ];
651
695
  this.autoExcluded = new Set();
652
696
 
653
- // Hash-Cache
654
- const syncCacheName =
655
- targetConfig.syncCache || `.sync-cache.${target}.json`;
656
- const cachePath = path.resolve(syncCacheName);
657
- this.hashCache = createHashCache({
658
- cachePath,
697
+ // Hash-Cache (NDJSON - human-readable, scales to 100k+ files)
698
+ const oldJsonCacheName = targetConfig.syncCache || `.sync-cache.${target}.json`;
699
+ const oldJsonCachePath = path.resolve(oldJsonCacheName);
700
+ const ndjsonCachePath = path.resolve(`.sync-cache.${target}.ndjson`);
701
+
702
+ // Migrate from old JSON cache if exists
703
+ const migration = await migrateFromJsonCache(oldJsonCachePath, ndjsonCachePath, target);
704
+ if (migration.migrated) {
705
+ console.log(pc.green(` ✔ Migrated ${migration.localCount + migration.remoteCount} cache entries from JSON to NDJSON`));
706
+ }
707
+
708
+ this.hashCache = await createHashCacheNDJSON({
709
+ cachePath: ndjsonCachePath,
659
710
  namespace: target,
660
- flushInterval: 50,
661
711
  });
662
712
 
663
713
  // Logger
@@ -736,12 +786,12 @@ export class SftpPushSyncApp {
736
786
  readyTimeout: 30000, // 30s timeout for initial connection
737
787
  });
738
788
  connected = true;
739
-
789
+
740
790
  // Increase max listeners for parallel operations
741
791
  if (sftp.client) {
742
792
  sftp.client.setMaxListeners(50);
743
793
  }
744
-
794
+
745
795
  this.log(`${TAB_A}${pc.green("✔ Connected to SFTP.")}`);
746
796
 
747
797
  if (!skipSync && !fs.existsSync(this.connection.localRoot)) {
@@ -850,7 +900,7 @@ export class SftpPushSyncApp {
850
900
  // Phase 3 – Analyse Differences (delegiert an Helper)
851
901
  this.log(pc.bold(pc.cyan("🔎 Phase 3: Compare & Decide …")));
852
902
 
853
- const { getLocalHash, getRemoteHash, save: saveCache } = this.hashCache;
903
+ const { getLocalHash, getRemoteHash } = this.hashCache;
854
904
 
855
905
  const diffResult = await analyseDifferences({
856
906
  local,
@@ -885,6 +935,12 @@ export class SftpPushSyncApp {
885
935
  this.log("");
886
936
  this.log(pc.bold(pc.cyan("🧹 Phase 4: Removing orphaned remote files …")));
887
937
 
938
+ // Reconnect if connection was lost during analysis
939
+ if (!await this._isConnected(sftp)) {
940
+ this.log(`${TAB_A}${pc.yellow("⚠ Connection lost, reconnecting…")}`);
941
+ await this._reconnect(sftp);
942
+ }
943
+
888
944
  toDelete = computeRemoteDeletes({ local, remote });
889
945
 
890
946
  if (toDelete.length === 0) {
@@ -899,6 +955,13 @@ export class SftpPushSyncApp {
899
955
  if (!dryRun && (toAdd.length || toUpdate.length)) {
900
956
  this.log("");
901
957
  this.log(pc.bold(pc.cyan("📁 Preparing remote directories …")));
958
+
959
+ // Ensure connection before directory operations
960
+ if (!await this._isConnected(sftp)) {
961
+ this.log(`${TAB_A}${pc.yellow("⚠ Connection lost, reconnecting…")}`);
962
+ await this._reconnect(sftp);
963
+ }
964
+
902
965
  await this.ensureAllRemoteDirsExist(
903
966
  sftp,
904
967
  this.connection.remoteRoot,
@@ -912,6 +975,12 @@ export class SftpPushSyncApp {
912
975
  this.log("");
913
976
  this.log(pc.bold(pc.cyan("🚚 Phase 5: Apply changes …")));
914
977
 
978
+ // Ensure fresh connection before uploads
979
+ if (!await this._isConnected(sftp)) {
980
+ this.log(`${TAB_A}${pc.yellow("⚠ Connection lost, reconnecting…")}`);
981
+ await this._reconnect(sftp);
982
+ }
983
+
915
984
  // Upload new files
916
985
  await this.runTasks(
917
986
  toAdd,
@@ -976,13 +1045,21 @@ export class SftpPushSyncApp {
976
1045
  this.log(
977
1046
  pc.bold(pc.cyan("🧹 Cleaning up empty remote directories …"))
978
1047
  );
1048
+
1049
+ // Ensure connection before cleanup
1050
+ if (!await this._isConnected(sftp)) {
1051
+ this.log(`${TAB_A}${pc.yellow("⚠ Connection lost, reconnecting…")}`);
1052
+ await this._reconnect(sftp);
1053
+ }
1054
+
979
1055
  await this.cleanupEmptyDirs(sftp, this.connection.remoteRoot, dryRun);
980
1056
  }
981
1057
 
982
1058
  const duration = ((Date.now() - start) / 1000).toFixed(2);
983
1059
 
984
- // Cache am Ende sicher schreiben
985
- await saveCache(true);
1060
+ // Save cache and close
1061
+ await this.hashCache.save();
1062
+ await this.hashCache.close();
986
1063
 
987
1064
  // Summary
988
1065
  this.log(hr1());
@@ -1039,9 +1116,9 @@ export class SftpPushSyncApp {
1039
1116
  }
1040
1117
  process.exitCode = 1;
1041
1118
  try {
1042
- // falls hashCache existiert, Cache noch flushen
1043
- if (this.hashCache?.save) {
1044
- await this.hashCache.save(true);
1119
+ // falls hashCache existiert, Cache schließen
1120
+ if (this.hashCache?.close) {
1121
+ await this.hashCache.close();
1045
1122
  }
1046
1123
  } catch {
1047
1124
  // ignore
@@ -1067,4 +1144,4 @@ export class SftpPushSyncApp {
1067
1144
  }
1068
1145
  }
1069
1146
  }
1070
- }
1147
+ }
@@ -1,16 +1,16 @@
1
1
  /**
2
2
  * compare.mjs
3
- *
3
+ *
4
4
  * @author Carsten Nichte, 2025 / https://carsten-nichte.de/
5
- *
6
- */
5
+ *
6
+ */
7
7
  // src/helpers/compare.mjs
8
8
  import fsp from "fs/promises";
9
9
  import path from "path";
10
10
 
11
11
  /**
12
12
  * Analysiert Unterschiede zwischen local- und remote-Maps.
13
- * Optimiert: Parallelisierte Analyse mit Concurrency-Limit.
13
+ * Optimiert: Echtes Batch-Processing mit Concurrency-Limit.
14
14
  *
15
15
  * Erwartete Struktur:
16
16
  * local: Map<rel, { rel, localPath, size, mtimeMs, isText? }>
@@ -22,7 +22,7 @@ import path from "path";
22
22
  * - getLocalHash / getRemoteHash: from createHashCache
23
23
  * - analyzeChunk: Progress-Schrittgröße
24
24
  * - updateProgress(prefix, current, total, rel): optional
25
- * - concurrency: Max parallele Vergleiche (default: 8)
25
+ * - concurrency: Max parallele Vergleiche (default: 10)
26
26
  */
27
27
  export async function analyseDifferences({
28
28
  local,
@@ -33,7 +33,7 @@ export async function analyseDifferences({
33
33
  getRemoteHash,
34
34
  analyzeChunk = 10,
35
35
  updateProgress,
36
- concurrency = 5,
36
+ concurrency = 10,
37
37
  }) {
38
38
  const toAdd = [];
39
39
  const toUpdate = [];
@@ -42,104 +42,97 @@ export async function analyseDifferences({
42
42
  const totalToCheck = localKeys.length;
43
43
  let checked = 0;
44
44
 
45
- // Schneller Vorab-Check: Dateien nur lokal → direkt zu toAdd
46
- const keysToCompare = [];
45
+ // Phase 1: Schneller Vorab-Check ohne SFTP
46
+ // - Dateien nur lokal → direkt zu toAdd
47
+ // - Size-Vergleich für existierende Dateien
48
+ const keysNeedContentCompare = [];
49
+
47
50
  for (const rel of localKeys) {
51
+ const l = local.get(rel);
48
52
  const r = remote.get(rel);
49
53
  const remotePath = path.posix.join(remoteRoot, rel);
50
-
54
+
51
55
  if (!r) {
52
56
  // Datei existiert nur lokal → New (kein SFTP-Call nötig)
53
- toAdd.push({ rel, local: local.get(rel), remotePath });
54
- checked++;
55
- if (updateProgress && checked % analyzeChunk === 0) {
56
- updateProgress("Analyse: ", checked, totalToCheck, rel);
57
- }
57
+ toAdd.push({ rel, local: l, remotePath });
58
+ } else if (l.size !== r.size) {
59
+ // Size unterschiedlich → Changed (kein SFTP-Call nötig)
60
+ toUpdate.push({ rel, local: l, remote: r, remotePath });
58
61
  } else {
59
- keysToCompare.push(rel);
62
+ // Size gleich → Content-Vergleich nötig
63
+ keysNeedContentCompare.push(rel);
60
64
  }
61
- }
62
-
63
- // Parallele Verarbeitung mit Semaphore
64
- let activeCount = 0;
65
- const waiting = [];
66
-
67
- async function acquireSemaphore() {
68
- if (activeCount < concurrency) {
69
- activeCount++;
70
- return;
71
- }
72
- await new Promise((resolve) => waiting.push(resolve));
73
- activeCount++;
74
- }
75
65
 
76
- function releaseSemaphore() {
77
- activeCount--;
78
- if (waiting.length > 0) {
79
- const next = waiting.shift();
80
- next();
66
+ checked++;
67
+ if (updateProgress && checked % analyzeChunk === 0) {
68
+ updateProgress("Analyse (Size): ", checked, totalToCheck, rel);
81
69
  }
82
70
  }
83
71
 
84
- async function compareFile(rel) {
85
- await acquireSemaphore();
86
- try {
87
- const l = local.get(rel);
88
- const r = remote.get(rel);
89
- const remotePath = path.posix.join(remoteRoot, rel);
90
-
91
- // 1. Size-Vergleich (schnell, kein SFTP)
92
- if (l.size !== r.size) {
93
- toUpdate.push({ rel, local: l, remote: r, remotePath });
94
- return;
95
- }
96
-
97
- // 2. Content-Vergleich
98
- if (l.isText) {
99
- // Text-Datei: vollständiger inhaltlicher Vergleich
100
- const [localBuf, remoteBuf] = await Promise.all([
101
- fsp.readFile(l.localPath),
102
- sftp.get(r.remotePath),
103
- ]);
104
-
105
- const localStr = localBuf.toString("utf8");
106
- const remoteStr = (
107
- Buffer.isBuffer(remoteBuf) ? remoteBuf : Buffer.from(remoteBuf)
108
- ).toString("utf8");
109
-
110
- if (localStr !== remoteStr) {
111
- toUpdate.push({ rel, local: l, remote: r, remotePath });
112
- }
113
- } else {
114
- // Binary: Hash-Vergleich mit Cache
115
- if (!getLocalHash || !getRemoteHash) {
116
- toUpdate.push({ rel, local: l, remote: r, remotePath });
117
- return;
72
+ // Phase 2: Content-Vergleich in echten Batches
73
+ // Nur für Dateien mit gleicher Size
74
+ const totalContentCompare = keysNeedContentCompare.length;
75
+
76
+ for (let i = 0; i < totalContentCompare; i += concurrency) {
77
+ const batch = keysNeedContentCompare.slice(i, i + concurrency);
78
+
79
+ const batchResults = await Promise.all(
80
+ batch.map(async (rel) => {
81
+ const l = local.get(rel);
82
+ const r = remote.get(rel);
83
+ const remotePath = path.posix.join(remoteRoot, rel);
84
+
85
+ try {
86
+ if (l.isText) {
87
+ // Text-Datei: vollständiger inhaltlicher Vergleich
88
+ const [localBuf, remoteBuf] = await Promise.all([
89
+ fsp.readFile(l.localPath),
90
+ sftp.get(r.remotePath),
91
+ ]);
92
+
93
+ const localStr = localBuf.toString("utf8");
94
+ const remoteStr = (
95
+ Buffer.isBuffer(remoteBuf) ? remoteBuf : Buffer.from(remoteBuf)
96
+ ).toString("utf8");
97
+
98
+ return localStr !== remoteStr
99
+ ? { rel, local: l, remote: r, remotePath, changed: true }
100
+ : null;
101
+ } else {
102
+ // Binary: Hash-Vergleich mit Cache
103
+ if (!getLocalHash || !getRemoteHash) {
104
+ return { rel, local: l, remote: r, remotePath, changed: true };
105
+ }
106
+
107
+ const [localHash, remoteHash] = await Promise.all([
108
+ getLocalHash(rel, l),
109
+ getRemoteHash(rel, r, sftp),
110
+ ]);
111
+
112
+ return localHash !== remoteHash
113
+ ? { rel, local: l, remote: r, remotePath, changed: true }
114
+ : null;
115
+ }
116
+ } catch (err) {
117
+ // Bei Fehler als changed markieren (sicherer)
118
+ return { rel, local: l, remote: r, remotePath, changed: true };
118
119
  }
120
+ })
121
+ );
119
122
 
120
- const [localHash, remoteHash] = await Promise.all([
121
- getLocalHash(rel, l),
122
- getRemoteHash(rel, r, sftp),
123
- ]);
124
-
125
- if (localHash !== remoteHash) {
126
- toUpdate.push({ rel, local: l, remote: r, remotePath });
127
- }
128
- }
129
- } finally {
130
- releaseSemaphore();
131
- checked++;
132
- if (
133
- updateProgress &&
134
- (checked === 1 || checked % analyzeChunk === 0 || checked === totalToCheck)
135
- ) {
136
- updateProgress("Analyse: ", checked, totalToCheck, rel);
123
+ // Ergebnisse sammeln
124
+ for (const result of batchResults) {
125
+ if (result && result.changed) {
126
+ toUpdate.push({ rel: result.rel, local: result.local, remote: result.remote, remotePath: result.remotePath });
137
127
  }
138
128
  }
139
- }
140
129
 
141
- // Starte alle Vergleiche parallel (mit Concurrency-Limit durch Semaphore)
142
- await Promise.all(keysToCompare.map(compareFile));
130
+ // Progress update
131
+ const progressCount = Math.min(i + batch.length, totalContentCompare);
132
+ if (updateProgress) {
133
+ updateProgress("Analyse (Content): ", checked + progressCount, totalToCheck + totalContentCompare, batch[batch.length - 1]);
134
+ }
135
+ }
143
136
 
144
137
  return { toAdd, toUpdate };
145
138
  }
@@ -160,4 +153,4 @@ export function computeRemoteDeletes({ local, remote }) {
160
153
  }
161
154
 
162
155
  return toDelete;
163
- }
156
+ }
@@ -0,0 +1,299 @@
1
+ /**
2
+ * hash-cache-leveldb.mjs
3
+ *
4
+ * LevelDB-based hash cache for efficient storage of large file sets.
5
+ * Unlike the JSON-based cache, this scales to 100k+ files without memory issues.
6
+ *
7
+ * @author Carsten Nichte, 2025 / https://carsten-nichte.de/
8
+ */
9
+ import { Level } from "level";
10
+ import fs from "fs";
11
+ import fsp from "fs/promises";
12
+ import { createHash } from "crypto";
13
+ import { Writable } from "stream";
14
+
15
+ /**
16
+ * Streaming-SHA256 für lokale Datei
17
+ */
18
+ export function hashLocalFile(filePath) {
19
+ return new Promise((resolve, reject) => {
20
+ const hash = createHash("sha256");
21
+ const stream = fs.createReadStream(filePath);
22
+ stream.on("error", reject);
23
+ stream.on("data", (chunk) => hash.update(chunk));
24
+ stream.on("end", () => resolve(hash.digest("hex")));
25
+ });
26
+ }
27
+
28
+ /**
29
+ * Streaming-SHA256 für Remote-Datei via ssh2-sftp-client
30
+ */
31
+ export async function hashRemoteFile(sftp, remotePath) {
32
+ const hash = createHash("sha256");
33
+
34
+ const writable = new Writable({
35
+ write(chunk, enc, cb) {
36
+ hash.update(chunk);
37
+ cb();
38
+ },
39
+ });
40
+
41
+ await sftp.get(remotePath, writable);
42
+ return hash.digest("hex");
43
+ }
44
+
45
+ /**
46
+ * Creates a LevelDB-based hash cache.
47
+ *
48
+ * Database structure:
49
+ * local:<namespace>:<relPath> → JSON { size, mtimeMs, hash }
50
+ * remote:<namespace>:<relPath> → JSON { size, modifyTime, hash }
51
+ *
52
+ * @param {Object} options
53
+ * @param {string} options.cachePath - Path to the LevelDB directory (e.g., ".sync-cache-prod")
54
+ * @param {string} options.namespace - Namespace for keys (e.g., "prod")
55
+ */
56
+ export async function createHashCacheLevelDB({ cachePath, namespace }) {
57
+ const ns = namespace || "default";
58
+
59
+ // Open or create the LevelDB database
60
+ const db = new Level(cachePath, { valueEncoding: "json" });
61
+ await db.open();
62
+
63
+ function localKey(relPath) {
64
+ return `local:${ns}:${relPath}`;
65
+ }
66
+
67
+ function remoteKey(relPath) {
68
+ return `remote:${ns}:${relPath}`;
69
+ }
70
+
71
+ /**
72
+ * Get cached local hash or compute and store it
73
+ */
74
+ async function getLocalHash(rel, meta) {
75
+ const key = localKey(rel);
76
+
77
+ try {
78
+ const cached = await db.get(key);
79
+
80
+ // Cache hit: check if still valid (same size + mtime)
81
+ if (
82
+ cached &&
83
+ cached.size === meta.size &&
84
+ cached.mtimeMs === meta.mtimeMs &&
85
+ cached.hash
86
+ ) {
87
+ return cached.hash;
88
+ }
89
+ } catch (err) {
90
+ // Key not found - that's fine, we'll compute the hash
91
+ if (err.code !== "LEVEL_NOT_FOUND") {
92
+ throw err;
93
+ }
94
+ }
95
+
96
+ // Compute hash and store
97
+ const hash = await hashLocalFile(meta.localPath);
98
+ await db.put(key, {
99
+ size: meta.size,
100
+ mtimeMs: meta.mtimeMs,
101
+ hash,
102
+ });
103
+
104
+ return hash;
105
+ }
106
+
107
+ /**
108
+ * Get cached remote hash or compute and store it
109
+ */
110
+ async function getRemoteHash(rel, meta, sftp) {
111
+ const key = remoteKey(rel);
112
+
113
+ try {
114
+ const cached = await db.get(key);
115
+
116
+ // Cache hit: check if still valid (same size + modifyTime)
117
+ if (
118
+ cached &&
119
+ cached.size === meta.size &&
120
+ cached.modifyTime === meta.modifyTime &&
121
+ cached.hash
122
+ ) {
123
+ return cached.hash;
124
+ }
125
+ } catch (err) {
126
+ // Key not found - compute the hash
127
+ if (err.code !== "LEVEL_NOT_FOUND") {
128
+ throw err;
129
+ }
130
+ }
131
+
132
+ // Compute hash (downloads file content for hashing)
133
+ const hash = await hashRemoteFile(sftp, meta.remotePath);
134
+ await db.put(key, {
135
+ size: meta.size,
136
+ modifyTime: meta.modifyTime,
137
+ hash,
138
+ });
139
+
140
+ return hash;
141
+ }
142
+
143
+ /**
144
+ * Explicitly save (flush) - LevelDB auto-persists, but this ensures sync
145
+ */
146
+ async function save() {
147
+ // LevelDB auto-persists, nothing to do
148
+ }
149
+
150
+ /**
151
+ * Close the database connection
152
+ */
153
+ async function close() {
154
+ await db.close();
155
+ }
156
+
157
+ /**
158
+ * Get statistics about cache contents
159
+ */
160
+ async function getStats() {
161
+ let localCount = 0;
162
+ let remoteCount = 0;
163
+
164
+ for await (const key of db.keys()) {
165
+ if (key.startsWith(`local:${ns}:`)) {
166
+ localCount++;
167
+ } else if (key.startsWith(`remote:${ns}:`)) {
168
+ remoteCount++;
169
+ }
170
+ }
171
+
172
+ return { localCount, remoteCount };
173
+ }
174
+
175
+ /**
176
+ * Clean up stale entries (entries for files that no longer exist)
177
+ * @param {Set<string>} validLocalPaths - Set of currently existing local file paths
178
+ * @param {Set<string>} validRemotePaths - Set of currently existing remote file paths
179
+ */
180
+ async function cleanup(validLocalPaths, validRemotePaths) {
181
+ const batch = db.batch();
182
+ let deletedCount = 0;
183
+
184
+ for await (const key of db.keys()) {
185
+ if (key.startsWith(`local:${ns}:`)) {
186
+ const relPath = key.slice(`local:${ns}:`.length);
187
+ if (!validLocalPaths.has(relPath)) {
188
+ batch.del(key);
189
+ deletedCount++;
190
+ }
191
+ } else if (key.startsWith(`remote:${ns}:`)) {
192
+ const relPath = key.slice(`remote:${ns}:`.length);
193
+ if (!validRemotePaths.has(relPath)) {
194
+ batch.del(key);
195
+ deletedCount++;
196
+ }
197
+ }
198
+ }
199
+
200
+ await batch.write();
201
+ return deletedCount;
202
+ }
203
+
204
+ return {
205
+ getLocalHash,
206
+ getRemoteHash,
207
+ save,
208
+ close,
209
+ getStats,
210
+ cleanup,
211
+ db, // Expose for advanced usage
212
+ };
213
+ }
214
+
215
+ /**
216
+ * Migrate from old JSON cache to LevelDB
217
+ * @param {string} jsonCachePath - Path to old .sync-cache.json file
218
+ * @param {string} levelDbPath - Path to new LevelDB directory
219
+ * @param {string} namespace - Namespace for keys
220
+ */
221
+ export async function migrateFromJsonCache(jsonCachePath, levelDbPath, namespace) {
222
+ const ns = namespace || "default";
223
+
224
+ // Check if JSON cache exists
225
+ try {
226
+ await fsp.access(jsonCachePath);
227
+ } catch {
228
+ return { migrated: false, reason: "No JSON cache found" };
229
+ }
230
+
231
+ // Check if LevelDB already has data (don't re-migrate)
232
+ const db = new Level(levelDbPath, { valueEncoding: "json" });
233
+ await db.open();
234
+
235
+ let hasData = false;
236
+ for await (const _ of db.keys({ limit: 1 })) {
237
+ hasData = true;
238
+ break;
239
+ }
240
+
241
+ if (hasData) {
242
+ await db.close();
243
+ return { migrated: false, reason: "LevelDB already has data" };
244
+ }
245
+
246
+ // Read and parse JSON cache
247
+ let jsonCache;
248
+ try {
249
+ const raw = await fsp.readFile(jsonCachePath, "utf8");
250
+ jsonCache = JSON.parse(raw);
251
+ } catch (parseErr) {
252
+ await db.close();
253
+ // Rename corrupt file so it doesn't block future runs
254
+ try {
255
+ await fsp.rename(jsonCachePath, jsonCachePath + ".corrupt");
256
+ } catch {
257
+ // Ignore rename errors
258
+ }
259
+ return { migrated: false, reason: `JSON cache corrupt: ${parseErr.message}` };
260
+ }
261
+
262
+ // Migrate entries
263
+ const batch = db.batch();
264
+ let localCount = 0;
265
+ let remoteCount = 0;
266
+
267
+ if (jsonCache.local) {
268
+ for (const [key, value] of Object.entries(jsonCache.local)) {
269
+ // Keys in JSON were like "namespace:relPath"
270
+ const relPath = key.startsWith(`${ns}:`) ? key.slice(ns.length + 1) : key;
271
+ batch.put(`local:${ns}:${relPath}`, value);
272
+ localCount++;
273
+ }
274
+ }
275
+
276
+ if (jsonCache.remote) {
277
+ for (const [key, value] of Object.entries(jsonCache.remote)) {
278
+ const relPath = key.startsWith(`${ns}:`) ? key.slice(ns.length + 1) : key;
279
+ batch.put(`remote:${ns}:${relPath}`, value);
280
+ remoteCount++;
281
+ }
282
+ }
283
+
284
+ await batch.write();
285
+ await db.close();
286
+
287
+ // Optionally rename old cache to .bak
288
+ try {
289
+ await fsp.rename(jsonCachePath, jsonCachePath + ".bak");
290
+ } catch {
291
+ // Ignore rename errors
292
+ }
293
+
294
+ return {
295
+ migrated: true,
296
+ localCount,
297
+ remoteCount,
298
+ };
299
+ }
@@ -0,0 +1,412 @@
1
+ /**
2
+ * hash-cache-ndjson.mjs
3
+ *
4
+ * NDJSON-based hash cache for efficient storage of large file sets.
5
+ * Each line is a valid JSON object - human-readable and easy to debug.
6
+ * Scales to 100k+ files through streaming reads/writes.
7
+ *
8
+ * File format:
9
+ * {"t":"l","p":"prod:path/file.js","s":1234,"m":1234567890,"h":"sha256..."}
10
+ * {"t":"r","p":"prod:path/file.js","s":1234,"m":"2025-01-01","h":"sha256..."}
11
+ *
12
+ * Where: t=type (l=local, r=remote), p=path, s=size, m=mtime, h=hash
13
+ *
14
+ * @author Carsten Nichte, 2025 / https://carsten-nichte.de/
15
+ */
16
+ import fs from "fs";
17
+ import fsp from "fs/promises";
18
+ import { createHash } from "crypto";
19
+ import { createReadStream, createWriteStream } from "fs";
20
+ import { createInterface } from "readline";
21
+ import { Writable } from "stream";
22
+
23
+ /**
24
+ * Streaming-SHA256 für lokale Datei
25
+ */
26
+ export function hashLocalFile(filePath) {
27
+ return new Promise((resolve, reject) => {
28
+ const hash = createHash("sha256");
29
+ const stream = fs.createReadStream(filePath);
30
+ stream.on("error", reject);
31
+ stream.on("data", (chunk) => hash.update(chunk));
32
+ stream.on("end", () => resolve(hash.digest("hex")));
33
+ });
34
+ }
35
+
36
+ /**
37
+ * Streaming-SHA256 für Remote-Datei via ssh2-sftp-client
38
+ */
39
+ export async function hashRemoteFile(sftp, remotePath) {
40
+ const hash = createHash("sha256");
41
+
42
+ const writable = new Writable({
43
+ write(chunk, enc, cb) {
44
+ hash.update(chunk);
45
+ cb();
46
+ },
47
+ });
48
+
49
+ await sftp.get(remotePath, writable);
50
+ return hash.digest("hex");
51
+ }
52
+
53
+ /**
54
+ * Creates an NDJSON-based hash cache.
55
+ *
56
+ * Data is stored in Maps during runtime and persisted as NDJSON on save().
57
+ * Auto-saves every 1000 changes to prevent data loss on crash/abort.
58
+ *
59
+ * @param {Object} options
60
+ * @param {string} options.cachePath - Path to the NDJSON file (e.g., ".sync-cache.prod.ndjson")
61
+ * @param {string} options.namespace - Namespace for keys (e.g., "prod")
62
+ * @param {number} options.autoSaveInterval - Save after this many changes (default: 1000)
63
+ */
64
+ export async function createHashCacheNDJSON({ cachePath, namespace, autoSaveInterval = 1000 }) {
65
+ const ns = namespace || "default";
66
+
67
+ // In-memory storage
68
+ const localCache = new Map();
69
+ const remoteCache = new Map();
70
+
71
+ // Auto-save tracking
72
+ let changesSinceLastSave = 0;
73
+ let saveInProgress = false;
74
+
75
+ // Load existing cache if present
76
+ await loadCache();
77
+
78
+ /**
79
+ * Load cache from NDJSON file
80
+ */
81
+ async function loadCache() {
82
+ try {
83
+ await fsp.access(cachePath);
84
+ } catch {
85
+ // File doesn't exist - start fresh
86
+ return;
87
+ }
88
+
89
+ const fileStream = createReadStream(cachePath, { encoding: "utf8" });
90
+ const rl = createInterface({ input: fileStream, crlfDelay: Infinity });
91
+
92
+ let lineNum = 0;
93
+ for await (const line of rl) {
94
+ lineNum++;
95
+ if (!line.trim()) continue;
96
+
97
+ try {
98
+ const entry = JSON.parse(line);
99
+ const key = `${ns}:${entry.p}`;
100
+
101
+ if (entry.t === "l") {
102
+ localCache.set(key, {
103
+ size: entry.s,
104
+ mtimeMs: entry.m,
105
+ hash: entry.h,
106
+ });
107
+ } else if (entry.t === "r") {
108
+ remoteCache.set(key, {
109
+ size: entry.s,
110
+ modifyTime: entry.m,
111
+ hash: entry.h,
112
+ });
113
+ }
114
+ } catch (parseErr) {
115
+ // Skip corrupt lines, log warning
116
+ console.warn(` ⚠ Skipping corrupt cache line ${lineNum}: ${parseErr.message}`);
117
+ }
118
+ }
119
+ }
120
+
121
+ function localKey(relPath) {
122
+ return `${ns}:${relPath}`;
123
+ }
124
+
125
+ function remoteKey(relPath) {
126
+ return `${ns}:${relPath}`;
127
+ }
128
+
129
+ /**
130
+ * Check if auto-save is needed and perform it
131
+ */
132
+ async function checkAutoSave() {
133
+ changesSinceLastSave++;
134
+ if (changesSinceLastSave >= autoSaveInterval && !saveInProgress) {
135
+ saveInProgress = true;
136
+ try {
137
+ await save();
138
+ changesSinceLastSave = 0;
139
+ } finally {
140
+ saveInProgress = false;
141
+ }
142
+ }
143
+ }
144
+
145
+ /**
146
+ * Get cached local hash or compute and store it
147
+ */
148
+ async function getLocalHash(rel, meta) {
149
+ const key = localKey(rel);
150
+ const cached = localCache.get(key);
151
+
152
+ // Cache hit: check if still valid (same size + mtime)
153
+ if (
154
+ cached &&
155
+ cached.size === meta.size &&
156
+ Math.abs(cached.mtimeMs - meta.mtimeMs) < 1000
157
+ ) {
158
+ return cached.hash;
159
+ }
160
+
161
+ // Cache miss or stale: compute new hash
162
+ const filePath = meta.fullPath || meta.localPath;
163
+ const hash = await hashLocalFile(filePath);
164
+
165
+ localCache.set(key, {
166
+ size: meta.size,
167
+ mtimeMs: meta.mtimeMs,
168
+ hash,
169
+ });
170
+
171
+ // Auto-save periodically
172
+ await checkAutoSave();
173
+
174
+ return hash;
175
+ }
176
+
177
+ /**
178
+ * Get cached remote hash or compute and store it
179
+ */
180
+ async function getRemoteHash(rel, meta, sftp) {
181
+ const key = remoteKey(rel);
182
+ const cached = remoteCache.get(key);
183
+
184
+ // Cache hit: check if still valid (same size + modifyTime)
185
+ if (
186
+ cached &&
187
+ cached.size === meta.size &&
188
+ cached.modifyTime === meta.modifyTime
189
+ ) {
190
+ return cached.hash;
191
+ }
192
+
193
+ // Cache miss or stale: compute new hash
194
+ const filePath = meta.fullPath || meta.remotePath;
195
+ const hash = await hashRemoteFile(sftp, filePath);
196
+
197
+ remoteCache.set(key, {
198
+ size: meta.size,
199
+ modifyTime: meta.modifyTime,
200
+ hash,
201
+ });
202
+
203
+ // Auto-save periodically
204
+ await checkAutoSave();
205
+
206
+ return hash;
207
+ }
208
+
209
+ /**
210
+ * Save cache to NDJSON file using streaming writes
211
+ */
212
+ async function save() {
213
+ const tempPath = cachePath + ".tmp";
214
+ const writeStream = createWriteStream(tempPath, { encoding: "utf8" });
215
+
216
+ // Write local entries
217
+ for (const [fullKey, value] of localCache) {
218
+ // Extract relPath from "namespace:relPath"
219
+ const relPath = fullKey.startsWith(`${ns}:`)
220
+ ? fullKey.slice(ns.length + 1)
221
+ : fullKey;
222
+
223
+ const line = JSON.stringify({
224
+ t: "l",
225
+ p: relPath,
226
+ s: value.size,
227
+ m: value.mtimeMs,
228
+ h: value.hash,
229
+ });
230
+ writeStream.write(line + "\n");
231
+ }
232
+
233
+ // Write remote entries
234
+ for (const [fullKey, value] of remoteCache) {
235
+ const relPath = fullKey.startsWith(`${ns}:`)
236
+ ? fullKey.slice(ns.length + 1)
237
+ : fullKey;
238
+
239
+ const line = JSON.stringify({
240
+ t: "r",
241
+ p: relPath,
242
+ s: value.size,
243
+ m: value.modifyTime,
244
+ h: value.hash,
245
+ });
246
+ writeStream.write(line + "\n");
247
+ }
248
+
249
+ // Wait for stream to finish
250
+ await new Promise((resolve, reject) => {
251
+ writeStream.on("finish", resolve);
252
+ writeStream.on("error", reject);
253
+ writeStream.end();
254
+ });
255
+
256
+ // Atomic rename
257
+ await fsp.rename(tempPath, cachePath);
258
+ }
259
+
260
+ /**
261
+ * Close the cache (no-op for NDJSON, but keeps API compatible)
262
+ */
263
+ async function close() {
264
+ // Nothing to do - Maps are garbage collected
265
+ }
266
+
267
+ /**
268
+ * Get cache statistics
269
+ */
270
+ function getStats() {
271
+ return {
272
+ localEntries: localCache.size,
273
+ remoteEntries: remoteCache.size,
274
+ totalEntries: localCache.size + remoteCache.size,
275
+ };
276
+ }
277
+
278
+ /**
279
+ * Remove entries for files that no longer exist
280
+ * @param {Set<string>} currentLocalFiles - Set of current local relative paths
281
+ * @param {Set<string>} currentRemoteFiles - Set of current remote relative paths
282
+ */
283
+ async function cleanup(currentLocalFiles, currentRemoteFiles) {
284
+ let deletedCount = 0;
285
+
286
+ // Clean local entries
287
+ for (const key of localCache.keys()) {
288
+ const relPath = key.startsWith(`${ns}:`) ? key.slice(ns.length + 1) : key;
289
+ if (!currentLocalFiles.has(relPath)) {
290
+ localCache.delete(key);
291
+ deletedCount++;
292
+ }
293
+ }
294
+
295
+ // Clean remote entries
296
+ for (const key of remoteCache.keys()) {
297
+ const relPath = key.startsWith(`${ns}:`) ? key.slice(ns.length + 1) : key;
298
+ if (!currentRemoteFiles.has(relPath)) {
299
+ remoteCache.delete(key);
300
+ deletedCount++;
301
+ }
302
+ }
303
+
304
+ return deletedCount;
305
+ }
306
+
307
  // Public API of the cache instance returned to callers.
  return {
    getLocalHash,
    getRemoteHash,
    save,
    close,
    getStats,
    cleanup,
  };
}
316
+
317
/**
 * One-time migration from the legacy single-JSON cache to NDJSON.
 *
 * Reads the old cache, writes every entry as one NDJSON line
 * (`{ t: "l"|"r", p, s, m, h }`), and renames the legacy file to
 * "<path>.migrated" so the migration never runs twice. A corrupt legacy
 * cache is renamed to "<path>.corrupt" and migration is skipped.
 *
 * @param {string} jsonCachePath - Path to old .sync-cache.json file.
 * @param {string} ndjsonPath - Path to new .ndjson file.
 * @param {string} namespace - Namespace prefix used in the old keys.
 * @returns {Promise<{migrated: boolean, reason?: string, localCount?: number, remoteCount?: number}>}
 */
export async function migrateFromJsonCache(jsonCachePath, ndjsonPath, namespace) {
  const ns = namespace || "default";

  // Nothing to migrate when no legacy cache exists.
  try {
    await fsp.access(jsonCachePath);
  } catch {
    return { migrated: false, reason: "No JSON cache found" };
  }

  // A non-empty NDJSON cache means migration already happened.
  try {
    const stats = await fsp.stat(ndjsonPath);
    if (stats.size > 0) {
      return { migrated: false, reason: "NDJSON cache already exists" };
    }
  } catch {
    // Target does not exist yet - proceed with migration.
  }

  // Load the legacy cache; quarantine it if it cannot be parsed.
  let jsonCache;
  try {
    const raw = await fsp.readFile(jsonCachePath, "utf8");
    jsonCache = JSON.parse(raw);
  } catch (parseErr) {
    try {
      await fsp.rename(jsonCachePath, jsonCachePath + ".corrupt");
    } catch {
      // Best effort - a failed quarantine must not abort the sync run.
    }
    return { migrated: false, reason: `JSON cache corrupt: ${parseErr.message}` };
  }

  const writeStream = createWriteStream(ndjsonPath, { encoding: "utf8" });

  // Attach finish/error handlers BEFORE writing: an "error" event fired
  // during the write loops would otherwise have no listener and crash
  // the process.
  const done = new Promise((resolve, reject) => {
    writeStream.on("finish", resolve);
    writeStream.on("error", reject);
  });

  // Legacy keys were stored as "namespace:relPath"; keep only relPath.
  const stripNs = (key) =>
    key.startsWith(`${ns}:`) ? key.slice(ns.length + 1) : key;

  let localCount = 0;
  let remoteCount = 0;

  if (jsonCache.local) {
    for (const [key, value] of Object.entries(jsonCache.local)) {
      const line = JSON.stringify({
        t: "l",
        p: stripNs(key),
        s: value.size,
        m: value.mtimeMs,
        h: value.hash,
      });
      writeStream.write(line + "\n");
      localCount++;
    }
  }

  if (jsonCache.remote) {
    for (const [key, value] of Object.entries(jsonCache.remote)) {
      const line = JSON.stringify({
        t: "r",
        p: stripNs(key),
        s: value.size,
        m: value.modifyTime,
        h: value.hash,
      });
      writeStream.write(line + "\n");
      remoteCount++;
    }
  }

  writeStream.end();
  await done;

  // Rename the legacy file to "<path>.migrated" so it is never re-read.
  try {
    await fsp.rename(jsonCachePath, jsonCachePath + ".migrated");
  } catch {
    // Best effort - a failed rename only risks a redundant re-check.
  }

  return {
    migrated: true,
    localCount,
    remoteCount,
  };
}
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * hashing.mjs
3
- *
3
+ *
4
4
  * @author Carsten Nichte, 2025 / https://carsten-nichte.de/
5
- *
6
- */
5
+ *
6
+ */
7
7
  // src/helpers/hashing.mjs
8
8
  import fs from "fs";
9
9
  import fsp from "fs/promises";
@@ -53,7 +53,7 @@ export async function hashRemoteFile(sftp, remotePath) {
53
53
  export function createHashCache({
54
54
  cachePath,
55
55
  namespace,
56
- flushInterval = 50,
56
+ flushInterval = 25, // Save more frequently to release memory
57
57
  }) {
58
58
  const ns = namespace || "default";
59
59
 
@@ -85,8 +85,40 @@ export function createHashCache({
85
85
 
86
86
  async function save(force = false) {
87
87
  if (!dirty && !force) return;
88
- const data = JSON.stringify(cache, null, 2);
89
- await fsp.writeFile(cachePath, data, "utf8");
88
+
89
+ // Stream-basiertes Schreiben für große Caches
90
+ // Verwendet for...in statt Object.keys() um Speicher zu sparen
91
+ const fd = await fsp.open(cachePath, 'w');
92
+ try {
93
+ await fd.write('{"version":1,"local":{');
94
+
95
+ let firstLocal = true;
96
+ for (const key in cache.local) {
97
+ if (Object.prototype.hasOwnProperty.call(cache.local, key)) {
98
+ const entry = cache.local[key];
99
+ const line = `${JSON.stringify(key)}:${JSON.stringify(entry)}`;
100
+ await fd.write(firstLocal ? line : ',' + line);
101
+ firstLocal = false;
102
+ }
103
+ }
104
+
105
+ await fd.write('},"remote":{');
106
+
107
+ let firstRemote = true;
108
+ for (const key in cache.remote) {
109
+ if (Object.prototype.hasOwnProperty.call(cache.remote, key)) {
110
+ const entry = cache.remote[key];
111
+ const line = `${JSON.stringify(key)}:${JSON.stringify(entry)}`;
112
+ await fd.write(firstRemote ? line : ',' + line);
113
+ firstRemote = false;
114
+ }
115
+ }
116
+
117
+ await fd.write('}}');
118
+ } finally {
119
+ await fd.close();
120
+ }
121
+
90
122
  dirty = false;
91
123
  dirtyCount = 0;
92
124
  }
@@ -198,4 +230,4 @@ export async function getRemoteHash(rel, meta, cacheRemote, key, markDirty, sftp
198
230
  await markDirty();
199
231
  }
200
232
  return hash;
201
- }
233
+ }
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * walkers.mjs
3
- *
3
+ *
4
4
  * @author Carsten Nichte, 2025 / https://carsten-nichte.de/
5
- *
6
- */
5
+ *
6
+ */
7
7
  // src/helpers/walkers.mjs
8
8
  import fsp from "fs/promises";
9
9
  import path from "path";
@@ -250,4 +250,4 @@ export async function walkRemotePlain(sftp, remoteRoot) {
250
250
 
251
251
  await recurse(remoteRoot, "");
252
252
  return result;
253
- }
253
+ }