dbgate-api-premium 7.1.10 → 7.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "dbgate-api-premium",
3
3
  "main": "src/index.js",
4
- "version": "7.1.10",
4
+ "version": "7.1.12",
5
5
  "homepage": "https://www.dbgate.io/",
6
6
  "repository": {
7
7
  "type": "git",
@@ -30,11 +30,11 @@
30
30
  "compare-versions": "^3.6.0",
31
31
  "cors": "^2.8.5",
32
32
  "cross-env": "^6.0.3",
33
- "dbgate-datalib": "7.1.10",
33
+ "dbgate-datalib": "7.1.12",
34
34
  "dbgate-query-splitter": "^4.12.0",
35
- "dbgate-rest": "7.1.10",
36
- "dbgate-sqltree": "7.1.10",
37
- "dbgate-tools": "7.1.10",
35
+ "dbgate-rest": "7.1.12",
36
+ "dbgate-sqltree": "7.1.12",
37
+ "dbgate-tools": "7.1.12",
38
38
  "debug": "^4.3.4",
39
39
  "diff": "^5.0.0",
40
40
  "diff2html": "^3.4.13",
@@ -42,7 +42,6 @@
42
42
  "express": "^4.17.1",
43
43
  "express-basic-auth": "^1.2.0",
44
44
  "express-fileupload": "^1.2.0",
45
- "external-sorting": "^1.3.1",
46
45
  "fs-extra": "^9.1.0",
47
46
  "fs-reverse": "^0.0.3",
48
47
  "get-port": "^5.1.1",
@@ -81,15 +80,25 @@
81
80
  "start:singleconn": "env-cmd node src/index.js --server localhost --user root --port 3307 --engine mysql@dbgate-plugin-mysql --password test --listen-api",
82
81
  "start:azure": "env-cmd -f env/azure/.env node src/index.js --listen-api",
83
82
  "start:e2e:team": "cross-env DEVWEB=1 DEVMODE=1 env-cmd -f ../../e2e-tests/env/team/.env node src/index.js --listen-api",
83
+ "test": "jest",
84
+ "test:ci": "jest --json --outputFile=result.json --testLocationInResults",
84
85
  "ts": "tsc",
85
86
  "build": "webpack",
86
87
  "build:doc": "jsdoc2md --template doctpl.hbs ./src/shell/* > ../../../dbgate.github.io/_docs/apidoc.md"
87
88
  },
89
+ "jest": {
90
+ "testMatch": [
91
+ "**/*Test.js",
92
+ "**/*.test.js"
93
+ ]
94
+ },
88
95
  "devDependencies": {
89
96
  "@types/fs-extra": "^9.0.11",
97
+ "@types/jest": "^30.0.0",
90
98
  "@types/lodash": "^4.14.149",
91
- "dbgate-types": "7.1.10",
99
+ "dbgate-types": "7.1.12",
92
100
  "env-cmd": "^10.1.0",
101
+ "jest": "^30.4.2",
93
102
  "jsdoc-to-markdown": "^9.0.5",
94
103
  "node-loader": "^1.0.2",
95
104
  "nodemon": "^2.0.2",
@@ -264,7 +264,7 @@ module.exports = {
264
264
  try {
265
265
  const fingerprint = await getPublicHardwareFingerprint();
266
266
 
267
- const resp = await axios.default.post(`${getAuthProxyUrl()}/trial-license`, {
267
+ const resp = await axios.default.post(`https://api.dbgate.cloud/trial-license`, {
268
268
  type: 'premium-trial',
269
269
  days: 30,
270
270
  fingerprint,
@@ -15,6 +15,7 @@ const {
15
15
  getLogger,
16
16
  extractErrorLogData,
17
17
  filterStructureBySchema,
18
+ isCompositeDbName,
18
19
  serializeJsTypesForJsonStringify,
19
20
  } = require('dbgate-tools');
20
21
  const { html, parse } = require('diff2html');
@@ -194,6 +195,8 @@ module.exports = {
194
195
  );
195
196
  pipeForkLogs(subprocess);
196
197
  const lastClosed = this.closed[`${conid}/${database}`];
198
+ const initialStatusName =
199
+ !lastClosed && !(connection.useSeparateSchemas && !isCompositeDbName(database)) ? 'loadStructure' : 'pending';
197
200
  const newOpened = {
198
201
  conid,
199
202
  database,
@@ -201,7 +204,7 @@ module.exports = {
201
204
  structure: lastClosed ? lastClosed.structure : DatabaseAnalyser.createEmptyStructure(),
202
205
  serverVersion: lastClosed ? lastClosed.serverVersion : null,
203
206
  connection,
204
- status: { name: 'pending' },
207
+ status: { name: initialStatusName },
205
208
  };
206
209
  this.opened.push(newOpened);
207
210
  subprocess.on('message', message => {
@@ -1,5 +1,5 @@
1
1
 
2
2
  module.exports = {
3
- version: '7.1.10',
4
- buildTime: '2026-04-29T12:39:25.294Z'
3
+ version: '7.1.12',
4
+ buildTime: '2026-05-20T09:12:47.123Z'
5
5
  };
@@ -69,6 +69,16 @@ async function checkedAsyncCall(promise) {
69
69
  }
70
70
 
71
71
  let loadingModel = false;
72
+ let queuedSyncModelFullRefresh = null;
73
+
74
/**
 * Marks the current model load as finished and, if a sync request arrived
 * while the load was running, replays that request now.
 *
 * The queued value is tri-state: `null` means nothing is queued, otherwise it
 * is the boolean `isFullRefresh` flag to pass to `handleSyncModel`.
 */
function finishLoadingModel() {
  loadingModel = false;

  // Nothing was requested while we were busy.
  if (queuedSyncModelFullRefresh == null) return;

  // Clear the queue before dispatching so a request that arrives during the
  // replayed sync is queued afresh instead of being overwritten.
  const isFullRefresh = queuedSyncModelFullRefresh;
  queuedSyncModelFullRefresh = null;
  handleSyncModel({ isFullRefresh });
}
72
82
 
73
83
  async function handleFullRefresh() {
74
84
  if (storedConnection.useSeparateSchemas && !isCompositeDbName(dbhan?.database)) {
@@ -86,7 +96,7 @@ async function handleFullRefresh() {
86
96
  process.send({ msgtype: 'structureTime', analysedTime });
87
97
  setStatusName('ok');
88
98
 
89
- loadingModel = false;
99
+ finishLoadingModel();
90
100
  resolveAnalysedPromises();
91
101
  }
92
102
 
@@ -111,12 +121,15 @@ async function handleIncrementalRefresh(forceSend) {
111
121
 
112
122
  process.send({ msgtype: 'structureTime', analysedTime });
113
123
  setStatusName('ok');
114
- loadingModel = false;
124
+ finishLoadingModel();
115
125
  resolveAnalysedPromises();
116
126
  }
117
127
 
118
128
  function handleSyncModel({ isFullRefresh }) {
119
- if (loadingModel) return;
129
+ if (loadingModel) {
130
+ queuedSyncModelFullRefresh = queuedSyncModelFullRefresh || !!isFullRefresh;
131
+ return;
132
+ }
120
133
  if (isFullRefresh) handleFullRefresh();
121
134
  else handleIncrementalRefresh();
122
135
  }
@@ -1,15 +1,16 @@
1
1
  const crypto = require('crypto');
2
2
  const fs = require('fs');
3
- const os = require('os');
4
- const rimraf = require('rimraf');
5
3
  const path = require('path');
6
4
  const AsyncLock = require('async-lock');
7
5
  const lock = new AsyncLock();
8
6
  const stableStringify = require('json-stable-stringify');
9
7
  const { evaluateCondition } = require('dbgate-sqltree');
10
- const esort = require('external-sorting');
8
+ const { getLogger, extractErrorLogData } = require('dbgate-tools');
11
9
  const { jsldir } = require('./directories');
12
10
  const LineReader = require('./LineReader');
11
+ const { sortFile } = require('./externalSort');
12
+
13
+ const logger = getLogger('JsonLinesDatastore');
13
14
 
14
15
  class JsonLinesDatastore {
15
16
  constructor(file, formatterFunction) {
@@ -30,33 +31,7 @@ class JsonLinesDatastore {
30
31
  }
31
32
 
32
33
  static async sortFile(infile, outfile, sort) {
33
- const tempDir = path.join(os.tmpdir(), crypto.randomUUID());
34
- fs.mkdirSync(tempDir);
35
-
36
- await esort
37
- .default({
38
- input: fs.createReadStream(infile),
39
- output: fs.createWriteStream(outfile),
40
- deserializer: JSON.parse,
41
- serializer: JSON.stringify,
42
- tempDir,
43
- maxHeap: 100,
44
- comparer: (a, b) => {
45
- for (const item of sort) {
46
- const { uniqueName, order } = item;
47
- if (a[uniqueName] < b[uniqueName]) {
48
- return order == 'ASC' ? -1 : 1;
49
- }
50
- if (a[uniqueName] > b[uniqueName]) {
51
- return order == 'ASC' ? 1 : -1;
52
- }
53
- }
54
- return 0;
55
- },
56
- })
57
- .asc();
58
-
59
- await new Promise(resolve => rimraf(tempDir, resolve));
34
+ return sortFile(infile, outfile, sort);
60
35
  }
61
36
 
62
37
  async _closeReader() {
@@ -214,8 +189,16 @@ class JsonLinesDatastore {
214
189
  if (sort && !this.sortedFiles[stableStringify(sort)]) {
215
190
  const jslid = crypto.randomUUID();
216
191
  const sortedFile = path.join(jsldir(), `${jslid}.jsonl`);
217
- await JsonLinesDatastore.sortFile(this.file, sortedFile, sort);
218
- this.sortedFiles[stableStringify(sort)] = sortedFile;
192
+ try {
193
+ await JsonLinesDatastore.sortFile(this.file, sortedFile, sort);
194
+ this.sortedFiles[stableStringify(sort)] = sortedFile;
195
+ } catch (e) {
196
+ logger.error(extractErrorLogData(e), 'DBGM-00000 Failed to sort data file, returning unsorted results');
197
+ // Remove any partial output file left by the failed sort so it does
198
+ // not accumulate in jsldir() across repeated failures.
199
+ try { fs.unlinkSync(sortedFile); } catch { /* best-effort */ }
200
+ sort = null;
201
+ }
219
202
  }
220
203
  await lock.acquire('reader', async () => {
221
204
  await this._ensureReader(offset, filter, sort);
@@ -21,7 +21,7 @@ const AI_GATEWAY_URL = process.env.LOCAL_AI_GATEWAY
21
21
  ? 'http://localhost:3110'
22
22
  : process.env.DEVWEB || process.env.DEVMODE
23
23
  ? 'https://aigw.dbgate.udolni.net'
24
- : 'https://aigw.dbgate.io';
24
+ : 'https://api.dbgate.cloud';
25
25
 
26
26
  const DBGATE_API_URL = process.env.LOCAL_DBGATE_API
27
27
  ? 'http://localhost:3115'
@@ -177,7 +177,7 @@ async function obtainRefreshedLicense() {
177
177
 
178
178
  try {
179
179
  const respToken = await axios.default.post(
180
- `${AUTH_PROXY_URL}/refresh-license`,
180
+ `https://api.dbgate.cloud/refresh-license`,
181
181
  {},
182
182
  {
183
183
  headers: {
@@ -199,7 +199,7 @@ async function obtainRefreshedLicense() {
199
199
  async function tryToGetRefreshedLicense(oldLicenseKey) {
200
200
  try {
201
201
  const respToken = await axios.default.post(
202
- `${AUTH_PROXY_URL}/refresh-license`,
202
+ `https://api.dbgate.cloud/refresh-license`,
203
203
  {},
204
204
  {
205
205
  headers: {
@@ -34,7 +34,7 @@ const DBGATE_CLOUD_URL = process.env.LOCAL_DBGATE_CLOUD
34
34
  : process.env.PROD_DBGATE_CLOUD
35
35
  ? 'https://cloud.dbgate.io'
36
36
  : process.env.DEVWEB || process.env.DEVMODE
37
- ? 'https://cloud.dbgate.udolni.net'
37
+ ? 'https://dev.dbgate.cloud'
38
38
  : 'https://cloud.dbgate.io';
39
39
 
40
40
 
@@ -297,7 +297,7 @@ async function updatePremiumPromoWidget(language) {
297
297
  const tags = (await collectCloudFilesSearchTags()).join(',');
298
298
 
299
299
  const resp = await axios.default.get(
300
- `${DBGATE_CLOUD_URL}/premium-promo-widget?identifier=${promoWidgetData?.identifier ?? 'empty'}&tags=${tags}`,
300
+ `${DBGATE_PUBLIC_CLOUD_URL}/premium-promo-widget?identifier=${promoWidgetData?.identifier ?? 'empty'}&tags=${tags}`,
301
301
  {
302
302
  headers: {
303
303
  ...getLicenseHttpHeaders(),
@@ -508,14 +508,14 @@ async function getPromoWidgetData() {
508
508
 
509
509
  async function getPromoWidgetPreview(campaign, variant) {
510
510
  const resp = await axios.default.get(
511
- `${DBGATE_CLOUD_URL}/premium-promo-widget-preview/${campaign}/${variant}`,
511
+ `${DBGATE_PUBLIC_CLOUD_URL}/premium-promo-widget-preview/${campaign}/${variant}`,
512
512
  stageAxiosConfig
513
513
  );
514
514
  return resp.data;
515
515
  }
516
516
 
517
517
  async function getPromoWidgetList() {
518
- const resp = await axios.default.get(`${DBGATE_CLOUD_URL}/promo-widget-list`, stageAxiosConfig);
518
+ const resp = await axios.default.get(`${DBGATE_PUBLIC_CLOUD_URL}/promo-widget-list`, stageAxiosConfig);
519
519
  return resp.data;
520
520
  }
521
521
 
@@ -0,0 +1,366 @@
1
+ const crypto = require('crypto');
2
+ const fs = require('fs');
3
+ const { pipeline } = require('stream');
4
+ const os = require('os');
5
+ const readline = require('readline');
6
+ const path = require('path');
7
+ const { getLogger, extractErrorLogData } = require('dbgate-tools');
8
+
9
+ const logger = getLogger('externalSort');
10
+
11
+ // Number of rows accumulated per sorted temp-chunk during external sort.
12
+ // Capped so that a single chunk never exceeds ~50 MB for typical row sizes.
13
+ const SORT_CHUNK_SIZE = 50_000;
14
+
15
+ // Maximum number of chunk files merged simultaneously in one pass.
16
+ // Limits the number of concurrently open file descriptors during merge.
17
+ const MAX_MERGE_FAN_IN = 16;
18
+
19
+ // Async generator that yields parsed JSON objects from an internally-generated
20
+ // sorted chunk file. Parse errors are thrown immediately — chunk files are
21
+ // always written by this module, so a parse error indicates corruption.
22
/**
 * Lazily reads a JSON-lines file and yields one parsed value per non-blank line.
 *
 * @param {string} file - Path of the JSON-lines file to read.
 * @yields {*} The value produced by `JSON.parse` for each non-blank line.
 * @throws {SyntaxError} When a non-blank line is not valid JSON.
 */
async function* readChunkLines(file) {
  const input = fs.createReadStream(file);
  const lines = readline.createInterface({ input, crlfDelay: Infinity });
  try {
    for await (const text of lines) {
      // Whitespace-only lines carry no row; everything else must parse.
      if (text.trim()) {
        yield JSON.parse(text);
      }
    }
  } finally {
    // Runs on normal completion, on a parse error, and on early return().
    lines.close();
    input.destroy();
  }
}
35
+
36
+ // Cross-device-safe rename: tries renameSync first; on EXDEV (cross-filesystem)
37
+ // falls back to a streaming copy followed by unlinking the source.
38
+ // Uses stream.pipeline() so both streams are destroyed and the partial
39
+ // destination file is removed if an error occurs mid-copy.
40
/**
 * Moves `src` to `dest`, falling back to copy-then-delete when the two paths
 * are on different filesystems.
 *
 * @param {string} src - Existing file to move.
 * @param {string} dest - Target path.
 * @returns {Promise<void>}
 * @throws Any rename error other than EXDEV, or any error raised by the copy.
 */
async function safeRename(src, dest) {
  try {
    fs.renameSync(src, dest);
    return;
  } catch (renameError) {
    // Only a cross-device rename is recoverable; everything else is fatal.
    if (renameError.code !== 'EXDEV') throw renameError;
  }

  await new Promise((resolve, reject) => {
    const reader = fs.createReadStream(src);
    const writer = fs.createWriteStream(dest);
    pipeline(reader, writer, copyError => {
      if (!copyError) {
        resolve();
        return;
      }
      // Do not leave a truncated destination behind.
      try { fs.unlinkSync(dest); } catch { /* best-effort */ }
      reject(copyError);
    });
  });

  // The copy succeeded, so the source can go.
  fs.unlinkSync(src);
}
60
+
61
+ // Write an array of rows to a JSON-lines file, respecting stream backpressure.
62
/**
 * Serialises `rows` to `filePath` as JSON lines, pausing whenever the write
 * stream signals backpressure.
 *
 * @param {string} filePath - File to create or overwrite.
 * @param {Array<*>} rows - Values to write, one JSON document per line.
 * @returns {Promise<void>} Resolves on the stream's 'finish' event, rejects on
 *   its 'error' event.
 */
function writeChunkFile(filePath, rows) {
  return new Promise((resolve, reject) => {
    const out = fs.createWriteStream(filePath);
    out.on('error', reject);
    out.on('finish', resolve);

    let cursor = 0;
    const pump = () => {
      while (cursor < rows.length) {
        const accepted = out.write(`${JSON.stringify(rows[cursor])}\n`);
        cursor += 1;
        if (!accepted) {
          // The row was buffered; wait for 'drain' before writing the next one.
          out.once('drain', pump);
          return;
        }
      }
      out.end();
    };

    pump();
  });
}
80
+
81
+ // Min-heap used for k-way merge. Each item stored in the heap is
82
+ // { row: object, iter: AsyncGenerator }.
83
/**
 * Binary min-heap ordered by the `row` property of its items.
 *
 * Items are objects with a `row` field; `comparator(a.row, b.row)` returning a
 * negative number means `a` sorts before `b`.
 */
class _SortMinHeap {
  /**
   * @param {(a: *, b: *) => number} comparator - Ordering applied to `item.row`.
   */
  constructor(comparator) {
    this._data = [];
    this._cmp = comparator;
  }

  /** Number of items currently stored. */
  get size() {
    return this._data.length;
  }

  /** True when the item at index `a` sorts strictly before the item at `b`. */
  _isBefore(a, b) {
    return this._cmp(this._data[a].row, this._data[b].row) < 0;
  }

  /** Exchanges the items at the two indices. */
  _swap(a, b) {
    const held = this._data[a];
    this._data[a] = this._data[b];
    this._data[b] = held;
  }

  /**
   * Inserts an item and restores heap order by bubbling it upwards.
   * @param {{row: *}} item
   */
  push(item) {
    this._data.push(item);
    let child = this._data.length - 1;
    while (child > 0) {
      const parent = (child - 1) >> 1;
      if (!this._isBefore(child, parent)) break;
      this._swap(child, parent);
      child = parent;
    }
  }

  /**
   * Removes and returns the smallest item, or `undefined` when empty.
   * @returns {{row: *}|undefined}
   */
  pop() {
    const smallest = this._data[0];
    const tail = this._data.pop();
    if (this._data.length === 0) return smallest;

    // Move the former tail to the root and sink it to its place.
    this._data[0] = tail;
    const count = this._data.length;
    let node = 0;
    for (;;) {
      const left = 2 * node + 1;
      const right = left + 1;
      let candidate = node;
      if (left < count && this._isBefore(left, candidate)) candidate = left;
      if (right < count && this._isBefore(right, candidate)) candidate = right;
      if (candidate === node) break;
      this._swap(node, candidate);
      node = candidate;
    }
    return smallest;
  }
}
119
+
120
+ // Merge exactly inputFiles.length (≤ MAX_MERGE_FAN_IN) sorted chunk files into
121
+ // one output file. Opens exactly inputFiles.length file descriptors at once.
122
/**
 * K-way merges already-sorted JSON-lines files into a single sorted file.
 *
 * One reader is opened per input file, so callers should keep
 * `inputFiles.length` small to bound open file descriptors.
 *
 * On any failure (read, parse, or write) the output stream is destroyed, every
 * reader is closed, the partial output file is removed on a best-effort basis,
 * and the original error is re-thrown.
 *
 * @param {string[]} inputFiles - Paths of sorted JSON-lines files.
 * @param {string} outfile - Path of the merged file to create.
 * @param {(a: *, b: *) => number} comparator - Row ordering used for the merge.
 * @returns {Promise<void>} Resolves once the output stream has finished.
 */
async function mergeBatch(inputFiles, outfile, comparator) {
  const ws = fs.createWriteStream(outfile);
  const iters = inputFiles.map(f => readChunkLines(f));

  // A single persistent listener records the first stream error so that it is
  // never lost (or raised as an uncaught exception) between awaits.
  let writeError = null;
  let rejectWaiter = null;
  ws.on('error', err => {
    if (writeError === null) writeError = err;
    if (rejectWaiter !== null) rejectWaiter(writeError);
  });

  // Waits for a stream event but gives up as soon as the stream errors, so a
  // failed stream can never leave the merge waiting forever.
  const waitFor = eventName =>
    new Promise((resolve, reject) => {
      if (writeError !== null) {
        reject(writeError);
        return;
      }
      rejectWaiter = reject;
      ws.once(eventName, () => {
        rejectWaiter = null;
        resolve();
      });
    });

  const heap = new _SortMinHeap(comparator);

  // Pulls the next row of one reader into the heap; exhausted readers drop out.
  const advance = async iter => {
    const { value, done } = await iter.next();
    if (!done) heap.push({ row: value, iter });
  };

  try {
    // Prime the heap with the first row of every input.
    await Promise.all(iters.map(advance));

    while (heap.size > 0) {
      if (writeError !== null) throw writeError;
      const { row, iter } = heap.pop();
      const accepted = ws.write(JSON.stringify(row) + '\n');
      if (!accepted) await waitFor('drain');
      await advance(iter);
    }

    if (writeError !== null) throw writeError;
    // Subscribe before end() so the 'finish' event cannot be missed.
    const finished = waitFor('finish');
    ws.end();
    await finished;
  } catch (e) {
    ws.destroy();
    // Close every reader and wait for it, so no descriptor is still open when
    // the caller starts deleting temp files; failures here are irrelevant.
    await Promise.allSettled(iters.map(it => it.return()));
    try { fs.unlinkSync(outfile); } catch { /* best-effort */ }
    throw e;
  }
}
178
+
179
+ // Multi-pass k-way merge. Each pass merges groups of ≤ MAX_MERGE_FAN_IN files,
180
+ // writing intermediates via nextTmpFile() so they are tracked for cleanup.
181
+ // The very last pass writes directly to outfile (no cross-fs rename needed).
182
/**
 * Merges any number of sorted files into `outfile`, never handing more than
 * MAX_MERGE_FAN_IN files to a single `mergeBatch` call.
 *
 * While there are too many inputs, each round merges consecutive groups into
 * intermediate files obtained from `nextTmpFile()`; the final round writes
 * straight to `outfile`.
 *
 * @param {string[]} inputFiles - Sorted JSON-lines files to merge.
 * @param {string} outfile - Destination of the fully merged result.
 * @param {(a: *, b: *) => number} comparator - Row ordering.
 * @param {() => string} nextTmpFile - Allocates a path for an intermediate file.
 * @returns {Promise<void>}
 */
async function multiPassMerge(inputFiles, outfile, comparator, nextTmpFile) {
  let pending = inputFiles;

  while (pending.length > MAX_MERGE_FAN_IN) {
    const mergedRound = [];
    for (let start = 0; start < pending.length; start += MAX_MERGE_FAN_IN) {
      const group = pending.slice(start, start + MAX_MERGE_FAN_IN);
      const target = nextTmpFile();
      // Record the target before merging so a failed merge is still tracked.
      mergedRound.push(target);
      await mergeBatch(group, target, comparator);
    }
    pending = mergedRound;
  }

  await mergeBatch(pending, outfile, comparator);
}
196
+
197
/**
 * Phase 1 of the external sort: reads `infile` line by line and writes sorted
 * runs of at most roughly SORT_CHUNK_SIZE rows to temp files.
 *
 * Blank lines are ignored and lines that are not valid JSON are logged and
 * skipped. If the first parsed line has a truthy `__isStreamHeader` it is
 * returned separately and excluded from the runs.
 *
 * @param {string} infile - JSON-lines file to read.
 * @param {(a: *, b: *) => number} comparator - Row ordering.
 * @param {() => string} nextTmpFile - Allocates (and registers) a temp path.
 * @returns {Promise<{runFiles: string[], headerRow: (object|null)}>}
 */
function _collectSortedRuns(infile, comparator, nextTmpFile) {
  return new Promise((resolve, reject) => {
    const inputStream = fs.createReadStream(infile);
    const rl = readline.createInterface({ input: inputStream, crlfDelay: Infinity });

    const runFiles = [];
    let chunk = [];
    let headerRow = null;
    let isFirstNonEmptyLine = true;
    let pendingFlush = Promise.resolve();
    let flushScheduled = false;
    let settled = false;

    const fail = err => {
      if (settled) return;
      settled = true;
      // Close both the interface and the stream so no descriptor leaks when
      // we reject while paused.
      rl.close();
      inputStream.destroy();
      reject(err);
    };

    const flushChunk = async () => {
      if (chunk.length === 0) return;
      // Detach the rows before the first await: rl.pause() does not guarantee
      // that 'line' stops firing, so rows may keep arriving while this run is
      // written. They must go into a fresh array, not the one being flushed.
      const rows = chunk;
      chunk = [];
      rows.sort(comparator);
      // Register the path before writing so the caller can always clean up.
      const tmpFile = nextTmpFile();
      runFiles.push(tmpFile);
      await writeChunkFile(tmpFile, rows);
    };

    // readline does not reliably forward errors of the underlying stream.
    inputStream.on('error', fail);
    rl.on('error', fail);

    rl.on('line', line => {
      if (!line.trim()) return;
      let parsed;
      try {
        parsed = JSON.parse(line);
      } catch (e) {
        logger.warn(extractErrorLogData(e), 'DBGM-00000 Skipping invalid JSON line during sort');
        return;
      }
      if (isFirstNonEmptyLine) {
        isFirstNonEmptyLine = false;
        // The null check keeps a literal `null` first line from throwing
        // inside this event handler (which would be an uncaught exception).
        if (parsed != null && parsed.__isStreamHeader) {
          headerRow = parsed;
          return;
        }
      }
      chunk.push(parsed);
      // Schedule at most one flush at a time; lines that are still delivered
      // after pause() simply join the chunk that the scheduled flush will take.
      if (chunk.length >= SORT_CHUNK_SIZE && !flushScheduled) {
        flushScheduled = true;
        rl.pause();
        pendingFlush = pendingFlush
          .then(() => {
            flushScheduled = false;
            return flushChunk();
          })
          .then(() => rl.resume())
          .catch(fail);
      }
    });

    rl.on('close', () => {
      if (settled) return;
      pendingFlush
        .then(() => (settled ? undefined : flushChunk()))
        .then(() => {
          if (settled) return;
          settled = true;
          resolve({ runFiles, headerRow });
        })
        .catch(fail);
    });
  });
}

/**
 * Writes `headerRow` as the first line of `outfile`, followed by the raw
 * contents of `dataFile` when one is given.
 *
 * On failure the partial `outfile` is removed (best-effort) before rejecting.
 *
 * @param {string} outfile - File to create.
 * @param {object} headerRow - Header object, serialised with JSON.stringify.
 * @param {string|null} dataFile - Sorted JSON-lines data to append, or null.
 * @returns {Promise<void>}
 */
function _writeHeaderThenData(outfile, headerRow, dataFile) {
  return new Promise((resolve, reject) => {
    const ws = fs.createWriteStream(outfile);
    let settled = false;

    const succeed = () => {
      if (settled) return;
      settled = true;
      resolve();
    };
    const fail = err => {
      if (settled) return;
      settled = true;
      ws.destroy();
      try { fs.unlinkSync(outfile); } catch { /* best-effort */ }
      reject(err);
    };
    ws.on('error', fail);

    const headerLine = JSON.stringify(headerRow) + '\n';
    if (!dataFile) {
      // Header only — no data rows.
      ws.once('finish', succeed);
      ws.end(headerLine);
      return;
    }

    ws.write(headerLine, writeErr => {
      if (writeErr) {
        fail(writeErr);
        return;
      }
      // pipeline() destroys both streams on error, so the reader is not left
      // open when the write side fails (which a bare pipe() would do).
      pipeline(fs.createReadStream(dataFile), ws, copyErr => {
        if (copyErr) fail(copyErr);
        else succeed();
      });
    });
  });
}

/**
 * Sorts a JSON-lines file on disk with bounded memory (external merge sort).
 *
 * The input is split into sorted runs stored in a private temp directory and
 * the runs are then merged into `outfile`. A leading row with a truthy
 * `__isStreamHeader` is kept as the first line of the output and is not sorted.
 * All temp files and the temp directory are removed afterwards, whether the
 * sort succeeded or not.
 *
 * @param {string} infile - JSON-lines file to sort.
 * @param {string} outfile - Path of the sorted file to create.
 * @param {Array<{uniqueName: string, order: string}>} sort - Sort keys in
 *   priority order; `order` of 'ASC' sorts ascending, anything else descending.
 * @returns {Promise<void>}
 */
async function sortFile(infile, outfile, sort) {
  const comparator = (a, b) => {
    for (const { uniqueName, order } of sort) {
      const av = a[uniqueName];
      const bv = b[uniqueName];
      if (av < bv) return order === 'ASC' ? -1 : 1;
      if (av > bv) return order === 'ASC' ? 1 : -1;
    }
    return 0;
  };

  const tmpDir = path.join(os.tmpdir(), `dbgate-sort-${crypto.randomUUID()}`);
  fs.mkdirSync(tmpDir, { recursive: true });

  // Every temp path is registered here before anything is written to it, so
  // the finally block can remove partial files after a mid-write failure.
  const createdTmpFiles = new Set();
  let tmpCounter = 0;
  const nextTmpFile = () => {
    const f = path.join(tmpDir, `es-${tmpCounter++}.jsonl`);
    createdTmpFiles.add(f);
    return f;
  };

  try {
    // Phase 1: sorted runs.
    const { runFiles, headerRow } = await _collectSortedRuns(infile, comparator, nextTmpFile);

    // Phase 2: merge.
    if (headerRow !== null) {
      // The header must be the first line of outfile, so the data is merged
      // into one intermediate file (or the single run is used as-is) and then
      // appended after the header.
      let mergedDataFile = null;
      if (runFiles.length === 1) {
        [mergedDataFile] = runFiles;
      } else if (runFiles.length > 1) {
        mergedDataFile = nextTmpFile();
        await multiPassMerge(runFiles, mergedDataFile, comparator, nextTmpFile);
      }
      await _writeHeaderThenData(outfile, headerRow, mergedDataFile);
    } else if (runFiles.length === 0) {
      fs.writeFileSync(outfile, '');
    } else if (runFiles.length === 1) {
      await safeRename(runFiles[0], outfile);
      // The run now lives at outfile; drop it from the cleanup set.
      createdTmpFiles.delete(runFiles[0]);
    } else {
      await multiPassMerge(runFiles, outfile, comparator, nextTmpFile);
    }
  } finally {
    for (const f of createdTmpFiles) {
      try { fs.unlinkSync(f); } catch { /* best-effort cleanup */ }
    }
    try { fs.rmdirSync(tmpDir); } catch { /* best-effort cleanup */ }
  }
}
365
+
366
+ module.exports = { sortFile };
@@ -0,0 +1,275 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const os = require('os');
5
+ const path = require('path');
6
+ const { sortFile } = require('./externalSort');
7
+
8
+ // ── helpers ──────────────────────────────────────────────────────────────────
9
+
10
/**
 * Builds a fresh, practically unique `.jsonl` path inside the OS temp
 * directory. The file itself is not created.
 *
 * @returns {string}
 */
function tmpFile() {
  const directory = os.tmpdir();
  const stamp = Date.now();
  const suffix = Math.random().toString(36).slice(2);
  return path.join(directory, `externalSortTest-${stamp}-${suffix}.jsonl`);
}
13
+
14
/**
 * Writes `rows` to `filePath` as JSON lines, each terminated by a newline.
 * An empty array produces an empty file.
 *
 * @param {string} filePath - File to create or overwrite.
 * @param {Array<*>} rows - Values to serialise, one per line.
 */
function writeJsonl(filePath, rows) {
  const serialised = rows.map(row => JSON.stringify(row));
  const text = rows.length ? `${serialised.join('\n')}\n` : '';
  fs.writeFileSync(filePath, text);
}
18
+
19
/**
 * Reads a JSON-lines file fully into memory and parses every non-blank line.
 *
 * @param {string} filePath - File to read as UTF-8.
 * @returns {Array<*>} Parsed values in file order.
 */
function readJsonl(filePath) {
  const parsedRows = [];
  for (const line of fs.readFileSync(filePath, 'utf8').split('\n')) {
    // Whitespace-only lines (including the trailing one) hold no row.
    if (line.trim()) {
      parsedRows.push(JSON.parse(line));
    }
  }
  return parsedRows;
}
26
+
27
/**
 * Runs `fn(inFile, outFile)` with two fresh temp paths and removes both files
 * afterwards, whether or not `fn` succeeded.
 *
 * @param {(inFile: string, outFile: string) => *} fn - Callback to run;
 *   its result is awaited.
 * @returns {Promise<void>}
 */
async function withTmpPair(fn) {
  const inFile = tmpFile();
  const outFile = tmpFile();
  const removeQuietly = file => {
    try { fs.unlinkSync(file); } catch { /* best-effort */ }
  };

  try {
    await fn(inFile, outFile);
  } finally {
    removeQuietly(inFile);
    removeQuietly(outFile);
  }
}
38
+
39
+ // ── tests ─────────────────────────────────────────────────────────────────────
40
+
41
+ describe('externalSort', () => {
42
+
43
+ // ── Basic sorts ────────────────────────────────────────────────────────────
44
+
45
+ test('sorts numbers ASC', async () => {
46
+ await withTmpPair(async (inFile, outFile) => {
47
+ writeJsonl(inFile, [{ id: 3 }, { id: 1 }, { id: 2 }]);
48
+ await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'ASC' }]);
49
+ expect(readJsonl(outFile).map(r => r.id)).toEqual([1, 2, 3]);
50
+ });
51
+ });
52
+
53
+ test('sorts numbers DESC', async () => {
54
+ await withTmpPair(async (inFile, outFile) => {
55
+ writeJsonl(inFile, [{ id: 1 }, { id: 3 }, { id: 2 }]);
56
+ await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'DESC' }]);
57
+ expect(readJsonl(outFile).map(r => r.id)).toEqual([3, 2, 1]);
58
+ });
59
+ });
60
+
61
+ test('sorts strings ASC', async () => {
62
+ await withTmpPair(async (inFile, outFile) => {
63
+ writeJsonl(inFile, [{ name: 'Zebra' }, { name: 'Apple' }, { name: 'Mango' }]);
64
+ await sortFile(inFile, outFile, [{ uniqueName: 'name', order: 'ASC' }]);
65
+ expect(readJsonl(outFile).map(r => r.name)).toEqual(['Apple', 'Mango', 'Zebra']);
66
+ });
67
+ });
68
+
69
+ test('sorts strings DESC', async () => {
70
+ await withTmpPair(async (inFile, outFile) => {
71
+ writeJsonl(inFile, [{ name: 'Zebra' }, { name: 'Apple' }, { name: 'Mango' }]);
72
+ await sortFile(inFile, outFile, [{ uniqueName: 'name', order: 'DESC' }]);
73
+ expect(readJsonl(outFile).map(r => r.name)).toEqual(['Zebra', 'Mango', 'Apple']);
74
+ });
75
+ });
76
+
77
+ // ── Multi-field sort ────────────────────────────────────────────────────────
78
+
79
+ test('sorts by multiple fields (primary ASC, secondary DESC)', async () => {
80
+ await withTmpPair(async (inFile, outFile) => {
81
+ writeJsonl(inFile, [
82
+ { category: 'B', value: 1 },
83
+ { category: 'A', value: 2 },
84
+ { category: 'A', value: 5 },
85
+ { category: 'B', value: 3 },
86
+ ]);
87
+ await sortFile(inFile, outFile, [
88
+ { uniqueName: 'category', order: 'ASC' },
89
+ { uniqueName: 'value', order: 'DESC' },
90
+ ]);
91
+ const rows = readJsonl(outFile);
92
+ expect(rows.map(r => [r.category, r.value])).toEqual([['A', 5], ['A', 2], ['B', 3], ['B', 1]]);
93
+ });
94
+ });
95
+
96
+ // ── With stream header ──────────────────────────────────────────────────────
97
+
98
+ test('with header: header is first row, data rows are sorted', async () => {
99
+ await withTmpPair(async (inFile, outFile) => {
100
+ writeJsonl(inFile, [
101
+ { __isStreamHeader: true, title: 'dataset' },
102
+ { id: 3, val: 'c' },
103
+ { id: 1, val: 'a' },
104
+ { id: 2, val: 'b' },
105
+ ]);
106
+ await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'ASC' }]);
107
+ const rows = readJsonl(outFile);
108
+ expect(rows[0].__isStreamHeader).toBe(true);
109
+ expect(rows.slice(1).map(r => r.id)).toEqual([1, 2, 3]);
110
+ });
111
+ });
112
+
113
+ test('with header: header fields are preserved intact', async () => {
114
+ await withTmpPair(async (inFile, outFile) => {
115
+ writeJsonl(inFile, [
116
+ { __isStreamHeader: true, columns: ['a', 'b'], extra: 42 },
117
+ { a: 2, b: 'x' },
118
+ { a: 1, b: 'y' },
119
+ ]);
120
+ await sortFile(inFile, outFile, [{ uniqueName: 'a', order: 'ASC' }]);
121
+ const rows = readJsonl(outFile);
122
+ expect(rows[0]).toEqual({ __isStreamHeader: true, columns: ['a', 'b'], extra: 42 });
123
+ });
124
+ });
125
+
126
+ test('with header + multi-field sort', async () => {
127
+ await withTmpPair(async (inFile, outFile) => {
128
+ writeJsonl(inFile, [
129
+ { __isStreamHeader: true },
130
+ { dept: 'HR', salary: 50000 },
131
+ { dept: 'IT', salary: 80000 },
132
+ { dept: 'HR', salary: 70000 },
133
+ { dept: 'IT', salary: 60000 },
134
+ ]);
135
+ await sortFile(inFile, outFile, [
136
+ { uniqueName: 'dept', order: 'ASC' },
137
+ { uniqueName: 'salary', order: 'DESC' },
138
+ ]);
139
+ const rows = readJsonl(outFile);
140
+ expect(rows[0].__isStreamHeader).toBe(true);
141
+ expect(rows.slice(1).map(r => [r.dept, r.salary])).toEqual([
142
+ ['HR', 70000],
143
+ ['HR', 50000],
144
+ ['IT', 80000],
145
+ ['IT', 60000],
146
+ ]);
147
+ });
148
+ });
149
+
150
+ // ── Without stream header ───────────────────────────────────────────────────
151
+
152
+ test('without header: all rows sorted, no header injected', async () => {
153
+ await withTmpPair(async (inFile, outFile) => {
154
+ writeJsonl(inFile, [{ x: 30 }, { x: 10 }, { x: 20 }]);
155
+ await sortFile(inFile, outFile, [{ uniqueName: 'x', order: 'ASC' }]);
156
+ const rows = readJsonl(outFile);
157
+ expect(rows.length).toBe(3);
158
+ expect(rows[0].__isStreamHeader).toBeFalsy();
159
+ expect(rows.map(r => r.x)).toEqual([10, 20, 30]);
160
+ });
161
+ });
162
+
163
+ // ── Edge cases ───────────────────────────────────────────────────────────────
164
+
165
+ test('edge: empty file produces empty output', async () => {
166
+ await withTmpPair(async (inFile, outFile) => {
167
+ fs.writeFileSync(inFile, '');
168
+ await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'ASC' }]);
169
+ expect(fs.readFileSync(outFile, 'utf8')).toBe('');
170
+ });
171
+ });
172
+
173
+ test('edge: single data row is passed through unchanged', async () => {
174
+ await withTmpPair(async (inFile, outFile) => {
175
+ writeJsonl(inFile, [{ id: 42, msg: 'only one' }]);
176
+ await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'ASC' }]);
177
+ const rows = readJsonl(outFile);
178
+ expect(rows.length).toBe(1);
179
+ expect(rows[0].id).toBe(42);
180
+ });
181
+ });
182
+
183
+ test('edge: header-only file (no data rows) produces only header', async () => {
184
+ await withTmpPair(async (inFile, outFile) => {
185
+ writeJsonl(inFile, [{ __isStreamHeader: true, columns: ['a', 'b'] }]);
186
+ await sortFile(inFile, outFile, [{ uniqueName: 'a', order: 'ASC' }]);
187
+ const rows = readJsonl(outFile);
188
+ expect(rows.length).toBe(1);
189
+ expect(rows[0].__isStreamHeader).toBe(true);
190
+ });
191
+ });
192
+
193
+ test('edge: all rows have equal sort key — all rows preserved', async () => {
194
+ await withTmpPair(async (inFile, outFile) => {
195
+ writeJsonl(inFile, [
196
+ { score: 10, label: 'a' },
197
+ { score: 10, label: 'b' },
198
+ { score: 10, label: 'c' },
199
+ ]);
200
+ await sortFile(inFile, outFile, [{ uniqueName: 'score', order: 'ASC' }]);
201
+ const rows = readJsonl(outFile);
202
+ expect(rows.length).toBe(3);
203
+ expect(rows.every(r => r.score === 10)).toBe(true);
204
+ });
205
+ });
206
+
207
+ test('edge: invalid JSON lines are skipped gracefully', async () => {
208
+ await withTmpPair(async (inFile, outFile) => {
209
+ fs.writeFileSync(inFile, '{"id":3}\nNOT_JSON\n{"id":1}\n{"id":2}\n');
210
+ await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'ASC' }]);
211
+ const rows = readJsonl(outFile);
212
+ expect(rows.map(r => r.id)).toEqual([1, 2, 3]);
213
+ });
214
+ });
215
+
216
+ test('edge: blank lines in input are ignored', async () => {
217
+ await withTmpPair(async (inFile, outFile) => {
218
+ fs.writeFileSync(inFile, '{"id":2}\n\n{"id":1}\n\n');
219
+ await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'ASC' }]);
220
+ const rows = readJsonl(outFile);
221
+ expect(rows.map(r => r.id)).toEqual([1, 2]);
222
+ });
223
+ });
224
+
225
+ test('edge: rows missing sort key field are included in output', async () => {
226
+ await withTmpPair(async (inFile, outFile) => {
227
+ writeJsonl(inFile, [
228
+ { id: 2, name: 'Bob' },
229
+ { name: 'Alice' }, // no id field
230
+ { id: 1, name: 'Charlie' },
231
+ ]);
232
+ await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'ASC' }]);
233
+ const rows = readJsonl(outFile);
234
+ expect(rows.length).toBe(3);
235
+ expect(rows.find(r => r.name === 'Alice')).toBeTruthy();
236
+ expect(rows.find(r => r.id === 1)).toBeTruthy();
237
+ expect(rows.find(r => r.id === 2)).toBeTruthy();
238
+ });
239
+ });
240
+
241
+ test('edge: unicode values are preserved correctly', async () => {
242
+ await withTmpPair(async (inFile, outFile) => {
243
+ writeJsonl(inFile, [
244
+ { word: 'über' },
245
+ { word: 'apple' },
246
+ { word: 'éclair' },
247
+ ]);
248
+ await sortFile(inFile, outFile, [{ uniqueName: 'word', order: 'ASC' }]);
249
+ const rows = readJsonl(outFile);
250
+ expect(rows.length).toBe(3);
251
+ const words = new Set(rows.map(r => r.word));
252
+ expect(words.has('über')).toBe(true);
253
+ expect(words.has('apple')).toBe(true);
254
+ expect(words.has('éclair')).toBe(true);
255
+ });
256
+ });
257
+
258
+ test('edge: two rows with same primary key, different secondary key', async () => {
259
+ await withTmpPair(async (inFile, outFile) => {
260
+ writeJsonl(inFile, [
261
+ { pk: 'a', sk: 2 },
262
+ { pk: 'b', sk: 1 },
263
+ { pk: 'a', sk: 1 },
264
+ ]);
265
+ await sortFile(inFile, outFile, [
266
+ { uniqueName: 'pk', order: 'ASC' },
267
+ { uniqueName: 'sk', order: 'ASC' },
268
+ ]);
269
+ const rows = readJsonl(outFile);
270
+ expect(rows.map(r => [r.pk, r.sk])).toEqual([['a', 1], ['a', 2], ['b', 1]]);
271
+ });
272
+ });
273
+
274
+ });
275
+