dbgate-api-premium 7.1.10 → 7.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +16 -7
- package/src/controllers/config.js +1 -1
- package/src/controllers/databaseConnections.js +4 -1
- package/src/currentVersion.js +2 -2
- package/src/proc/databaseConnectionProcess.js +16 -3
- package/src/utility/JsonLinesDatastore.js +15 -32
- package/src/utility/authProxy.js +3 -3
- package/src/utility/cloudIntf.js +4 -4
- package/src/utility/externalSort.js +366 -0
- package/src/utility/externalSortTest.js +275 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dbgate-api-premium",
|
|
3
3
|
"main": "src/index.js",
|
|
4
|
-
"version": "7.1.
|
|
4
|
+
"version": "7.1.12",
|
|
5
5
|
"homepage": "https://www.dbgate.io/",
|
|
6
6
|
"repository": {
|
|
7
7
|
"type": "git",
|
|
@@ -30,11 +30,11 @@
|
|
|
30
30
|
"compare-versions": "^3.6.0",
|
|
31
31
|
"cors": "^2.8.5",
|
|
32
32
|
"cross-env": "^6.0.3",
|
|
33
|
-
"dbgate-datalib": "7.1.
|
|
33
|
+
"dbgate-datalib": "7.1.12",
|
|
34
34
|
"dbgate-query-splitter": "^4.12.0",
|
|
35
|
-
"dbgate-rest": "7.1.
|
|
36
|
-
"dbgate-sqltree": "7.1.
|
|
37
|
-
"dbgate-tools": "7.1.
|
|
35
|
+
"dbgate-rest": "7.1.12",
|
|
36
|
+
"dbgate-sqltree": "7.1.12",
|
|
37
|
+
"dbgate-tools": "7.1.12",
|
|
38
38
|
"debug": "^4.3.4",
|
|
39
39
|
"diff": "^5.0.0",
|
|
40
40
|
"diff2html": "^3.4.13",
|
|
@@ -42,7 +42,6 @@
|
|
|
42
42
|
"express": "^4.17.1",
|
|
43
43
|
"express-basic-auth": "^1.2.0",
|
|
44
44
|
"express-fileupload": "^1.2.0",
|
|
45
|
-
"external-sorting": "^1.3.1",
|
|
46
45
|
"fs-extra": "^9.1.0",
|
|
47
46
|
"fs-reverse": "^0.0.3",
|
|
48
47
|
"get-port": "^5.1.1",
|
|
@@ -81,15 +80,25 @@
|
|
|
81
80
|
"start:singleconn": "env-cmd node src/index.js --server localhost --user root --port 3307 --engine mysql@dbgate-plugin-mysql --password test --listen-api",
|
|
82
81
|
"start:azure": "env-cmd -f env/azure/.env node src/index.js --listen-api",
|
|
83
82
|
"start:e2e:team": "cross-env DEVWEB=1 DEVMODE=1 env-cmd -f ../../e2e-tests/env/team/.env node src/index.js --listen-api",
|
|
83
|
+
"test": "jest",
|
|
84
|
+
"test:ci": "jest --json --outputFile=result.json --testLocationInResults",
|
|
84
85
|
"ts": "tsc",
|
|
85
86
|
"build": "webpack",
|
|
86
87
|
"build:doc": "jsdoc2md --template doctpl.hbs ./src/shell/* > ../../../dbgate.github.io/_docs/apidoc.md"
|
|
87
88
|
},
|
|
89
|
+
"jest": {
|
|
90
|
+
"testMatch": [
|
|
91
|
+
"**/*Test.js",
|
|
92
|
+
"**/*.test.js"
|
|
93
|
+
]
|
|
94
|
+
},
|
|
88
95
|
"devDependencies": {
|
|
89
96
|
"@types/fs-extra": "^9.0.11",
|
|
97
|
+
"@types/jest": "^30.0.0",
|
|
90
98
|
"@types/lodash": "^4.14.149",
|
|
91
|
-
"dbgate-types": "7.1.
|
|
99
|
+
"dbgate-types": "7.1.12",
|
|
92
100
|
"env-cmd": "^10.1.0",
|
|
101
|
+
"jest": "^30.4.2",
|
|
93
102
|
"jsdoc-to-markdown": "^9.0.5",
|
|
94
103
|
"node-loader": "^1.0.2",
|
|
95
104
|
"nodemon": "^2.0.2",
|
|
@@ -264,7 +264,7 @@ module.exports = {
|
|
|
264
264
|
try {
|
|
265
265
|
const fingerprint = await getPublicHardwareFingerprint();
|
|
266
266
|
|
|
267
|
-
const resp = await axios.default.post(
|
|
267
|
+
const resp = await axios.default.post(`https://api.dbgate.cloud/trial-license`, {
|
|
268
268
|
type: 'premium-trial',
|
|
269
269
|
days: 30,
|
|
270
270
|
fingerprint,
|
|
@@ -15,6 +15,7 @@ const {
|
|
|
15
15
|
getLogger,
|
|
16
16
|
extractErrorLogData,
|
|
17
17
|
filterStructureBySchema,
|
|
18
|
+
isCompositeDbName,
|
|
18
19
|
serializeJsTypesForJsonStringify,
|
|
19
20
|
} = require('dbgate-tools');
|
|
20
21
|
const { html, parse } = require('diff2html');
|
|
@@ -194,6 +195,8 @@ module.exports = {
|
|
|
194
195
|
);
|
|
195
196
|
pipeForkLogs(subprocess);
|
|
196
197
|
const lastClosed = this.closed[`${conid}/${database}`];
|
|
198
|
+
const initialStatusName =
|
|
199
|
+
!lastClosed && !(connection.useSeparateSchemas && !isCompositeDbName(database)) ? 'loadStructure' : 'pending';
|
|
197
200
|
const newOpened = {
|
|
198
201
|
conid,
|
|
199
202
|
database,
|
|
@@ -201,7 +204,7 @@ module.exports = {
|
|
|
201
204
|
structure: lastClosed ? lastClosed.structure : DatabaseAnalyser.createEmptyStructure(),
|
|
202
205
|
serverVersion: lastClosed ? lastClosed.serverVersion : null,
|
|
203
206
|
connection,
|
|
204
|
-
status: { name:
|
|
207
|
+
status: { name: initialStatusName },
|
|
205
208
|
};
|
|
206
209
|
this.opened.push(newOpened);
|
|
207
210
|
subprocess.on('message', message => {
|
package/src/currentVersion.js
CHANGED
|
@@ -69,6 +69,16 @@ async function checkedAsyncCall(promise) {
|
|
|
69
69
|
}
|
|
70
70
|
|
|
71
71
|
let loadingModel = false;
|
|
72
|
+
let queuedSyncModelFullRefresh = null;
|
|
73
|
+
|
|
74
|
+
// Clears the "model is loading" flag and, if a sync request was queued while
// the load was running, replays it exactly once with the queued refresh mode.
function finishLoadingModel() {
  loadingModel = false;

  const queuedFullRefresh = queuedSyncModelFullRefresh;
  if (queuedFullRefresh == null) return;

  // Reset the queue slot before dispatching so that the replayed call starts
  // from a clean state.
  queuedSyncModelFullRefresh = null;
  handleSyncModel({ isFullRefresh: queuedFullRefresh });
}
|
|
72
82
|
|
|
73
83
|
async function handleFullRefresh() {
|
|
74
84
|
if (storedConnection.useSeparateSchemas && !isCompositeDbName(dbhan?.database)) {
|
|
@@ -86,7 +96,7 @@ async function handleFullRefresh() {
|
|
|
86
96
|
process.send({ msgtype: 'structureTime', analysedTime });
|
|
87
97
|
setStatusName('ok');
|
|
88
98
|
|
|
89
|
-
|
|
99
|
+
finishLoadingModel();
|
|
90
100
|
resolveAnalysedPromises();
|
|
91
101
|
}
|
|
92
102
|
|
|
@@ -111,12 +121,15 @@ async function handleIncrementalRefresh(forceSend) {
|
|
|
111
121
|
|
|
112
122
|
process.send({ msgtype: 'structureTime', analysedTime });
|
|
113
123
|
setStatusName('ok');
|
|
114
|
-
|
|
124
|
+
finishLoadingModel();
|
|
115
125
|
resolveAnalysedPromises();
|
|
116
126
|
}
|
|
117
127
|
|
|
118
128
|
function handleSyncModel({ isFullRefresh }) {
|
|
119
|
-
if (loadingModel)
|
|
129
|
+
if (loadingModel) {
|
|
130
|
+
queuedSyncModelFullRefresh = queuedSyncModelFullRefresh || !!isFullRefresh;
|
|
131
|
+
return;
|
|
132
|
+
}
|
|
120
133
|
if (isFullRefresh) handleFullRefresh();
|
|
121
134
|
else handleIncrementalRefresh();
|
|
122
135
|
}
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
const crypto = require('crypto');
|
|
2
2
|
const fs = require('fs');
|
|
3
|
-
const os = require('os');
|
|
4
|
-
const rimraf = require('rimraf');
|
|
5
3
|
const path = require('path');
|
|
6
4
|
const AsyncLock = require('async-lock');
|
|
7
5
|
const lock = new AsyncLock();
|
|
8
6
|
const stableStringify = require('json-stable-stringify');
|
|
9
7
|
const { evaluateCondition } = require('dbgate-sqltree');
|
|
10
|
-
const
|
|
8
|
+
const { getLogger, extractErrorLogData } = require('dbgate-tools');
|
|
11
9
|
const { jsldir } = require('./directories');
|
|
12
10
|
const LineReader = require('./LineReader');
|
|
11
|
+
const { sortFile } = require('./externalSort');
|
|
12
|
+
|
|
13
|
+
const logger = getLogger('JsonLinesDatastore');
|
|
13
14
|
|
|
14
15
|
class JsonLinesDatastore {
|
|
15
16
|
constructor(file, formatterFunction) {
|
|
@@ -30,33 +31,7 @@ class JsonLinesDatastore {
|
|
|
30
31
|
}
|
|
31
32
|
|
|
32
33
|
static async sortFile(infile, outfile, sort) {
|
|
33
|
-
|
|
34
|
-
fs.mkdirSync(tempDir);
|
|
35
|
-
|
|
36
|
-
await esort
|
|
37
|
-
.default({
|
|
38
|
-
input: fs.createReadStream(infile),
|
|
39
|
-
output: fs.createWriteStream(outfile),
|
|
40
|
-
deserializer: JSON.parse,
|
|
41
|
-
serializer: JSON.stringify,
|
|
42
|
-
tempDir,
|
|
43
|
-
maxHeap: 100,
|
|
44
|
-
comparer: (a, b) => {
|
|
45
|
-
for (const item of sort) {
|
|
46
|
-
const { uniqueName, order } = item;
|
|
47
|
-
if (a[uniqueName] < b[uniqueName]) {
|
|
48
|
-
return order == 'ASC' ? -1 : 1;
|
|
49
|
-
}
|
|
50
|
-
if (a[uniqueName] > b[uniqueName]) {
|
|
51
|
-
return order == 'ASC' ? 1 : -1;
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
return 0;
|
|
55
|
-
},
|
|
56
|
-
})
|
|
57
|
-
.asc();
|
|
58
|
-
|
|
59
|
-
await new Promise(resolve => rimraf(tempDir, resolve));
|
|
34
|
+
return sortFile(infile, outfile, sort);
|
|
60
35
|
}
|
|
61
36
|
|
|
62
37
|
async _closeReader() {
|
|
@@ -214,8 +189,16 @@ class JsonLinesDatastore {
|
|
|
214
189
|
if (sort && !this.sortedFiles[stableStringify(sort)]) {
|
|
215
190
|
const jslid = crypto.randomUUID();
|
|
216
191
|
const sortedFile = path.join(jsldir(), `${jslid}.jsonl`);
|
|
217
|
-
|
|
218
|
-
|
|
192
|
+
try {
|
|
193
|
+
await JsonLinesDatastore.sortFile(this.file, sortedFile, sort);
|
|
194
|
+
this.sortedFiles[stableStringify(sort)] = sortedFile;
|
|
195
|
+
} catch (e) {
|
|
196
|
+
logger.error(extractErrorLogData(e), 'DBGM-00000 Failed to sort data file, returning unsorted results');
|
|
197
|
+
// Remove any partial output file left by the failed sort so it does
|
|
198
|
+
// not accumulate in jsldir() across repeated failures.
|
|
199
|
+
try { fs.unlinkSync(sortedFile); } catch { /* best-effort */ }
|
|
200
|
+
sort = null;
|
|
201
|
+
}
|
|
219
202
|
}
|
|
220
203
|
await lock.acquire('reader', async () => {
|
|
221
204
|
await this._ensureReader(offset, filter, sort);
|
package/src/utility/authProxy.js
CHANGED
|
@@ -21,7 +21,7 @@ const AI_GATEWAY_URL = process.env.LOCAL_AI_GATEWAY
|
|
|
21
21
|
? 'http://localhost:3110'
|
|
22
22
|
: process.env.DEVWEB || process.env.DEVMODE
|
|
23
23
|
? 'https://aigw.dbgate.udolni.net'
|
|
24
|
-
: 'https://
|
|
24
|
+
: 'https://api.dbgate.cloud';
|
|
25
25
|
|
|
26
26
|
const DBGATE_API_URL = process.env.LOCAL_DBGATE_API
|
|
27
27
|
? 'http://localhost:3115'
|
|
@@ -177,7 +177,7 @@ async function obtainRefreshedLicense() {
|
|
|
177
177
|
|
|
178
178
|
try {
|
|
179
179
|
const respToken = await axios.default.post(
|
|
180
|
-
|
|
180
|
+
`https://api.dbgate.cloud/refresh-license`,
|
|
181
181
|
{},
|
|
182
182
|
{
|
|
183
183
|
headers: {
|
|
@@ -199,7 +199,7 @@ async function obtainRefreshedLicense() {
|
|
|
199
199
|
async function tryToGetRefreshedLicense(oldLicenseKey) {
|
|
200
200
|
try {
|
|
201
201
|
const respToken = await axios.default.post(
|
|
202
|
-
|
|
202
|
+
`https://api.dbgate.cloud/refresh-license`,
|
|
203
203
|
{},
|
|
204
204
|
{
|
|
205
205
|
headers: {
|
package/src/utility/cloudIntf.js
CHANGED
|
@@ -34,7 +34,7 @@ const DBGATE_CLOUD_URL = process.env.LOCAL_DBGATE_CLOUD
|
|
|
34
34
|
: process.env.PROD_DBGATE_CLOUD
|
|
35
35
|
? 'https://cloud.dbgate.io'
|
|
36
36
|
: process.env.DEVWEB || process.env.DEVMODE
|
|
37
|
-
? 'https://
|
|
37
|
+
? 'https://dev.dbgate.cloud'
|
|
38
38
|
: 'https://cloud.dbgate.io';
|
|
39
39
|
|
|
40
40
|
|
|
@@ -297,7 +297,7 @@ async function updatePremiumPromoWidget(language) {
|
|
|
297
297
|
const tags = (await collectCloudFilesSearchTags()).join(',');
|
|
298
298
|
|
|
299
299
|
const resp = await axios.default.get(
|
|
300
|
-
`${
|
|
300
|
+
`${DBGATE_PUBLIC_CLOUD_URL}/premium-promo-widget?identifier=${promoWidgetData?.identifier ?? 'empty'}&tags=${tags}`,
|
|
301
301
|
{
|
|
302
302
|
headers: {
|
|
303
303
|
...getLicenseHttpHeaders(),
|
|
@@ -508,14 +508,14 @@ async function getPromoWidgetData() {
|
|
|
508
508
|
|
|
509
509
|
async function getPromoWidgetPreview(campaign, variant) {
|
|
510
510
|
const resp = await axios.default.get(
|
|
511
|
-
`${
|
|
511
|
+
`${DBGATE_PUBLIC_CLOUD_URL}/premium-promo-widget-preview/${campaign}/${variant}`,
|
|
512
512
|
stageAxiosConfig
|
|
513
513
|
);
|
|
514
514
|
return resp.data;
|
|
515
515
|
}
|
|
516
516
|
|
|
517
517
|
async function getPromoWidgetList() {
|
|
518
|
-
const resp = await axios.default.get(`${
|
|
518
|
+
const resp = await axios.default.get(`${DBGATE_PUBLIC_CLOUD_URL}/promo-widget-list`, stageAxiosConfig);
|
|
519
519
|
return resp.data;
|
|
520
520
|
}
|
|
521
521
|
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
const crypto = require('crypto');
|
|
2
|
+
const fs = require('fs');
|
|
3
|
+
const { pipeline } = require('stream');
|
|
4
|
+
const os = require('os');
|
|
5
|
+
const readline = require('readline');
|
|
6
|
+
const path = require('path');
|
|
7
|
+
const { getLogger, extractErrorLogData } = require('dbgate-tools');
|
|
8
|
+
|
|
9
|
+
const logger = getLogger('externalSort');
|
|
10
|
+
|
|
11
|
+
// Number of rows accumulated per sorted temp-chunk during external sort.
|
|
12
|
+
// Capped so that a single chunk never exceeds ~50 MB for typical row sizes.
|
|
13
|
+
const SORT_CHUNK_SIZE = 50_000;
|
|
14
|
+
|
|
15
|
+
// Maximum number of chunk files merged simultaneously in one pass.
|
|
16
|
+
// Limits the number of concurrently open file descriptors during merge.
|
|
17
|
+
const MAX_MERGE_FAN_IN = 16;
|
|
18
|
+
|
|
19
|
+
// Async generator that yields parsed JSON objects from an internally-generated
// sorted chunk file.
//
// - Blank / whitespace-only lines are skipped.
// - Parse errors are thrown immediately: chunk files are always written by
//   this module, so a parse error indicates corruption.
// - Read errors (missing file, I/O failure) are captured directly from the
//   underlying stream and rethrown to the consumer, instead of relying on
//   readline to forward the stream's 'error' event.
//
// The readline interface and the stream are always released, whether the
// consumer finishes, fails, or stops early via iterator.return().
async function* readChunkLines(file) {
  const stream = fs.createReadStream(file);
  const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });

  let streamError = null;
  stream.on('error', err => {
    if (streamError === null) streamError = err;
    // Closing the interface ends the line iteration below so the error can
    // be surfaced even when readline itself does not report it.
    rl.close();
  });

  try {
    for await (const line of rl) {
      if (!line.trim()) continue;
      yield JSON.parse(line); // intentionally throws on bad JSON
    }
    if (streamError !== null) throw streamError;
  } finally {
    rl.close();
    stream.destroy();
  }
}
|
|
35
|
+
|
|
36
|
+
// Moves src to dest. A plain renameSync is attempted first; only when it fails
// with EXDEV (source and destination on different filesystems) does the
// function fall back to copying the bytes and then unlinking the source.
// Any other rename error is rethrown unchanged.
//
// The copy goes through stream.pipeline() so both streams are destroyed on
// failure, and the partially written destination is removed before rejecting.
async function safeRename(src, dest) {
  try {
    fs.renameSync(src, dest);
    return;
  } catch (renameErr) {
    if (renameErr.code !== 'EXDEV') throw renameErr;
  }

  await new Promise((resolve, reject) => {
    pipeline(fs.createReadStream(src), fs.createWriteStream(dest), copyErr => {
      if (!copyErr) {
        resolve();
        return;
      }
      try { fs.unlinkSync(dest); } catch { /* best-effort */ }
      reject(copyErr);
    });
  });

  fs.unlinkSync(src);
}
|
|
60
|
+
|
|
61
|
+
// Serialises rows to filePath as JSON lines (one JSON document plus '\n' per
// row) and resolves once the stream has emitted 'finish'. Rejects with the
// stream's error. Writing stops whenever write() reports a full buffer and
// continues from the next row on 'drain'.
function writeChunkFile(filePath, rows) {
  return new Promise((resolve, reject) => {
    const out = fs.createWriteStream(filePath);
    out.on('error', reject);
    out.on('finish', resolve);

    let cursor = 0;
    const pump = () => {
      while (cursor < rows.length) {
        const accepted = out.write(JSON.stringify(rows[cursor]) + '\n');
        cursor += 1;
        if (!accepted) {
          // The row just written is already buffered; wait for the buffer to
          // empty before pushing the following rows.
          out.once('drain', pump);
          return;
        }
      }
      out.end();
    };

    pump();
  });
}
|
|
80
|
+
|
|
81
|
+
// Binary min-heap used for the k-way merge. Every stored item must expose a
// `row` property; the comparator passed to the constructor is always called
// with two `row` values and the item whose row compares lowest is popped
// first. pop() on an empty heap returns undefined.
class _SortMinHeap {
  constructor(comparator) {
    this._data = [];
    this._cmp = comparator;
  }

  get size() {
    return this._data.length;
  }

  // Exchanges the items stored at positions a and b.
  _swap(a, b) {
    const data = this._data;
    const held = data[a];
    data[a] = data[b];
    data[b] = held;
  }

  push(item) {
    const data = this._data;
    data.push(item);

    // Sift the new item up while it is strictly smaller than its parent.
    let child = data.length - 1;
    while (child > 0) {
      const parent = (child - 1) >> 1;
      const isSmaller = this._cmp(data[child].row, data[parent].row) < 0;
      if (!isSmaller) break;
      this._swap(child, parent);
      child = parent;
    }
  }

  pop() {
    const data = this._data;
    const root = data[0];
    const tail = data.pop();
    if (data.length === 0) return root;

    // Move the former last item to the root and sift it down.
    data[0] = tail;
    const count = data.length;
    let node = 0;
    while (true) {
      const left = 2 * node + 1;
      const right = 2 * node + 2;
      let smallest = node;
      if (left < count && this._cmp(data[left].row, data[smallest].row) < 0) smallest = left;
      if (right < count && this._cmp(data[right].row, data[smallest].row) < 0) smallest = right;
      if (smallest === node) return root;
      this._swap(node, smallest);
      node = smallest;
    }
  }
}
|
|
119
|
+
|
|
120
|
+
// Merges the sorted chunk files in inputFiles into one sorted JSON-lines file
// at outfile. One reader is opened per input file, so callers are expected to
// pass a bounded number of files.
//
// Ordering: rows are ordered by comparator; rows that compare equal are
// emitted in the order of their input files (lowest index first), which makes
// the merge stable and its output deterministic.
//
// Failure handling: the first write-stream error is recorded permanently, so
// it is seen even when it fires between two awaits, and any wait for 'drain'
// or 'finish' rejects as soon as the stream errors instead of waiting forever.
// On any failure the output stream is destroyed and the error is rethrown;
// the partially written outfile is left for the caller to remove.
// All readers are closed, and their closing awaited, before returning.
async function mergeBatch(inputFiles, outfile, comparator) {
  const ws = fs.createWriteStream(outfile);
  const iters = inputFiles.map(f => readChunkLines(f));

  let streamError = null;
  ws.on('error', err => {
    if (streamError === null) streamError = err;
  });

  // Resolves on the next `eventName` from ws, rejects on a stream error.
  const waitFor = eventName =>
    new Promise((resolve, reject) => {
      if (streamError !== null) {
        reject(streamError);
        return;
      }
      const onEvent = () => {
        ws.off('error', onError);
        resolve();
      };
      const onError = err => {
        ws.off(eventName, onEvent);
        reject(err);
      };
      ws.once(eventName, onEvent);
      ws.once('error', onError);
    });

  // Heap entries carry { value, run } as their row so that ties can be broken
  // by the index of the run the value came from.
  const heap = new _SortMinHeap((a, b) => comparator(a.value, b.value) || a.run - b.run);

  // Pulls the next row of run number `run` into the heap, if there is one.
  const advance = async run => {
    const { value, done } = await iters[run].next();
    if (!done) heap.push({ row: { value, run }, iter: iters[run] });
  };

  try {
    await Promise.all(iters.map((_, run) => advance(run)));

    while (heap.size > 0) {
      if (streamError !== null) throw streamError;
      const { value, run } = heap.pop().row;
      if (!ws.write(JSON.stringify(value) + '\n')) await waitFor('drain');
      await advance(run);
    }

    if (streamError !== null) throw streamError;
    ws.end();
    await waitFor('finish');
  } catch (e) {
    ws.destroy();
    throw e;
  } finally {
    // return() is a no-op for exhausted readers and runs the cleanup of the
    // others; awaiting it keeps the resulting promises from floating.
    await Promise.allSettled(iters.map(it => it.return()));
  }
}
|
|
178
|
+
|
|
179
|
+
// Reduces inputFiles to a single sorted outfile in passes. While more than
// MAX_MERGE_FAN_IN files remain, consecutive groups of at most
// MAX_MERGE_FAN_IN files are each merged into an intermediate file obtained
// from nextTmpFile() (so the caller can track it for cleanup). The remaining
// files are then merged straight into outfile.
async function multiPassMerge(inputFiles, outfile, comparator, nextTmpFile) {
  let pending = inputFiles;

  while (pending.length > MAX_MERGE_FAN_IN) {
    const mergedThisPass = [];
    let start = 0;
    do {
      const group = pending.slice(start, start + MAX_MERGE_FAN_IN);
      const target = nextTmpFile();
      // Record the target before merging so it is known even if the merge fails.
      mergedThisPass.push(target);
      await mergeBatch(group, target, comparator);
      start += MAX_MERGE_FAN_IN;
    } while (start < pending.length);
    pending = mergedThisPass;
  }

  await mergeBatch(pending, outfile, comparator);
}
|
|
196
|
+
|
|
197
|
+
// Builds the row comparator for a sort specification: an array of
// { uniqueName, order } entries applied in sequence, where order 'ASC' sorts
// ascending and any other value sorts descending.
//
// null / undefined values (including a missing key or a non-object row) are
// ordered explicitly: before every other value when ascending, after when
// descending. Without this, the relational operators report such values as
// "equal" to everything, which is not a consistent ordering. Other values are
// compared with the JavaScript < and > operators.
function buildRowComparator(sort) {
  return (a, b) => {
    for (const { uniqueName, order } of sort) {
      const direction = order === 'ASC' ? 1 : -1;
      const av = a?.[uniqueName];
      const bv = b?.[uniqueName];
      const aMissing = av == null;
      const bMissing = bv == null;
      if (aMissing || bMissing) {
        if (aMissing && bMissing) continue;
        return aMissing ? -direction : direction;
      }
      if (av < bv) return -direction;
      if (av > bv) return direction;
    }
    return 0;
  };
}

// Phase 1 of the external sort: reads infile line by line, collects rows into
// chunks of about SORT_CHUNK_SIZE rows, sorts each chunk and writes it to a
// file obtained from nextTmpFile().
//
// Resolves with { headerRow, runFiles }. headerRow is the first parsed line
// when it has a truthy __isStreamHeader (it is excluded from sorting),
// otherwise null. runFiles lists the run files in input order.
// Blank lines are ignored; lines that are not valid JSON are logged and skipped.
function generateSortedRuns(infile, comparator, nextTmpFile) {
  let chunk = [];
  const runFiles = [];
  let headerRow = null;
  let isFirstNonEmptyLine = true;

  const flushChunk = async () => {
    if (chunk.length === 0) return;
    // Take ownership of the rows before the first await so that lines
    // delivered while the write is in flight start a fresh chunk.
    const rows = chunk;
    chunk = [];
    rows.sort(comparator);
    // Register the path BEFORE writing so the caller can always clean it up,
    // even if writeChunkFile fails partway through.
    const tmpFile = nextTmpFile();
    runFiles.push(tmpFile);
    await writeChunkFile(tmpFile, rows);
  };

  return new Promise((resolve, reject) => {
    const inputStream = fs.createReadStream(infile);
    const rl = readline.createInterface({ input: inputStream, crlfDelay: Infinity });
    let pendingFlush = Promise.resolve();
    let flushScheduled = false;
    let settled = false;

    const fail = err => {
      if (settled) return;
      settled = true;
      // Destroy both the readline interface and the underlying stream so
      // no file descriptors are leaked when we reject while paused.
      rl.close();
      inputStream.destroy();
      reject(err);
    };

    // Attach directly to the stream: readline.Interface does not reliably
    // forward the underlying stream's 'error' event.
    inputStream.on('error', fail);
    rl.on('error', fail);

    rl.on('line', line => {
      if (!line.trim()) return;
      let parsed;
      try {
        parsed = JSON.parse(line);
      } catch (e) {
        logger.warn(extractErrorLogData(e), 'DBGM-00000 Skipping invalid JSON line during sort');
        return;
      }
      // Detect and capture the stream header; do not include it in the sort.
      // Optional chaining keeps a literal `null` line from throwing here.
      if (isFirstNonEmptyLine) {
        isFirstNonEmptyLine = false;
        if (parsed?.__isStreamHeader) {
          headerRow = parsed;
          return;
        }
      }
      chunk.push(parsed);
      // rl.pause() does not hold back lines that are already buffered, so more
      // 'line' events can arrive before the flush starts; schedule one flush only.
      if (chunk.length >= SORT_CHUNK_SIZE && !flushScheduled) {
        flushScheduled = true;
        rl.pause();
        pendingFlush = pendingFlush
          .then(() => flushChunk())
          .then(() => {
            flushScheduled = false;
            rl.resume();
          })
          .catch(fail);
      }
    });

    rl.on('close', () => {
      if (settled) return;
      pendingFlush
        .then(() => flushChunk())
        .then(() => {
          if (settled) return;
          settled = true;
          resolve({ headerRow, runFiles });
        })
        .catch(fail);
    });
  });
}

// Writes outfile as the serialised headerRow followed by the bytes of
// dataFile. When dataFile is null, only the header line is written.
// On failure both streams are destroyed, the partial outfile is removed
// (best-effort) and the promise rejects with the error.
function writeHeaderThenData(outfile, headerRow, dataFile) {
  return new Promise((resolve, reject) => {
    const ws = fs.createWriteStream(outfile);
    let rs = null;
    let settled = false;

    const fail = err => {
      if (settled) return;
      settled = true;
      if (rs !== null) rs.destroy();
      ws.destroy();
      try { fs.unlinkSync(outfile); } catch { /* best-effort */ }
      reject(err);
    };

    ws.on('error', fail);
    ws.once('finish', () => {
      if (settled) return;
      settled = true;
      resolve();
    });

    const headerLine = JSON.stringify(headerRow) + '\n';
    if (!dataFile) {
      // Header only: no data rows.
      ws.end(headerLine);
      return;
    }

    ws.write(headerLine, writeErr => {
      if (writeErr) {
        fail(writeErr);
        return;
      }
      rs = fs.createReadStream(dataFile);
      rs.on('error', fail);
      rs.pipe(ws); // ends ws, and therefore triggers 'finish', when rs ends
    });
  });
}

// Sorts the JSON-lines file infile into outfile according to sort, an array of
// { uniqueName, order } entries (see buildRowComparator for the ordering).
//
// The input is split into sorted runs stored in a private temporary directory
// and the runs are then merged, so memory use is bounded by roughly one chunk
// of rows rather than the whole file. If the first parsed line has a truthy
// __isStreamHeader, it is kept as the first line of outfile and is not sorted.
//
// Rejects with the underlying error if reading, writing or merging fails.
// Temporary files and the temporary directory are removed (best-effort) in
// every case.
async function sortFile(infile, outfile, sort) {
  const comparator = buildRowComparator(sort);

  const tmpDir = path.join(os.tmpdir(), `dbgate-sort-${crypto.randomUUID()}`);
  fs.mkdirSync(tmpDir, { recursive: true });

  // All tmp paths are registered here BEFORE any write attempt so that the
  // finally block can unlink partial files even when a write fails mid-way.
  const createdTmpFiles = new Set();
  let tmpCounter = 0;
  const nextTmpFile = () => {
    const f = path.join(tmpDir, `es-${tmpCounter++}.jsonl`);
    createdTmpFiles.add(f);
    return f;
  };

  try {
    // Phase 1: generate sorted runs.
    const { headerRow, runFiles } = await generateSortedRuns(infile, comparator, nextTmpFile);

    // Phase 2: merge the runs into outfile.
    if (headerRow !== null) {
      // The header must be the first line of outfile: reduce the data to one
      // file (or use the single run directly), then write header + data.
      let mergedDataFile = null;
      if (runFiles.length === 1) {
        mergedDataFile = runFiles[0]; // cleaned up by the finally block
      } else if (runFiles.length > 1) {
        mergedDataFile = nextTmpFile();
        await multiPassMerge(runFiles, mergedDataFile, comparator, nextTmpFile);
      }
      await writeHeaderThenData(outfile, headerRow, mergedDataFile);
    } else if (runFiles.length === 0) {
      fs.writeFileSync(outfile, '');
    } else if (runFiles.length === 1) {
      // safeRename falls back to copy + unlink when the rename would cross
      // filesystems, so outfile is populated in both cases.
      await safeRename(runFiles[0], outfile);
      // The run no longer exists under its temporary name; drop it from the
      // cleanup set.
      createdTmpFiles.delete(runFiles[0]);
    } else {
      await multiPassMerge(runFiles, outfile, comparator, nextTmpFile);
    }
  } finally {
    for (const f of createdTmpFiles) {
      try { fs.unlinkSync(f); } catch { /* best-effort cleanup */ }
    }
    try { fs.rmdirSync(tmpDir); } catch { /* best-effort cleanup */ }
  }
}
|
|
365
|
+
|
|
366
|
+
module.exports = { sortFile };
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const os = require('os');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
const { sortFile } = require('./externalSort');
|
|
7
|
+
|
|
8
|
+
// ── helpers ──────────────────────────────────────────────────────────────────
|
|
9
|
+
|
|
10
|
+
// Returns a fresh path inside the OS temp directory for a test fixture.
// The name combines the current timestamp with a random base-36 suffix;
// the file itself is not created.
function tmpFile() {
  const stamp = Date.now();
  const suffix = Math.random().toString(36).slice(2);
  return path.join(os.tmpdir(), `externalSortTest-${stamp}-${suffix}.jsonl`);
}
|
|
13
|
+
|
|
14
|
+
// Writes rows to filePath as JSON lines: one serialised row per line, each
// terminated by '\n'. An empty rows array produces an empty file.
function writeJsonl(filePath, rows) {
  let payload = '';
  if (rows.length) {
    const serialised = rows.map(row => JSON.stringify(row));
    payload = serialised.join('\n') + '\n';
  }
  fs.writeFileSync(filePath, payload);
}
|
|
18
|
+
|
|
19
|
+
// Reads filePath as UTF-8 JSON lines and returns the parsed rows in file
// order. Blank and whitespace-only lines are ignored; an invalid line makes
// JSON.parse throw.
function readJsonl(filePath) {
  const text = fs.readFileSync(filePath, 'utf8');
  const nonBlankLines = [];
  for (const rawLine of text.split('\n')) {
    if (rawLine.trim()) nonBlankLines.push(rawLine);
  }
  return nonBlankLines.map(rawLine => JSON.parse(rawLine));
}
|
|
26
|
+
|
|
27
|
+
// Runs fn(inFile, outFile) with two fresh temporary paths and removes both
// paths afterwards, whether fn resolves or throws. Removal failures (for
// example when fn never created a file) are ignored.
async function withTmpPair(fn) {
  const inFile = tmpFile();
  const outFile = tmpFile();

  const removeQuietly = target => {
    try { fs.unlinkSync(target); } catch { /* best-effort */ }
  };

  try {
    await fn(inFile, outFile);
  } finally {
    removeQuietly(inFile);
    removeQuietly(outFile);
  }
}
|
|
38
|
+
|
|
39
|
+
// ── tests ─────────────────────────────────────────────────────────────────────
|
|
40
|
+
|
|
41
|
+
describe('externalSort', () => {
|
|
42
|
+
|
|
43
|
+
// ── Basic sorts ────────────────────────────────────────────────────────────
|
|
44
|
+
|
|
45
|
+
test('sorts numbers ASC', async () => {
|
|
46
|
+
await withTmpPair(async (inFile, outFile) => {
|
|
47
|
+
writeJsonl(inFile, [{ id: 3 }, { id: 1 }, { id: 2 }]);
|
|
48
|
+
await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'ASC' }]);
|
|
49
|
+
expect(readJsonl(outFile).map(r => r.id)).toEqual([1, 2, 3]);
|
|
50
|
+
});
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
test('sorts numbers DESC', async () => {
|
|
54
|
+
await withTmpPair(async (inFile, outFile) => {
|
|
55
|
+
writeJsonl(inFile, [{ id: 1 }, { id: 3 }, { id: 2 }]);
|
|
56
|
+
await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'DESC' }]);
|
|
57
|
+
expect(readJsonl(outFile).map(r => r.id)).toEqual([3, 2, 1]);
|
|
58
|
+
});
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
test('sorts strings ASC', async () => {
|
|
62
|
+
await withTmpPair(async (inFile, outFile) => {
|
|
63
|
+
writeJsonl(inFile, [{ name: 'Zebra' }, { name: 'Apple' }, { name: 'Mango' }]);
|
|
64
|
+
await sortFile(inFile, outFile, [{ uniqueName: 'name', order: 'ASC' }]);
|
|
65
|
+
expect(readJsonl(outFile).map(r => r.name)).toEqual(['Apple', 'Mango', 'Zebra']);
|
|
66
|
+
});
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
test('sorts strings DESC', async () => {
|
|
70
|
+
await withTmpPair(async (inFile, outFile) => {
|
|
71
|
+
writeJsonl(inFile, [{ name: 'Zebra' }, { name: 'Apple' }, { name: 'Mango' }]);
|
|
72
|
+
await sortFile(inFile, outFile, [{ uniqueName: 'name', order: 'DESC' }]);
|
|
73
|
+
expect(readJsonl(outFile).map(r => r.name)).toEqual(['Zebra', 'Mango', 'Apple']);
|
|
74
|
+
});
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
// ── Multi-field sort ────────────────────────────────────────────────────────
|
|
78
|
+
|
|
79
|
+
test('sorts by multiple fields (primary ASC, secondary DESC)', async () => {
|
|
80
|
+
await withTmpPair(async (inFile, outFile) => {
|
|
81
|
+
writeJsonl(inFile, [
|
|
82
|
+
{ category: 'B', value: 1 },
|
|
83
|
+
{ category: 'A', value: 2 },
|
|
84
|
+
{ category: 'A', value: 5 },
|
|
85
|
+
{ category: 'B', value: 3 },
|
|
86
|
+
]);
|
|
87
|
+
await sortFile(inFile, outFile, [
|
|
88
|
+
{ uniqueName: 'category', order: 'ASC' },
|
|
89
|
+
{ uniqueName: 'value', order: 'DESC' },
|
|
90
|
+
]);
|
|
91
|
+
const rows = readJsonl(outFile);
|
|
92
|
+
expect(rows.map(r => [r.category, r.value])).toEqual([['A', 5], ['A', 2], ['B', 3], ['B', 1]]);
|
|
93
|
+
});
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
// ── With stream header ──────────────────────────────────────────────────────
|
|
97
|
+
|
|
98
|
+
test('with header: header is first row, data rows are sorted', async () => {
|
|
99
|
+
await withTmpPair(async (inFile, outFile) => {
|
|
100
|
+
writeJsonl(inFile, [
|
|
101
|
+
{ __isStreamHeader: true, title: 'dataset' },
|
|
102
|
+
{ id: 3, val: 'c' },
|
|
103
|
+
{ id: 1, val: 'a' },
|
|
104
|
+
{ id: 2, val: 'b' },
|
|
105
|
+
]);
|
|
106
|
+
await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'ASC' }]);
|
|
107
|
+
const rows = readJsonl(outFile);
|
|
108
|
+
expect(rows[0].__isStreamHeader).toBe(true);
|
|
109
|
+
expect(rows.slice(1).map(r => r.id)).toEqual([1, 2, 3]);
|
|
110
|
+
});
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
// The header row must come back with every one of its fields, not just the
// __isStreamHeader marker.
test('with header: header fields are preserved intact', async () => {
  const sortSpec = [{ uniqueName: 'a', order: 'ASC' }];
  const expectedHeader = { __isStreamHeader: true, columns: ['a', 'b'], extra: 42 };

  await withTmpPair(async (inFile, outFile) => {
    const fileContents = [
      { __isStreamHeader: true, columns: ['a', 'b'], extra: 42 },
      { a: 2, b: 'x' },
      { a: 1, b: 'y' },
    ];
    writeJsonl(inFile, fileContents);
    await sortFile(inFile, outFile, sortSpec);

    const firstRow = readJsonl(outFile)[0];
    expect(firstRow).toEqual(expectedHeader);
  });
});
|
|
125
|
+
|
|
126
|
+
// Header handling combined with a two-key sort: dept ascending, salary
// descending within each dept.
test('with header + multi-field sort', async () => {
  const sortSpec = [
    { uniqueName: 'dept', order: 'ASC' },
    { uniqueName: 'salary', order: 'DESC' },
  ];
  const expectedOrder = [
    ['HR', 70000],
    ['HR', 50000],
    ['IT', 80000],
    ['IT', 60000],
  ];

  await withTmpPair(async (inFile, outFile) => {
    writeJsonl(inFile, [
      { __isStreamHeader: true },
      { dept: 'HR', salary: 50000 },
      { dept: 'IT', salary: 80000 },
      { dept: 'HR', salary: 70000 },
      { dept: 'IT', salary: 60000 },
    ]);
    await sortFile(inFile, outFile, sortSpec);

    const outputRows = readJsonl(outFile);
    expect(outputRows[0].__isStreamHeader).toBe(true);

    // Everything after the header is data; project it onto the sort columns.
    const actualOrder = outputRows.slice(1).map(row => [row.dept, row.salary]);
    expect(actualOrder).toEqual(expectedOrder);
  });
});
|
|
149
|
+
|
|
150
|
+
// ── Without stream header ───────────────────────────────────────────────────
|
|
151
|
+
|
|
152
|
+
// Input with no header row: output must contain exactly the data rows, sorted,
// and the first output row must not be marked as a stream header.
test('without header: all rows sorted, no header injected', async () => {
  await withTmpPair(async (inFile, outFile) => {
    const unsortedValues = [30, 10, 20];
    writeJsonl(
      inFile,
      unsortedValues.map(x => ({ x }))
    );

    await sortFile(inFile, outFile, [{ uniqueName: 'x', order: 'ASC' }]);

    const outputRows = readJsonl(outFile);
    expect(outputRows.length).toBe(3);
    expect(outputRows[0].__isStreamHeader).toBeFalsy();

    const sortedValues = outputRows.map(row => row.x);
    expect(sortedValues).toEqual([10, 20, 30]);
  });
});
|
|
162
|
+
|
|
163
|
+
// ── Edge cases ───────────────────────────────────────────────────────────────
|
|
164
|
+
|
|
165
|
+
// A zero-byte input file must yield a zero-byte output file.
test('edge: empty file produces empty output', async () => {
  const sortSpec = [{ uniqueName: 'id', order: 'ASC' }];

  await withTmpPair(async (inFile, outFile) => {
    fs.writeFileSync(inFile, '');
    await sortFile(inFile, outFile, sortSpec);

    // Read the raw text rather than parsed rows so stray bytes are caught too.
    const outputText = fs.readFileSync(outFile, 'utf8');
    expect(outputText).toBe('');
  });
});
|
|
172
|
+
|
|
173
|
+
// A file holding exactly one data row: sorting has nothing to reorder, so the
// output must be that same row with every field intact.
test('edge: single data row is passed through unchanged', async () => {
  await withTmpPair(async (inFile, outFile) => {
    writeJsonl(inFile, [{ id: 42, msg: 'only one' }]);
    await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'ASC' }]);
    const rows = readJsonl(outFile);
    expect(rows.length).toBe(1);
    expect(rows[0].id).toBe(42);
    // "Unchanged" covers the non-key fields too: compare the whole row so a
    // dropped or altered `msg` is detected, not just the sort key.
    expect(rows[0]).toEqual({ id: 42, msg: 'only one' });
  });
});
|
|
182
|
+
|
|
183
|
+
// Input consisting solely of a header row: the output must be that single
// row, still flagged as the stream header.
test('edge: header-only file (no data rows) produces only header', async () => {
  const headerOnly = [{ __isStreamHeader: true, columns: ['a', 'b'] }];
  const sortSpec = [{ uniqueName: 'a', order: 'ASC' }];

  await withTmpPair(async (inFile, outFile) => {
    writeJsonl(inFile, headerOnly);
    await sortFile(inFile, outFile, sortSpec);

    const outputRows = readJsonl(outFile);
    expect(outputRows.length).toBe(1);
    expect(outputRows[0].__isStreamHeader).toBe(true);
  });
});
|
|
192
|
+
|
|
193
|
+
// Every row shares the same sort key, so the comparator sees only ties. No
// row may be lost or duplicated; relative order among ties is not asserted.
test('edge: all rows have equal sort key — all rows preserved', async () => {
  await withTmpPair(async (inFile, outFile) => {
    writeJsonl(inFile, [
      { score: 10, label: 'a' },
      { score: 10, label: 'b' },
      { score: 10, label: 'c' },
    ]);
    await sortFile(inFile, outFile, [{ uniqueName: 'score', order: 'ASC' }]);
    const rows = readJsonl(outFile);
    expect(rows.length).toBe(3);
    expect(rows.every(r => r.score === 10)).toBe(true);
    // Count + score alone would still pass if one row were emitted twice in
    // place of another. Check the distinguishing labels, ignoring order.
    const labels = rows.map(r => r.label).sort();
    expect(labels).toEqual(['a', 'b', 'c']);
  });
});
|
|
206
|
+
|
|
207
|
+
// One line of the input is not parseable JSON; the expected output contains
// only the three valid rows, in id order.
test('edge: invalid JSON lines are skipped gracefully', async () => {
  const rawInput = '{"id":3}\nNOT_JSON\n{"id":1}\n{"id":2}\n';
  const sortSpec = [{ uniqueName: 'id', order: 'ASC' }];

  await withTmpPair(async (inFile, outFile) => {
    // Written as raw text because the input deliberately contains a non-JSON line.
    fs.writeFileSync(inFile, rawInput);
    await sortFile(inFile, outFile, sortSpec);

    const sortedIds = readJsonl(outFile).map(row => row.id);
    expect(sortedIds).toEqual([1, 2, 3]);
  });
});
|
|
215
|
+
|
|
216
|
+
// Empty lines between and after records must not turn into output rows.
test('edge: blank lines in input are ignored', async () => {
  const rawInput = '{"id":2}\n\n{"id":1}\n\n';
  const sortSpec = [{ uniqueName: 'id', order: 'ASC' }];

  await withTmpPair(async (inFile, outFile) => {
    // Written as raw text so the blank lines reach the file exactly as given.
    fs.writeFileSync(inFile, rawInput);
    await sortFile(inFile, outFile, sortSpec);

    const sortedIds = readJsonl(outFile).map(row => row.id);
    expect(sortedIds).toEqual([1, 2]);
  });
});
|
|
224
|
+
|
|
225
|
+
// A row lacking the sort field must still appear in the output. Only presence
// is asserted here, not where that row lands in the ordering.
test('edge: rows missing sort key field are included in output', async () => {
  const inputRows = [
    { id: 2, name: 'Bob' },
    { name: 'Alice' }, // deliberately has no id field
    { id: 1, name: 'Charlie' },
  ];
  // One predicate per row that has to be found in the output.
  const mustBePresent = [row => row.name === 'Alice', row => row.id === 1, row => row.id === 2];

  await withTmpPair(async (inFile, outFile) => {
    writeJsonl(inFile, inputRows);
    await sortFile(inFile, outFile, [{ uniqueName: 'id', order: 'ASC' }]);

    const outputRows = readJsonl(outFile);
    expect(outputRows.length).toBe(3);
    for (const matches of mustBePresent) {
      expect(outputRows.find(matches)).toBeTruthy();
    }
  });
});
|
|
240
|
+
|
|
241
|
+
// Non-ASCII strings must come back exactly as written. Only presence is
// checked; the relative order of accented words is not asserted.
test('edge: unicode values are preserved correctly', async () => {
  const inputWords = ['über', 'apple', 'éclair'];

  await withTmpPair(async (inFile, outFile) => {
    writeJsonl(
      inFile,
      inputWords.map(word => ({ word }))
    );
    await sortFile(inFile, outFile, [{ uniqueName: 'word', order: 'ASC' }]);

    const outputRows = readJsonl(outFile);
    expect(outputRows.length).toBe(3);

    const seenWords = new Set(outputRows.map(row => row.word));
    for (const word of inputWords) {
      expect(seenWords.has(word)).toBe(true);
    }
  });
});
|
|
257
|
+
|
|
258
|
+
// Two rows tie on pk, so the secondary key sk decides their order; the row
// with the other pk value sorts after both.
test('edge: two rows with same primary key, different secondary key', async () => {
  const inputRows = [
    { pk: 'a', sk: 2 },
    { pk: 'b', sk: 1 },
    { pk: 'a', sk: 1 },
  ];
  const sortSpec = [
    { uniqueName: 'pk', order: 'ASC' },
    { uniqueName: 'sk', order: 'ASC' },
  ];
  const expectedPairs = [['a', 1], ['a', 2], ['b', 1]];

  await withTmpPair(async (inFile, outFile) => {
    writeJsonl(inFile, inputRows);
    await sortFile(inFile, outFile, sortSpec);

    const actualPairs = readJsonl(outFile).map(row => [row.pk, row.sk]);
    expect(actualPairs).toEqual(expectedPairs);
  });
});
|
|
273
|
+
|
|
274
|
+
});
|
|
275
|
+
|