sfmc-dataloader 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/async-status.mjs +11 -10
- package/lib/batch.mjs +2 -2
- package/lib/business-units.mjs +12 -1
- package/lib/cli.mjs +178 -86
- package/lib/config.mjs +31 -2
- package/lib/cross-bu-import.mjs +133 -61
- package/lib/export-de.mjs +196 -16
- package/lib/file-resolve.mjs +71 -0
- package/lib/filename.mjs +25 -5
- package/lib/import-de.mjs +160 -37
- package/lib/init-project.mjs +2 -1
- package/lib/log.mjs +56 -0
- package/lib/multi-bu-export.mjs +8 -3
- package/lib/read-rows.mjs +156 -27
- package/lib/row-count.mjs +2 -1
- package/package.json +2 -2
package/lib/config.mjs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import fs from 'node:fs';
|
|
2
2
|
import path from 'node:path';
|
|
3
|
+
import { log } from './log.mjs';
|
|
3
4
|
|
|
4
5
|
export const FILE_MCDEV_RC = '.mcdevrc.json';
|
|
5
6
|
export const FILE_MCDEV_AUTH = '.mcdev-auth.json';
|
|
@@ -39,7 +40,7 @@ export const WARN_MCDATA_SUPERSEDED =
|
|
|
39
40
|
* @returns {{ mcdevrc: Mcdevrc, mcdevAuth: Record<string, AuthCredential> }}
|
|
40
41
|
*/
|
|
41
42
|
export function loadProjectConfig(projectRoot, options = {}) {
|
|
42
|
-
const err = options.stderr ?? ((msg) =>
|
|
43
|
+
const err = options.stderr ?? ((msg) => log.error(msg));
|
|
43
44
|
const rcMcdev = path.join(projectRoot, FILE_MCDEV_RC);
|
|
44
45
|
const authMcdev = path.join(projectRoot, FILE_MCDEV_AUTH);
|
|
45
46
|
const rcMcdata = path.join(projectRoot, FILE_MCDATA_RC);
|
|
@@ -170,9 +171,37 @@ export function buildSdkAuthObject(authCred, mid) {
|
|
|
170
171
|
*/
|
|
171
172
|
export function buildSdkOptions(logger = null) {
|
|
172
173
|
/** @type {import('sfmc-sdk').SdkOptions} */
|
|
173
|
-
const options = {
|
|
174
|
+
const options = {
|
|
175
|
+
requestAttempts: 3,
|
|
176
|
+
retryOnConnectionError: true,
|
|
177
|
+
eventHandlers: {
|
|
178
|
+
onLoop: (_type, accumulator, context) => {
|
|
179
|
+
if (context) {
|
|
180
|
+
log.info(
|
|
181
|
+
`Downloading batch ${context.nextPage} of ${context.totalPages} (${context.accumulatedCount} records so far)`,
|
|
182
|
+
);
|
|
183
|
+
} else {
|
|
184
|
+
log.info(
|
|
185
|
+
`Downloading next batch (currently ${accumulator?.length ?? 0} records)`,
|
|
186
|
+
);
|
|
187
|
+
}
|
|
188
|
+
},
|
|
189
|
+
onConnectionError: (ex, remainingAttempts) => {
|
|
190
|
+
const endpointStr = ex.endpoint ? String(ex.endpoint) : '';
|
|
191
|
+
const endpointSuffix = endpointStr
|
|
192
|
+
? ` - ${endpointStr.split('rest.marketingcloudapis.com')[1] ?? endpointStr}`
|
|
193
|
+
: '';
|
|
194
|
+
log.warn(
|
|
195
|
+
`Connection problem (Code: ${ex.code}). Retrying ${remainingAttempts} time${
|
|
196
|
+
remainingAttempts > 1 ? 's' : ''
|
|
197
|
+
}${endpointSuffix}`,
|
|
198
|
+
);
|
|
199
|
+
},
|
|
200
|
+
},
|
|
201
|
+
};
|
|
174
202
|
if (logger) {
|
|
175
203
|
options.eventHandlers = {
|
|
204
|
+
...options.eventHandlers,
|
|
176
205
|
logRequest: (req) => {
|
|
177
206
|
const msg = structuredClone(req);
|
|
178
207
|
if (msg.headers?.Authorization) {
|
package/lib/cross-bu-import.mjs
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
import { createReadStream, createWriteStream } from 'node:fs';
|
|
1
2
|
import fs from 'node:fs/promises';
|
|
2
3
|
import path from 'node:path';
|
|
4
|
+
import { finished, pipeline } from 'node:stream/promises';
|
|
3
5
|
import readline from 'node:readline/promises';
|
|
4
6
|
import { stdin as input, stdout as output } from 'node:process';
|
|
5
7
|
import SDK from 'sfmc-sdk';
|
|
@@ -10,15 +12,42 @@ import {
|
|
|
10
12
|
serializeRows,
|
|
11
13
|
exportDataExtensionToFile,
|
|
12
14
|
} from './export-de.mjs';
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
+
import { MAX_OBJECTS_PER_BATCH } from './batch.mjs';
|
|
16
|
+
import { formatFromExtension, resolveImportSet } from './file-resolve.mjs';
|
|
17
|
+
import {
|
|
18
|
+
assertNonEmptyImportRowCount,
|
|
19
|
+
importRowsForDe,
|
|
20
|
+
importRowsStreamingForDe,
|
|
21
|
+
warnIfImportCountUnexpected,
|
|
22
|
+
} from './import-de.mjs';
|
|
15
23
|
import { pollAsyncImportCompletion } from './async-status.mjs';
|
|
16
|
-
import {
|
|
24
|
+
import {
|
|
25
|
+
countDataRowsFromImportPaths,
|
|
26
|
+
readRowsFromImportPaths,
|
|
27
|
+
streamRowsFromImportPaths,
|
|
28
|
+
} from './read-rows.mjs';
|
|
17
29
|
import { clearDataExtensionRows } from './clear-de.mjs';
|
|
18
30
|
import { confirmClearBeforeImport } from './confirm-clear.mjs';
|
|
19
31
|
import { dataDirectoryForBu } from './paths.mjs';
|
|
20
32
|
import { buildExportBasename, filesystemSafeTimestamp, parseExportBasename } from './filename.mjs';
|
|
21
33
|
import { getDeRowCount } from './row-count.mjs';
|
|
34
|
+
import { log } from './log.mjs';
|
|
35
|
+
|
|
36
|
+
/**
 * Concatenates source files into a single snapshot on disk (streaming; bounded memory).
 *
 * Sources are appended in the order given. An empty `srcPaths` list produces an empty file.
 * If any source cannot be read (or the destination cannot be written) the destination
 * stream is destroyed before the error is rethrown, so no file handle is left open.
 * A partially written destination file is not removed.
 *
 * @param {string} destPath
 * @param {string[]} srcPaths
 * @returns {Promise.<void>}
 */
async function concatenateFilesToPath(destPath, srcPaths) {
    const out = createWriteStream(destPath);
    try {
        for (const src of srcPaths) {
            // end: false keeps the destination open so the next source can be appended
            await pipeline(createReadStream(src), out, { end: false });
        }
        out.end();
        await finished(out);
    } catch (ex) {
        // with end: false the destination is left open by pipeline, so release it explicitly;
        // destroy() is a no-op if the stream was already torn down
        out.destroy();
        throw ex;
    }
}
|
|
22
51
|
|
|
23
52
|
/**
|
|
24
53
|
* @typedef {{ credential: string, bu: string }} CredBuTarget
|
|
@@ -111,20 +140,24 @@ export async function crossBuImport(params) {
|
|
|
111
140
|
const filePaths = params.filePaths ?? null;
|
|
112
141
|
const isFileBased = filePaths !== null && filePaths.length > 0;
|
|
113
142
|
|
|
114
|
-
// Derive DE keys: from explicit list (API mode) or from filenames (file mode)
|
|
143
|
+
// Derive DE keys: from explicit list (API mode) or from filenames (file mode, first-seen order)
|
|
144
|
+
/** @type {string[]} */
|
|
115
145
|
const deKeys = isFileBased
|
|
116
|
-
?
|
|
146
|
+
? (() => {
|
|
147
|
+
/** @type {string[]} */
|
|
148
|
+
const keys = [];
|
|
149
|
+
const seen = new Set();
|
|
150
|
+
for (const fp of filePaths) {
|
|
151
|
+
const k = parseExportBasename(path.basename(fp)).customerKey;
|
|
152
|
+
if (!seen.has(k)) {
|
|
153
|
+
seen.add(k);
|
|
154
|
+
keys.push(k);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
return keys;
|
|
158
|
+
})()
|
|
117
159
|
: (params.deKeys ?? []);
|
|
118
160
|
|
|
119
|
-
// Build a lookup map from deKey → filePath for file mode
|
|
120
|
-
/** @type {Map<string, string>} */
|
|
121
|
-
const fileByDeKey = new Map();
|
|
122
|
-
if (isFileBased) {
|
|
123
|
-
for (const fp of filePaths) {
|
|
124
|
-
fileByDeKey.set(parseExportBasename(path.basename(fp)).customerKey, fp);
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
|
|
128
161
|
// Validate all target BU configurations upfront
|
|
129
162
|
for (const { credential, bu } of targets) {
|
|
130
163
|
resolveCredentialAndMid(mcdevrc, mcdevAuth, credential, bu);
|
|
@@ -156,7 +189,7 @@ export async function crossBuImport(params) {
|
|
|
156
189
|
const { mid, authCred } = resolveCredentialAndMid(mcdevrc, mcdevAuth, credential, bu);
|
|
157
190
|
const tgtSdk = new SDK(buildSdkAuthObject(authCred, mid), buildSdkOptions(logger));
|
|
158
191
|
for (const deKey of deKeys) {
|
|
159
|
-
const {
|
|
192
|
+
const { paths: outPaths, rowCount } = await exportDataExtensionToFile(tgtSdk, {
|
|
160
193
|
projectRoot,
|
|
161
194
|
credentialName: credential,
|
|
162
195
|
buName: bu,
|
|
@@ -164,7 +197,8 @@ export async function crossBuImport(params) {
|
|
|
164
197
|
format,
|
|
165
198
|
useGit: false,
|
|
166
199
|
});
|
|
167
|
-
|
|
200
|
+
const label = outPaths.map((p) => `"${path.resolve(p)}"`).join(', ');
|
|
201
|
+
log.info(`Backup export: ${label} (${rowCount} rows)`);
|
|
168
202
|
}
|
|
169
203
|
}
|
|
170
204
|
}
|
|
@@ -176,35 +210,51 @@ export async function crossBuImport(params) {
|
|
|
176
210
|
|
|
177
211
|
let hasError = false;
|
|
178
212
|
|
|
179
|
-
// Load rows once per DE then fan out to every target
|
|
213
|
+
// Load rows once per DE then fan out to every target (CSV/TSV file mode streams to avoid OOM)
|
|
180
214
|
for (const deKey of deKeys) {
|
|
181
|
-
|
|
215
|
+
/** @type {object[]|null} */
|
|
216
|
+
let rows = null;
|
|
217
|
+
/** @type {string[]|null} */
|
|
218
|
+
let streamingImportPaths = null;
|
|
219
|
+
/** @type {'csv'|'tsv'|'json'|null} */
|
|
220
|
+
let streamingDetectedFormat = null;
|
|
221
|
+
|
|
182
222
|
if (isFileBased) {
|
|
183
|
-
const
|
|
184
|
-
|
|
223
|
+
const groupPaths = filePaths.filter(
|
|
224
|
+
(fp) => parseExportBasename(path.basename(fp)).customerKey === deKey,
|
|
225
|
+
);
|
|
226
|
+
const { paths: importPaths } = await resolveImportSet(groupPaths);
|
|
227
|
+
if (importPaths.length === 0) {
|
|
228
|
+
throw new Error(`No resolvable import files for DE "${deKey}".`);
|
|
229
|
+
}
|
|
230
|
+
const detectedFormat = formatFromExtension(importPaths[0]);
|
|
185
231
|
if (!detectedFormat) {
|
|
186
232
|
throw new Error(
|
|
187
|
-
`Cannot determine format for file: ${
|
|
233
|
+
`Cannot determine format for file: ${importPaths[0]}. Use .csv, .tsv, or .json extension.`,
|
|
188
234
|
);
|
|
189
235
|
}
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
236
|
+
if (detectedFormat === 'csv' || detectedFormat === 'tsv') {
|
|
237
|
+
streamingImportPaths = importPaths;
|
|
238
|
+
streamingDetectedFormat = detectedFormat;
|
|
239
|
+
} else {
|
|
240
|
+
rows = await readRowsFromImportPaths(importPaths, detectedFormat);
|
|
241
|
+
if (rows.length === 0) {
|
|
242
|
+
throw new Error(
|
|
243
|
+
`Import files contain no data rows for DE "${deKey}". ` +
|
|
244
|
+
`The files may be empty, contain only a BOM, or contain only a header row.`,
|
|
245
|
+
);
|
|
246
|
+
}
|
|
197
247
|
}
|
|
198
248
|
} else {
|
|
199
249
|
rows = await fetchAllRowObjects(srcSdk, deKey);
|
|
200
250
|
}
|
|
201
251
|
|
|
202
252
|
let snapshotColumns = [];
|
|
203
|
-
if (rows.length === 0 && format !== 'json') {
|
|
253
|
+
if (rows && rows.length === 0 && format !== 'json') {
|
|
204
254
|
try {
|
|
205
255
|
snapshotColumns = await fetchDataExtensionFieldNames(srcSdk.soap, deKey);
|
|
206
256
|
} catch (ex) {
|
|
207
|
-
|
|
257
|
+
log.warn(
|
|
208
258
|
`Warning: could not retrieve field names for empty DE "${deKey}" (snapshot): ${ex.message}`,
|
|
209
259
|
);
|
|
210
260
|
}
|
|
@@ -215,19 +265,21 @@ export async function crossBuImport(params) {
|
|
|
215
265
|
const tgtSdk = new SDK(buildSdkAuthObject(authCred, mid), buildSdkOptions(logger));
|
|
216
266
|
|
|
217
267
|
const countBefore = await getDeRowCount(tgtSdk, deKey);
|
|
218
|
-
|
|
268
|
+
log.info(
|
|
219
269
|
`Row count before import: ${countBefore ?? '(unavailable)'} (${credential}/${bu} DE "${deKey}")`,
|
|
220
270
|
);
|
|
221
271
|
|
|
222
272
|
// Clear target before import (already confirmed above); skip if DE is empty
|
|
273
|
+
let clearedTargetDe = false;
|
|
223
274
|
if (clearBeforeImport) {
|
|
224
275
|
if (countBefore === 0) {
|
|
225
|
-
|
|
276
|
+
log.info(
|
|
226
277
|
`Skipping clear-data for ${credential}/${bu} DE "${deKey}" — DE is already empty.`,
|
|
227
278
|
);
|
|
228
279
|
} else {
|
|
229
280
|
await clearDataExtensionRows(tgtSdk.soap, deKey);
|
|
230
|
-
|
|
281
|
+
clearedTargetDe = true;
|
|
282
|
+
log.warn(`Cleared data: ${credential}/${bu} DE "${deKey}"`);
|
|
231
283
|
}
|
|
232
284
|
}
|
|
233
285
|
|
|
@@ -237,19 +289,46 @@ export async function crossBuImport(params) {
|
|
|
237
289
|
const ts = filesystemSafeTimestamp();
|
|
238
290
|
const basename = buildExportBasename(deKey, ts, format, false);
|
|
239
291
|
const snapshotPath = path.join(dir, basename);
|
|
240
|
-
await fs.writeFile(
|
|
241
|
-
snapshotPath,
|
|
242
|
-
serializeRows(rows, format, false, snapshotColumns),
|
|
243
|
-
'utf8',
|
|
244
|
-
);
|
|
245
|
-
console.error(`Download stored: "${path.resolve(snapshotPath)}" (${rows.length} rows)`);
|
|
246
292
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
293
|
+
/** @type {{ count: number, requestIds: (string|null)[] }} */
|
|
294
|
+
let importResult;
|
|
295
|
+
if (streamingImportPaths && streamingDetectedFormat) {
|
|
296
|
+
await concatenateFilesToPath(snapshotPath, streamingImportPaths);
|
|
297
|
+
log.info(`Download stored: "${path.resolve(snapshotPath)}"`);
|
|
298
|
+
const rowCount = await countDataRowsFromImportPaths(
|
|
299
|
+
streamingImportPaths,
|
|
300
|
+
streamingDetectedFormat,
|
|
301
|
+
);
|
|
302
|
+
assertNonEmptyImportRowCount(rowCount, streamingImportPaths.join(', '));
|
|
303
|
+
const totalMemoryBatches = Math.max(1, Math.ceil(rowCount / MAX_OBJECTS_PER_BATCH));
|
|
304
|
+
const rowSource = streamRowsFromImportPaths(
|
|
305
|
+
streamingImportPaths,
|
|
306
|
+
streamingDetectedFormat,
|
|
307
|
+
);
|
|
308
|
+
importResult = await importRowsStreamingForDe(tgtSdk, {
|
|
309
|
+
deKey,
|
|
310
|
+
rowSource,
|
|
311
|
+
mode,
|
|
312
|
+
totalMemoryBatches,
|
|
313
|
+
});
|
|
314
|
+
} else {
|
|
315
|
+
await fs.writeFile(
|
|
316
|
+
snapshotPath,
|
|
317
|
+
serializeRows(/** @type {object[]} */ (rows), format, false, snapshotColumns),
|
|
318
|
+
'utf8',
|
|
319
|
+
);
|
|
320
|
+
log.info(
|
|
321
|
+
`Download stored: "${path.resolve(snapshotPath)}" (${/** @type {object[]} */ (rows).length} rows)`,
|
|
322
|
+
);
|
|
323
|
+
importResult = await importRowsForDe(tgtSdk, {
|
|
324
|
+
deKey,
|
|
325
|
+
rows: /** @type {object[]} */ (rows),
|
|
326
|
+
mode,
|
|
327
|
+
});
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
const { count: imported, requestIds } = importResult;
|
|
331
|
+
log.info(`Imported: ${credential}/${bu} DE ${deKey} (${imported} rows)`);
|
|
253
332
|
|
|
254
333
|
const importHadError = await pollAsyncImportCompletion(tgtSdk, requestIds);
|
|
255
334
|
if (importHadError) {
|
|
@@ -257,24 +336,17 @@ export async function crossBuImport(params) {
|
|
|
257
336
|
}
|
|
258
337
|
|
|
259
338
|
const countAfter = await getDeRowCount(tgtSdk, deKey);
|
|
260
|
-
|
|
339
|
+
log.info(
|
|
261
340
|
`Row count after import: ${countAfter ?? '(unavailable)'} (${credential}/${bu} DE "${deKey}")`,
|
|
262
341
|
);
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
: imported;
|
|
272
|
-
if (countAfter < expected) {
|
|
273
|
-
console.error(
|
|
274
|
-
`Import result for ${credential}/${bu} DE "${deKey}" looks unexpected: expected at least ${expected} rows, got ${countAfter}.`,
|
|
275
|
-
);
|
|
276
|
-
}
|
|
277
|
-
}
|
|
342
|
+
warnIfImportCountUnexpected({
|
|
343
|
+
countBefore,
|
|
344
|
+
cleared: clearedTargetDe,
|
|
345
|
+
countAfter,
|
|
346
|
+
imported,
|
|
347
|
+
mode,
|
|
348
|
+
label: `${credential}/${bu} DE "${deKey}"`,
|
|
349
|
+
});
|
|
278
350
|
}
|
|
279
351
|
}
|
|
280
352
|
|
package/lib/export-de.mjs
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
|
+
import { createWriteStream } from 'node:fs';
|
|
1
2
|
import fs from 'node:fs/promises';
|
|
2
3
|
import path from 'node:path';
|
|
3
|
-
import {
|
|
4
|
+
import { finished } from 'node:stream/promises';
|
|
5
|
+
import { stringify } from 'csv-stringify';
|
|
6
|
+
import { stringify as stringifySync } from 'csv-stringify/sync';
|
|
4
7
|
import { rowsetGetPath } from './import-routes.mjs';
|
|
5
8
|
import { buildExportBasename, filesystemSafeTimestamp } from './filename.mjs';
|
|
6
9
|
import { dataDirectoryForBu } from './paths.mjs';
|
|
10
|
+
import { log } from './log.mjs';
|
|
7
11
|
|
|
8
12
|
/**
|
|
9
13
|
* @param {{rest: {getBulk: (path: string, pageSize?: number) => Promise.<any>}}} sdk
|
|
@@ -76,11 +80,11 @@ export function serializeRows(rows, format, jsonPretty, columns = []) {
|
|
|
76
80
|
if (rows.length === 0 && columns.length > 0) {
|
|
77
81
|
options.columns = columns;
|
|
78
82
|
}
|
|
79
|
-
return
|
|
83
|
+
return stringifySync(rows, options);
|
|
80
84
|
}
|
|
81
85
|
|
|
82
86
|
/**
|
|
83
|
-
* @param {{ rest:
|
|
87
|
+
* @param {{ rest: object, soap: { retrieve: Function } }} sdk
|
|
84
88
|
* @param {object} params
|
|
85
89
|
* @param {string} params.projectRoot
|
|
86
90
|
* @param {string} params.credentialName
|
|
@@ -89,7 +93,8 @@ export function serializeRows(rows, format, jsonPretty, columns = []) {
|
|
|
89
93
|
* @param {'csv'|'tsv'|'json'} params.format
|
|
90
94
|
* @param {boolean} [params.jsonPretty]
|
|
91
95
|
* @param {boolean} [params.useGit]
|
|
92
|
-
* @
|
|
96
|
+
* @param {number} [params.maxRowsPerFile] - split output into part files with at most this many data rows each
|
|
97
|
+
* @returns {Promise.<{paths: string[], rowCount: number}>}
|
|
93
98
|
*/
|
|
94
99
|
export async function exportDataExtensionToFile(sdk, params) {
|
|
95
100
|
const {
|
|
@@ -100,24 +105,199 @@ export async function exportDataExtensionToFile(sdk, params) {
|
|
|
100
105
|
format,
|
|
101
106
|
jsonPretty = false,
|
|
102
107
|
useGit = false,
|
|
108
|
+
maxRowsPerFile,
|
|
103
109
|
} = params;
|
|
104
|
-
const
|
|
105
|
-
|
|
106
|
-
|
|
110
|
+
const dir = dataDirectoryForBu(projectRoot, credentialName, buName);
|
|
111
|
+
await fs.mkdir(dir, { recursive: true });
|
|
112
|
+
const ts = filesystemSafeTimestamp();
|
|
113
|
+
const basePath = rowsetGetPath(deKey);
|
|
114
|
+
const cap =
|
|
115
|
+
typeof maxRowsPerFile === 'number' && maxRowsPerFile > 0 ? maxRowsPerFile : undefined;
|
|
116
|
+
|
|
117
|
+
/** @type {string[]} */
|
|
118
|
+
const paths = [];
|
|
119
|
+
let totalRows = 0;
|
|
120
|
+
|
|
121
|
+
if (format === 'json') {
|
|
122
|
+
let partIndex = 0;
|
|
123
|
+
/** @type {import('node:fs').WriteStream|null} */
|
|
124
|
+
let writeStream = null;
|
|
125
|
+
let rowsInPart = 0;
|
|
126
|
+
let firstInArray = true;
|
|
127
|
+
|
|
128
|
+
const closeJsonFile = async () => {
|
|
129
|
+
if (writeStream) {
|
|
130
|
+
writeStream.write('\n]\n');
|
|
131
|
+
writeStream.end();
|
|
132
|
+
await finished(writeStream);
|
|
133
|
+
writeStream = null;
|
|
134
|
+
}
|
|
135
|
+
firstInArray = true;
|
|
136
|
+
rowsInPart = 0;
|
|
137
|
+
};
|
|
138
|
+
|
|
139
|
+
const openJsonPart = async () => {
|
|
140
|
+
await closeJsonFile();
|
|
141
|
+
partIndex++;
|
|
142
|
+
const basename = cap
|
|
143
|
+
? buildExportBasename(deKey, ts, format, useGit, partIndex)
|
|
144
|
+
: buildExportBasename(deKey, ts, format, useGit);
|
|
145
|
+
const outPath = path.join(dir, basename);
|
|
146
|
+
paths.push(outPath);
|
|
147
|
+
writeStream = createWriteStream(outPath, { encoding: 'utf8' });
|
|
148
|
+
writeStream.write('[\n');
|
|
149
|
+
firstInArray = true;
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
try {
|
|
153
|
+
for await (const step of sdk.rest.getBulkPages(basePath, 2500)) {
|
|
154
|
+
if (step.totalPages === undefined) {
|
|
155
|
+
log.info(`Downloading next batch (currently ${totalRows} records)`);
|
|
156
|
+
} else {
|
|
157
|
+
log.info(
|
|
158
|
+
`Downloading batch ${step.page} of ${step.totalPages} (${totalRows} records so far)`,
|
|
159
|
+
);
|
|
160
|
+
}
|
|
161
|
+
for (const item of step.pageItems) {
|
|
162
|
+
const row = { ...item.keys, ...item.values };
|
|
163
|
+
if (writeStream === null) {
|
|
164
|
+
await openJsonPart();
|
|
165
|
+
}
|
|
166
|
+
if (cap && rowsInPart >= cap) {
|
|
167
|
+
await openJsonPart();
|
|
168
|
+
}
|
|
169
|
+
if (firstInArray) {
|
|
170
|
+
firstInArray = false;
|
|
171
|
+
} else {
|
|
172
|
+
writeStream.write(',\n');
|
|
173
|
+
}
|
|
174
|
+
const chunk = jsonPretty ? JSON.stringify(row, null, 2) : JSON.stringify(row);
|
|
175
|
+
writeStream.write(chunk);
|
|
176
|
+
rowsInPart++;
|
|
177
|
+
totalRows++;
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
} catch (ex) {
|
|
181
|
+
if (ex.message !== 'Could not find an array to iterate over') {
|
|
182
|
+
throw ex;
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
if (writeStream) {
|
|
187
|
+
await closeJsonFile();
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
if (paths.length === 0) {
|
|
191
|
+
let columns = [];
|
|
192
|
+
try {
|
|
193
|
+
columns = await fetchDataExtensionFieldNames(sdk.soap, deKey);
|
|
194
|
+
} catch (ex) {
|
|
195
|
+
log.warn(
|
|
196
|
+
`Warning: could not retrieve field names for empty DE "${deKey}": ${ex.message}`,
|
|
197
|
+
);
|
|
198
|
+
}
|
|
199
|
+
const basename = buildExportBasename(deKey, ts, format, useGit);
|
|
200
|
+
const outPath = path.join(dir, basename);
|
|
201
|
+
const body = serializeRows([], format, jsonPretty, columns);
|
|
202
|
+
await fs.writeFile(outPath, body, 'utf8');
|
|
203
|
+
paths.push(outPath);
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
return { paths, rowCount: totalRows };
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
/** @type {string[]|null} */
|
|
210
|
+
let columnNames = null;
|
|
211
|
+
let partIndex = 0;
|
|
212
|
+
/** @type {import('stream').Transform|null} */
|
|
213
|
+
let stringifier = null;
|
|
214
|
+
/** @type {import('node:fs').WriteStream|null} */
|
|
215
|
+
let writeStream = null;
|
|
216
|
+
let rowsInPart = 0;
|
|
217
|
+
let isFirstCsvFile = true;
|
|
218
|
+
|
|
219
|
+
const closeCsvPart = async () => {
|
|
220
|
+
if (stringifier && writeStream) {
|
|
221
|
+
stringifier.end();
|
|
222
|
+
await finished(writeStream);
|
|
223
|
+
}
|
|
224
|
+
stringifier = null;
|
|
225
|
+
writeStream = null;
|
|
226
|
+
rowsInPart = 0;
|
|
227
|
+
};
|
|
228
|
+
|
|
229
|
+
const openCsvPart = async () => {
|
|
230
|
+
await closeCsvPart();
|
|
231
|
+
partIndex++;
|
|
232
|
+
const basename = cap
|
|
233
|
+
? buildExportBasename(deKey, ts, format, useGit, partIndex)
|
|
234
|
+
: buildExportBasename(deKey, ts, format, useGit);
|
|
235
|
+
const outPath = path.join(dir, basename);
|
|
236
|
+
paths.push(outPath);
|
|
237
|
+
writeStream = createWriteStream(outPath, { encoding: 'utf8' });
|
|
238
|
+
const includeHeader = isFirstCsvFile;
|
|
239
|
+
isFirstCsvFile = false;
|
|
240
|
+
stringifier = stringify({
|
|
241
|
+
header: includeHeader,
|
|
242
|
+
bom: includeHeader,
|
|
243
|
+
quoted: format === 'csv',
|
|
244
|
+
delimiter: format === 'tsv' ? '\t' : ',',
|
|
245
|
+
...(columnNames && columnNames.length > 0 ? { columns: columnNames } : {}),
|
|
246
|
+
});
|
|
247
|
+
stringifier.pipe(writeStream);
|
|
248
|
+
};
|
|
249
|
+
|
|
250
|
+
try {
|
|
251
|
+
for await (const step of sdk.rest.getBulkPages(basePath, 2500)) {
|
|
252
|
+
if (step.totalPages === undefined) {
|
|
253
|
+
log.info(`Downloading next batch (currently ${totalRows} records)`);
|
|
254
|
+
} else {
|
|
255
|
+
log.info(
|
|
256
|
+
`Downloading batch ${step.page} of ${step.totalPages} (${totalRows} records so far)`,
|
|
257
|
+
);
|
|
258
|
+
}
|
|
259
|
+
for (const item of step.pageItems) {
|
|
260
|
+
const row = { ...item.keys, ...item.values };
|
|
261
|
+
if (columnNames === null && Object.keys(row).length > 0) {
|
|
262
|
+
columnNames = Object.keys(row);
|
|
263
|
+
}
|
|
264
|
+
if (writeStream === null) {
|
|
265
|
+
await openCsvPart();
|
|
266
|
+
}
|
|
267
|
+
if (cap && rowsInPart >= cap) {
|
|
268
|
+
await openCsvPart();
|
|
269
|
+
}
|
|
270
|
+
stringifier.write(row);
|
|
271
|
+
rowsInPart++;
|
|
272
|
+
totalRows++;
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
} catch (ex) {
|
|
276
|
+
if (ex.message !== 'Could not find an array to iterate over') {
|
|
277
|
+
throw ex;
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
if (writeStream) {
|
|
282
|
+
await closeCsvPart();
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
if (paths.length === 0) {
|
|
286
|
+
let columns = [];
|
|
107
287
|
try {
|
|
108
288
|
columns = await fetchDataExtensionFieldNames(sdk.soap, deKey);
|
|
109
289
|
} catch (ex) {
|
|
110
|
-
|
|
290
|
+
log.warn(
|
|
111
291
|
`Warning: could not retrieve field names for empty DE "${deKey}": ${ex.message}`,
|
|
112
292
|
);
|
|
113
293
|
}
|
|
294
|
+
columnNames = columns.length > 0 ? columns : null;
|
|
295
|
+
await openCsvPart();
|
|
296
|
+
stringifier.end();
|
|
297
|
+
await finished(writeStream);
|
|
298
|
+
stringifier = null;
|
|
299
|
+
writeStream = null;
|
|
114
300
|
}
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
const ts = filesystemSafeTimestamp();
|
|
118
|
-
const basename = buildExportBasename(deKey, ts, format, useGit);
|
|
119
|
-
const outPath = path.join(dir, basename);
|
|
120
|
-
const body = serializeRows(rows, format, jsonPretty, columns);
|
|
121
|
-
await fs.writeFile(outPath, body, 'utf8');
|
|
122
|
-
return { path: outPath, rowCount: rows.length };
|
|
301
|
+
|
|
302
|
+
return { paths, rowCount: totalRows };
|
|
123
303
|
}
|
package/lib/file-resolve.mjs
CHANGED
|
@@ -2,6 +2,77 @@ import fs from 'node:fs/promises';
|
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { parseExportBasename } from './filename.mjs';
|
|
4
4
|
|
|
5
|
+
/**
|
|
6
|
+
* @typedef {{ path: string, partNumber: number|null, mtime: number }} CandidateEntry
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Group multi-part exports (`part1`, `part2`, … sharing the same key and timestamp) and pick
|
|
11
|
+
* the newest export run. Returns ordered paths (part 1, 2, …) or a single file path.
|
|
12
|
+
*
|
|
13
|
+
* @param {string[]} candidatePaths - from {@link findImportCandidates}
|
|
14
|
+
* @returns {Promise.<{ paths: string[], isMultiPart: boolean }>}
|
|
15
|
+
*/
|
|
16
|
+
export async function resolveImportSet(candidatePaths) {
|
|
17
|
+
/** @type {Map<string, CandidateEntry[]>} */
|
|
18
|
+
const groups = new Map();
|
|
19
|
+
for (const filePath of candidatePaths) {
|
|
20
|
+
const name = path.basename(filePath);
|
|
21
|
+
let parsed;
|
|
22
|
+
try {
|
|
23
|
+
parsed = parseExportBasename(name);
|
|
24
|
+
} catch {
|
|
25
|
+
continue;
|
|
26
|
+
}
|
|
27
|
+
const { customerKey, timestampPart, ext, partNumber } = parsed;
|
|
28
|
+
const groupKey = `${customerKey}\0${timestampPart}\0${ext}`;
|
|
29
|
+
let st;
|
|
30
|
+
try {
|
|
31
|
+
st = await fs.stat(filePath);
|
|
32
|
+
} catch {
|
|
33
|
+
continue;
|
|
34
|
+
}
|
|
35
|
+
const entry = {
|
|
36
|
+
path: filePath,
|
|
37
|
+
partNumber: partNumber === undefined ? null : partNumber,
|
|
38
|
+
mtime: st.mtimeMs,
|
|
39
|
+
};
|
|
40
|
+
const list = groups.get(groupKey);
|
|
41
|
+
if (list) {
|
|
42
|
+
list.push(entry);
|
|
43
|
+
} else {
|
|
44
|
+
groups.set(groupKey, [entry]);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
if (groups.size === 0) {
|
|
48
|
+
return { paths: [], isMultiPart: false };
|
|
49
|
+
}
|
|
50
|
+
let bestEntries = /** @type {CandidateEntry[]|null} */ (null);
|
|
51
|
+
let bestMaxMtime = -1;
|
|
52
|
+
for (const entries of groups.values()) {
|
|
53
|
+
const maxM = Math.max(...entries.map((e) => e.mtime));
|
|
54
|
+
if (maxM > bestMaxMtime) {
|
|
55
|
+
bestMaxMtime = maxM;
|
|
56
|
+
bestEntries = entries;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
if (!bestEntries || bestEntries.length === 0) {
|
|
60
|
+
return { paths: [], isMultiPart: false };
|
|
61
|
+
}
|
|
62
|
+
const hasParts = bestEntries.some((e) => e.partNumber !== null);
|
|
63
|
+
if (hasParts) {
|
|
64
|
+
const sorted = [...bestEntries].toSorted(
|
|
65
|
+
(a, b) => (a.partNumber ?? 0) - (b.partNumber ?? 0),
|
|
66
|
+
);
|
|
67
|
+
return {
|
|
68
|
+
paths: sorted.map((e) => e.path),
|
|
69
|
+
isMultiPart: sorted.length > 1,
|
|
70
|
+
};
|
|
71
|
+
}
|
|
72
|
+
const one = bestEntries.reduce((a, b) => (a.mtime >= b.mtime ? a : b));
|
|
73
|
+
return { paths: [one.path], isMultiPart: false };
|
|
74
|
+
}
|
|
75
|
+
|
|
5
76
|
/** Supported import/export file extensions */
|
|
6
77
|
const SUPPORTED_EXTENSIONS = ['csv', 'tsv', 'json'];
|
|
7
78
|
|