sfmc-dataloader 2.6.0 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/config.mjs CHANGED
@@ -1,5 +1,6 @@
1
1
  import fs from 'node:fs';
2
2
  import path from 'node:path';
3
+ import { log } from './log.mjs';
3
4
 
4
5
  export const FILE_MCDEV_RC = '.mcdevrc.json';
5
6
  export const FILE_MCDEV_AUTH = '.mcdev-auth.json';
@@ -39,7 +40,7 @@ export const WARN_MCDATA_SUPERSEDED =
39
40
  * @returns {{ mcdevrc: Mcdevrc, mcdevAuth: Record<string, AuthCredential> }}
40
41
  */
41
42
  export function loadProjectConfig(projectRoot, options = {}) {
42
- const err = options.stderr ?? ((msg) => console.error(msg));
43
+ const err = options.stderr ?? ((msg) => log.error(msg));
43
44
  const rcMcdev = path.join(projectRoot, FILE_MCDEV_RC);
44
45
  const authMcdev = path.join(projectRoot, FILE_MCDEV_AUTH);
45
46
  const rcMcdata = path.join(projectRoot, FILE_MCDATA_RC);
@@ -170,9 +171,37 @@ export function buildSdkAuthObject(authCred, mid) {
170
171
  */
171
172
  export function buildSdkOptions(logger = null) {
172
173
  /** @type {import('sfmc-sdk').SdkOptions} */
173
- const options = { requestAttempts: 3 };
174
+ const options = {
175
+ requestAttempts: 3,
176
+ retryOnConnectionError: true,
177
+ eventHandlers: {
178
+ onLoop: (_type, accumulator, context) => {
179
+ if (context) {
180
+ log.info(
181
+ `Downloading batch ${context.nextPage} of ${context.totalPages} (${context.accumulatedCount} records so far)`,
182
+ );
183
+ } else {
184
+ log.info(
185
+ `Downloading next batch (currently ${accumulator?.length ?? 0} records)`,
186
+ );
187
+ }
188
+ },
189
+ onConnectionError: (ex, remainingAttempts) => {
190
+ const endpointStr = ex.endpoint ? String(ex.endpoint) : '';
191
+ const endpointSuffix = endpointStr
192
+ ? ` - ${endpointStr.split('rest.marketingcloudapis.com')[1] ?? endpointStr}`
193
+ : '';
194
+ log.warn(
195
+ `Connection problem (Code: ${ex.code}). Retrying ${remainingAttempts} time${
196
+ remainingAttempts > 1 ? 's' : ''
197
+ }${endpointSuffix}`,
198
+ );
199
+ },
200
+ },
201
+ };
174
202
  if (logger) {
175
203
  options.eventHandlers = {
204
+ ...options.eventHandlers,
176
205
  logRequest: (req) => {
177
206
  const msg = structuredClone(req);
178
207
  if (msg.headers?.Authorization) {
@@ -1,5 +1,7 @@
1
+ import { createReadStream, createWriteStream } from 'node:fs';
1
2
  import fs from 'node:fs/promises';
2
3
  import path from 'node:path';
4
+ import { finished, pipeline } from 'node:stream/promises';
3
5
  import readline from 'node:readline/promises';
4
6
  import { stdin as input, stdout as output } from 'node:process';
5
7
  import SDK from 'sfmc-sdk';
@@ -10,15 +12,42 @@ import {
10
12
  serializeRows,
11
13
  exportDataExtensionToFile,
12
14
  } from './export-de.mjs';
13
- import { formatFromExtension } from './file-resolve.mjs';
14
- import { importRowsForDe } from './import-de.mjs';
15
+ import { MAX_OBJECTS_PER_BATCH } from './batch.mjs';
16
+ import { formatFromExtension, resolveImportSet } from './file-resolve.mjs';
17
+ import {
18
+ assertNonEmptyImportRowCount,
19
+ importRowsForDe,
20
+ importRowsStreamingForDe,
21
+ warnIfImportCountUnexpected,
22
+ } from './import-de.mjs';
15
23
  import { pollAsyncImportCompletion } from './async-status.mjs';
16
- import { readRowsFromFile } from './read-rows.mjs';
24
+ import {
25
+ countDataRowsFromImportPaths,
26
+ readRowsFromImportPaths,
27
+ streamRowsFromImportPaths,
28
+ } from './read-rows.mjs';
17
29
  import { clearDataExtensionRows } from './clear-de.mjs';
18
30
  import { confirmClearBeforeImport } from './confirm-clear.mjs';
19
31
  import { dataDirectoryForBu } from './paths.mjs';
20
32
  import { buildExportBasename, filesystemSafeTimestamp, parseExportBasename } from './filename.mjs';
21
33
  import { getDeRowCount } from './row-count.mjs';
34
+ import { log } from './log.mjs';
35
+
36
/**
 * Concatenates source files into a single snapshot on disk (streaming; bounded memory).
 *
 * The bytes of each source are appended to `destPath` in array order; nothing is parsed or
 * rewritten. An empty `srcPaths` list produces an empty destination file.
 *
 * @param {string} destPath - file that receives the concatenated content
 * @param {string[]} srcPaths - files to append, in the order they should appear
 * @returns {Promise.<void>} resolves once the destination stream has finished
 * @throws {Error} rethrows the first read/write error after closing the destination stream
 */
async function concatenateFilesToPath(destPath, srcPaths) {
    const out = createWriteStream(destPath);
    try {
        for (const src of srcPaths) {
            // end: false keeps the destination open so the next source can be appended
            await pipeline(createReadStream(src), out, { end: false });
        }
        out.end();
        await finished(out);
    } catch (ex) {
        // With end: false the destination is ours to close; release its file handle
        // before surfacing the failure (destroy() is a no-op if it is already closed).
        out.destroy();
        throw ex;
    }
}
22
51
 
23
52
  /**
24
53
  * @typedef {{ credential: string, bu: string }} CredBuTarget
@@ -111,20 +140,24 @@ export async function crossBuImport(params) {
111
140
  const filePaths = params.filePaths ?? null;
112
141
  const isFileBased = filePaths !== null && filePaths.length > 0;
113
142
 
114
- // Derive DE keys: from explicit list (API mode) or from filenames (file mode)
143
+ // Derive DE keys: from explicit list (API mode) or from filenames (file mode, first-seen order)
144
+ /** @type {string[]} */
115
145
  const deKeys = isFileBased
116
- ? filePaths.map((fp) => parseExportBasename(path.basename(fp)).customerKey)
146
+ ? (() => {
147
+ /** @type {string[]} */
148
+ const keys = [];
149
+ const seen = new Set();
150
+ for (const fp of filePaths) {
151
+ const k = parseExportBasename(path.basename(fp)).customerKey;
152
+ if (!seen.has(k)) {
153
+ seen.add(k);
154
+ keys.push(k);
155
+ }
156
+ }
157
+ return keys;
158
+ })()
117
159
  : (params.deKeys ?? []);
118
160
 
119
- // Build a lookup map from deKey → filePath for file mode
120
- /** @type {Map<string, string>} */
121
- const fileByDeKey = new Map();
122
- if (isFileBased) {
123
- for (const fp of filePaths) {
124
- fileByDeKey.set(parseExportBasename(path.basename(fp)).customerKey, fp);
125
- }
126
- }
127
-
128
161
  // Validate all target BU configurations upfront
129
162
  for (const { credential, bu } of targets) {
130
163
  resolveCredentialAndMid(mcdevrc, mcdevAuth, credential, bu);
@@ -156,7 +189,7 @@ export async function crossBuImport(params) {
156
189
  const { mid, authCred } = resolveCredentialAndMid(mcdevrc, mcdevAuth, credential, bu);
157
190
  const tgtSdk = new SDK(buildSdkAuthObject(authCred, mid), buildSdkOptions(logger));
158
191
  for (const deKey of deKeys) {
159
- const { path: outPath, rowCount } = await exportDataExtensionToFile(tgtSdk, {
192
+ const { paths: outPaths, rowCount } = await exportDataExtensionToFile(tgtSdk, {
160
193
  projectRoot,
161
194
  credentialName: credential,
162
195
  buName: bu,
@@ -164,7 +197,8 @@ export async function crossBuImport(params) {
164
197
  format,
165
198
  useGit: false,
166
199
  });
167
- console.error(`Backup export: "${path.resolve(outPath)}" (${rowCount} rows)`);
200
+ const label = outPaths.map((p) => `"${path.resolve(p)}"`).join(', ');
201
+ log.info(`Backup export: ${label} (${rowCount} rows)`);
168
202
  }
169
203
  }
170
204
  }
@@ -176,35 +210,51 @@ export async function crossBuImport(params) {
176
210
 
177
211
  let hasError = false;
178
212
 
179
- // Load rows once per DE then fan out to every target
213
+ // Load rows once per DE then fan out to every target (CSV/TSV file mode streams to avoid OOM)
180
214
  for (const deKey of deKeys) {
181
- let rows;
215
+ /** @type {object[]|null} */
216
+ let rows = null;
217
+ /** @type {string[]|null} */
218
+ let streamingImportPaths = null;
219
+ /** @type {'csv'|'tsv'|'json'|null} */
220
+ let streamingDetectedFormat = null;
221
+
182
222
  if (isFileBased) {
183
- const filePath = fileByDeKey.get(deKey);
184
- const detectedFormat = formatFromExtension(filePath);
223
+ const groupPaths = filePaths.filter(
224
+ (fp) => parseExportBasename(path.basename(fp)).customerKey === deKey,
225
+ );
226
+ const { paths: importPaths } = await resolveImportSet(groupPaths);
227
+ if (importPaths.length === 0) {
228
+ throw new Error(`No resolvable import files for DE "${deKey}".`);
229
+ }
230
+ const detectedFormat = formatFromExtension(importPaths[0]);
185
231
  if (!detectedFormat) {
186
232
  throw new Error(
187
- `Cannot determine format for file: ${filePath}. Use .csv, .tsv, or .json extension.`,
233
+ `Cannot determine format for file: ${importPaths[0]}. Use .csv, .tsv, or .json extension.`,
188
234
  );
189
235
  }
190
- rows = await readRowsFromFile(filePath, detectedFormat);
191
- if (rows.length === 0) {
192
- throw new Error(
193
- `Import file contains no data rows: "${filePath}". ` +
194
- `The file may be empty, contain only a BOM, or contain only a header row. ` +
195
- `Export the DE first to obtain a template with column names, then add rows.`,
196
- );
236
+ if (detectedFormat === 'csv' || detectedFormat === 'tsv') {
237
+ streamingImportPaths = importPaths;
238
+ streamingDetectedFormat = detectedFormat;
239
+ } else {
240
+ rows = await readRowsFromImportPaths(importPaths, detectedFormat);
241
+ if (rows.length === 0) {
242
+ throw new Error(
243
+ `Import files contain no data rows for DE "${deKey}". ` +
244
+ `The files may be empty, contain only a BOM, or contain only a header row.`,
245
+ );
246
+ }
197
247
  }
198
248
  } else {
199
249
  rows = await fetchAllRowObjects(srcSdk, deKey);
200
250
  }
201
251
 
202
252
  let snapshotColumns = [];
203
- if (rows.length === 0 && format !== 'json') {
253
+ if (rows && rows.length === 0 && format !== 'json') {
204
254
  try {
205
255
  snapshotColumns = await fetchDataExtensionFieldNames(srcSdk.soap, deKey);
206
256
  } catch (ex) {
207
- console.error(
257
+ log.warn(
208
258
  `Warning: could not retrieve field names for empty DE "${deKey}" (snapshot): ${ex.message}`,
209
259
  );
210
260
  }
@@ -215,19 +265,21 @@ export async function crossBuImport(params) {
215
265
  const tgtSdk = new SDK(buildSdkAuthObject(authCred, mid), buildSdkOptions(logger));
216
266
 
217
267
  const countBefore = await getDeRowCount(tgtSdk, deKey);
218
- console.error(
268
+ log.info(
219
269
  `Row count before import: ${countBefore ?? '(unavailable)'} (${credential}/${bu} DE "${deKey}")`,
220
270
  );
221
271
 
222
272
  // Clear target before import (already confirmed above); skip if DE is empty
273
+ let clearedTargetDe = false;
223
274
  if (clearBeforeImport) {
224
275
  if (countBefore === 0) {
225
- console.error(
276
+ log.info(
226
277
  `Skipping clear-data for ${credential}/${bu} DE "${deKey}" — DE is already empty.`,
227
278
  );
228
279
  } else {
229
280
  await clearDataExtensionRows(tgtSdk.soap, deKey);
230
- console.warn(`Cleared data: ${credential}/${bu} DE "${deKey}"`);
281
+ clearedTargetDe = true;
282
+ log.warn(`Cleared data: ${credential}/${bu} DE "${deKey}"`);
231
283
  }
232
284
  }
233
285
 
@@ -237,19 +289,46 @@ export async function crossBuImport(params) {
237
289
  const ts = filesystemSafeTimestamp();
238
290
  const basename = buildExportBasename(deKey, ts, format, false);
239
291
  const snapshotPath = path.join(dir, basename);
240
- await fs.writeFile(
241
- snapshotPath,
242
- serializeRows(rows, format, false, snapshotColumns),
243
- 'utf8',
244
- );
245
- console.error(`Download stored: "${path.resolve(snapshotPath)}" (${rows.length} rows)`);
246
292
 
247
- const { count: imported, requestIds } = await importRowsForDe(tgtSdk, {
248
- deKey,
249
- rows,
250
- mode,
251
- });
252
- console.error(`Imported: ${credential}/${bu} DE ${deKey} (${imported} rows)`);
293
+ /** @type {{ count: number, requestIds: (string|null)[] }} */
294
+ let importResult;
295
+ if (streamingImportPaths && streamingDetectedFormat) {
296
+ await concatenateFilesToPath(snapshotPath, streamingImportPaths);
297
+ log.info(`Download stored: "${path.resolve(snapshotPath)}"`);
298
+ const rowCount = await countDataRowsFromImportPaths(
299
+ streamingImportPaths,
300
+ streamingDetectedFormat,
301
+ );
302
+ assertNonEmptyImportRowCount(rowCount, streamingImportPaths.join(', '));
303
+ const totalMemoryBatches = Math.max(1, Math.ceil(rowCount / MAX_OBJECTS_PER_BATCH));
304
+ const rowSource = streamRowsFromImportPaths(
305
+ streamingImportPaths,
306
+ streamingDetectedFormat,
307
+ );
308
+ importResult = await importRowsStreamingForDe(tgtSdk, {
309
+ deKey,
310
+ rowSource,
311
+ mode,
312
+ totalMemoryBatches,
313
+ });
314
+ } else {
315
+ await fs.writeFile(
316
+ snapshotPath,
317
+ serializeRows(/** @type {object[]} */ (rows), format, false, snapshotColumns),
318
+ 'utf8',
319
+ );
320
+ log.info(
321
+ `Download stored: "${path.resolve(snapshotPath)}" (${/** @type {object[]} */ (rows).length} rows)`,
322
+ );
323
+ importResult = await importRowsForDe(tgtSdk, {
324
+ deKey,
325
+ rows: /** @type {object[]} */ (rows),
326
+ mode,
327
+ });
328
+ }
329
+
330
+ const { count: imported, requestIds } = importResult;
331
+ log.info(`Imported: ${credential}/${bu} DE ${deKey} (${imported} rows)`);
253
332
 
254
333
  const importHadError = await pollAsyncImportCompletion(tgtSdk, requestIds);
255
334
  if (importHadError) {
@@ -257,24 +336,17 @@ export async function crossBuImport(params) {
257
336
  }
258
337
 
259
338
  const countAfter = await getDeRowCount(tgtSdk, deKey);
260
- console.error(
339
+ log.info(
261
340
  `Row count after import: ${countAfter ?? '(unavailable)'} (${credential}/${bu} DE "${deKey}")`,
262
341
  );
263
- if (countAfter === null) {
264
- console.error(
265
- `Could not verify import result for ${credential}/${bu} DE "${deKey}".`,
266
- );
267
- } else {
268
- const expected =
269
- mode === 'insert' || countBefore === 0
270
- ? (countBefore ?? 0) + imported
271
- : imported;
272
- if (countAfter < expected) {
273
- console.error(
274
- `Import result for ${credential}/${bu} DE "${deKey}" looks unexpected: expected at least ${expected} rows, got ${countAfter}.`,
275
- );
276
- }
277
- }
342
+ warnIfImportCountUnexpected({
343
+ countBefore,
344
+ cleared: clearedTargetDe,
345
+ countAfter,
346
+ imported,
347
+ mode,
348
+ label: `${credential}/${bu} DE "${deKey}"`,
349
+ });
278
350
  }
279
351
  }
280
352
 
package/lib/export-de.mjs CHANGED
@@ -1,9 +1,13 @@
1
+ import { createWriteStream } from 'node:fs';
1
2
  import fs from 'node:fs/promises';
2
3
  import path from 'node:path';
3
- import { stringify } from 'csv-stringify/sync';
4
+ import { finished } from 'node:stream/promises';
5
+ import { stringify } from 'csv-stringify';
6
+ import { stringify as stringifySync } from 'csv-stringify/sync';
4
7
  import { rowsetGetPath } from './import-routes.mjs';
5
8
  import { buildExportBasename, filesystemSafeTimestamp } from './filename.mjs';
6
9
  import { dataDirectoryForBu } from './paths.mjs';
10
+ import { log } from './log.mjs';
7
11
 
8
12
  /**
9
13
  * @param {{rest: {getBulk: (path: string, pageSize?: number) => Promise.<any>}}} sdk
@@ -76,11 +80,11 @@ export function serializeRows(rows, format, jsonPretty, columns = []) {
76
80
  if (rows.length === 0 && columns.length > 0) {
77
81
  options.columns = columns;
78
82
  }
79
- return stringify(rows, options);
83
+ return stringifySync(rows, options);
80
84
  }
81
85
 
82
86
  /**
83
- * @param {{ rest: { getBulk: (path: string, pageSize?: number) => Promise.<any> }, soap: { retrieve: Function } }} sdk
87
+ * @param {{ rest: object, soap: { retrieve: Function } }} sdk
84
88
  * @param {object} params
85
89
  * @param {string} params.projectRoot
86
90
  * @param {string} params.credentialName
@@ -89,7 +93,8 @@ export function serializeRows(rows, format, jsonPretty, columns = []) {
89
93
  * @param {'csv'|'tsv'|'json'} params.format
90
94
  * @param {boolean} [params.jsonPretty]
91
95
  * @param {boolean} [params.useGit]
92
- * @returns {Promise.<{path: string, rowCount: number}>}
96
+ * @param {number} [params.maxRowsPerFile] - split output into part files with at most this many data rows each
97
+ * @returns {Promise.<{paths: string[], rowCount: number}>}
93
98
  */
94
99
  export async function exportDataExtensionToFile(sdk, params) {
95
100
  const {
@@ -100,24 +105,199 @@ export async function exportDataExtensionToFile(sdk, params) {
100
105
  format,
101
106
  jsonPretty = false,
102
107
  useGit = false,
108
+ maxRowsPerFile,
103
109
  } = params;
104
- const rows = await fetchAllRowObjects(sdk, deKey);
105
- let columns = [];
106
- if (rows.length === 0 && format !== 'json') {
110
+ const dir = dataDirectoryForBu(projectRoot, credentialName, buName);
111
+ await fs.mkdir(dir, { recursive: true });
112
+ const ts = filesystemSafeTimestamp();
113
+ const basePath = rowsetGetPath(deKey);
114
+ const cap =
115
+ typeof maxRowsPerFile === 'number' && maxRowsPerFile > 0 ? maxRowsPerFile : undefined;
116
+
117
+ /** @type {string[]} */
118
+ const paths = [];
119
+ let totalRows = 0;
120
+
121
+ if (format === 'json') {
122
+ let partIndex = 0;
123
+ /** @type {import('node:fs').WriteStream|null} */
124
+ let writeStream = null;
125
+ let rowsInPart = 0;
126
+ let firstInArray = true;
127
+
128
+ const closeJsonFile = async () => {
129
+ if (writeStream) {
130
+ writeStream.write('\n]\n');
131
+ writeStream.end();
132
+ await finished(writeStream);
133
+ writeStream = null;
134
+ }
135
+ firstInArray = true;
136
+ rowsInPart = 0;
137
+ };
138
+
139
+ const openJsonPart = async () => {
140
+ await closeJsonFile();
141
+ partIndex++;
142
+ const basename = cap
143
+ ? buildExportBasename(deKey, ts, format, useGit, partIndex)
144
+ : buildExportBasename(deKey, ts, format, useGit);
145
+ const outPath = path.join(dir, basename);
146
+ paths.push(outPath);
147
+ writeStream = createWriteStream(outPath, { encoding: 'utf8' });
148
+ writeStream.write('[\n');
149
+ firstInArray = true;
150
+ };
151
+
152
+ try {
153
+ for await (const step of sdk.rest.getBulkPages(basePath, 2500)) {
154
+ if (step.totalPages === undefined) {
155
+ log.info(`Downloading next batch (currently ${totalRows} records)`);
156
+ } else {
157
+ log.info(
158
+ `Downloading batch ${step.page} of ${step.totalPages} (${totalRows} records so far)`,
159
+ );
160
+ }
161
+ for (const item of step.pageItems) {
162
+ const row = { ...item.keys, ...item.values };
163
+ if (writeStream === null) {
164
+ await openJsonPart();
165
+ }
166
+ if (cap && rowsInPart >= cap) {
167
+ await openJsonPart();
168
+ }
169
+ if (firstInArray) {
170
+ firstInArray = false;
171
+ } else {
172
+ writeStream.write(',\n');
173
+ }
174
+ const chunk = jsonPretty ? JSON.stringify(row, null, 2) : JSON.stringify(row);
175
+ writeStream.write(chunk);
176
+ rowsInPart++;
177
+ totalRows++;
178
+ }
179
+ }
180
+ } catch (ex) {
181
+ if (ex.message !== 'Could not find an array to iterate over') {
182
+ throw ex;
183
+ }
184
+ }
185
+
186
+ if (writeStream) {
187
+ await closeJsonFile();
188
+ }
189
+
190
+ if (paths.length === 0) {
191
+ let columns = [];
192
+ try {
193
+ columns = await fetchDataExtensionFieldNames(sdk.soap, deKey);
194
+ } catch (ex) {
195
+ log.warn(
196
+ `Warning: could not retrieve field names for empty DE "${deKey}": ${ex.message}`,
197
+ );
198
+ }
199
+ const basename = buildExportBasename(deKey, ts, format, useGit);
200
+ const outPath = path.join(dir, basename);
201
+ const body = serializeRows([], format, jsonPretty, columns);
202
+ await fs.writeFile(outPath, body, 'utf8');
203
+ paths.push(outPath);
204
+ }
205
+
206
+ return { paths, rowCount: totalRows };
207
+ }
208
+
209
+ /** @type {string[]|null} */
210
+ let columnNames = null;
211
+ let partIndex = 0;
212
+ /** @type {import('stream').Transform|null} */
213
+ let stringifier = null;
214
+ /** @type {import('node:fs').WriteStream|null} */
215
+ let writeStream = null;
216
+ let rowsInPart = 0;
217
+ let isFirstCsvFile = true;
218
+
219
+ const closeCsvPart = async () => {
220
+ if (stringifier && writeStream) {
221
+ stringifier.end();
222
+ await finished(writeStream);
223
+ }
224
+ stringifier = null;
225
+ writeStream = null;
226
+ rowsInPart = 0;
227
+ };
228
+
229
+ const openCsvPart = async () => {
230
+ await closeCsvPart();
231
+ partIndex++;
232
+ const basename = cap
233
+ ? buildExportBasename(deKey, ts, format, useGit, partIndex)
234
+ : buildExportBasename(deKey, ts, format, useGit);
235
+ const outPath = path.join(dir, basename);
236
+ paths.push(outPath);
237
+ writeStream = createWriteStream(outPath, { encoding: 'utf8' });
238
+ const includeHeader = isFirstCsvFile;
239
+ isFirstCsvFile = false;
240
+ stringifier = stringify({
241
+ header: includeHeader,
242
+ bom: includeHeader,
243
+ quoted: format === 'csv',
244
+ delimiter: format === 'tsv' ? '\t' : ',',
245
+ ...(columnNames && columnNames.length > 0 ? { columns: columnNames } : {}),
246
+ });
247
+ stringifier.pipe(writeStream);
248
+ };
249
+
250
+ try {
251
+ for await (const step of sdk.rest.getBulkPages(basePath, 2500)) {
252
+ if (step.totalPages === undefined) {
253
+ log.info(`Downloading next batch (currently ${totalRows} records)`);
254
+ } else {
255
+ log.info(
256
+ `Downloading batch ${step.page} of ${step.totalPages} (${totalRows} records so far)`,
257
+ );
258
+ }
259
+ for (const item of step.pageItems) {
260
+ const row = { ...item.keys, ...item.values };
261
+ if (columnNames === null && Object.keys(row).length > 0) {
262
+ columnNames = Object.keys(row);
263
+ }
264
+ if (writeStream === null) {
265
+ await openCsvPart();
266
+ }
267
+ if (cap && rowsInPart >= cap) {
268
+ await openCsvPart();
269
+ }
270
+ stringifier.write(row);
271
+ rowsInPart++;
272
+ totalRows++;
273
+ }
274
+ }
275
+ } catch (ex) {
276
+ if (ex.message !== 'Could not find an array to iterate over') {
277
+ throw ex;
278
+ }
279
+ }
280
+
281
+ if (writeStream) {
282
+ await closeCsvPart();
283
+ }
284
+
285
+ if (paths.length === 0) {
286
+ let columns = [];
107
287
  try {
108
288
  columns = await fetchDataExtensionFieldNames(sdk.soap, deKey);
109
289
  } catch (ex) {
110
- console.error(
290
+ log.warn(
111
291
  `Warning: could not retrieve field names for empty DE "${deKey}": ${ex.message}`,
112
292
  );
113
293
  }
294
+ columnNames = columns.length > 0 ? columns : null;
295
+ await openCsvPart();
296
+ stringifier.end();
297
+ await finished(writeStream);
298
+ stringifier = null;
299
+ writeStream = null;
114
300
  }
115
- const dir = dataDirectoryForBu(projectRoot, credentialName, buName);
116
- await fs.mkdir(dir, { recursive: true });
117
- const ts = filesystemSafeTimestamp();
118
- const basename = buildExportBasename(deKey, ts, format, useGit);
119
- const outPath = path.join(dir, basename);
120
- const body = serializeRows(rows, format, jsonPretty, columns);
121
- await fs.writeFile(outPath, body, 'utf8');
122
- return { path: outPath, rowCount: rows.length };
301
+
302
+ return { paths, rowCount: totalRows };
123
303
  }
@@ -2,6 +2,77 @@ import fs from 'node:fs/promises';
2
2
  import path from 'node:path';
3
3
  import { parseExportBasename } from './filename.mjs';
4
4
 
5
+ /**
6
+ * @typedef {{ path: string, partNumber: number|null, mtime: number }} CandidateEntry
7
+ */
8
+
9
+ /**
10
+ * Group multi-part exports (`part1`, `part2`, … sharing the same key and timestamp) and pick
11
+ * the newest export run. Returns ordered paths (part 1, 2, …) or a single file path.
12
+ *
13
+ * @param {string[]} candidatePaths - from {@link findImportCandidates}
14
+ * @returns {Promise.<{ paths: string[], isMultiPart: boolean }>}
15
+ */
16
+ export async function resolveImportSet(candidatePaths) {
17
+ /** @type {Map<string, CandidateEntry[]>} */
18
+ const groups = new Map();
19
+ for (const filePath of candidatePaths) {
20
+ const name = path.basename(filePath);
21
+ let parsed;
22
+ try {
23
+ parsed = parseExportBasename(name);
24
+ } catch {
25
+ continue;
26
+ }
27
+ const { customerKey, timestampPart, ext, partNumber } = parsed;
28
+ const groupKey = `${customerKey}\0${timestampPart}\0${ext}`;
29
+ let st;
30
+ try {
31
+ st = await fs.stat(filePath);
32
+ } catch {
33
+ continue;
34
+ }
35
+ const entry = {
36
+ path: filePath,
37
+ partNumber: partNumber === undefined ? null : partNumber,
38
+ mtime: st.mtimeMs,
39
+ };
40
+ const list = groups.get(groupKey);
41
+ if (list) {
42
+ list.push(entry);
43
+ } else {
44
+ groups.set(groupKey, [entry]);
45
+ }
46
+ }
47
+ if (groups.size === 0) {
48
+ return { paths: [], isMultiPart: false };
49
+ }
50
+ let bestEntries = /** @type {CandidateEntry[]|null} */ (null);
51
+ let bestMaxMtime = -1;
52
+ for (const entries of groups.values()) {
53
+ const maxM = Math.max(...entries.map((e) => e.mtime));
54
+ if (maxM > bestMaxMtime) {
55
+ bestMaxMtime = maxM;
56
+ bestEntries = entries;
57
+ }
58
+ }
59
+ if (!bestEntries || bestEntries.length === 0) {
60
+ return { paths: [], isMultiPart: false };
61
+ }
62
+ const hasParts = bestEntries.some((e) => e.partNumber !== null);
63
+ if (hasParts) {
64
+ const sorted = [...bestEntries].toSorted(
65
+ (a, b) => (a.partNumber ?? 0) - (b.partNumber ?? 0),
66
+ );
67
+ return {
68
+ paths: sorted.map((e) => e.path),
69
+ isMultiPart: sorted.length > 1,
70
+ };
71
+ }
72
+ const one = bestEntries.reduce((a, b) => (a.mtime >= b.mtime ? a : b));
73
+ return { paths: [one.path], isMultiPart: false };
74
+ }
75
+
5
76
  /** Supported import/export file extensions */
6
77
  const SUPPORTED_EXTENSIONS = ['csv', 'tsv', 'json'];
7
78