sfmc-dataloader 2.6.1 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/async-status.mjs +11 -10
- package/lib/batch.mjs +2 -2
- package/lib/cli.mjs +178 -86
- package/lib/config.mjs +15 -9
- package/lib/cross-bu-import.mjs +133 -61
- package/lib/export-de.mjs +196 -16
- package/lib/file-resolve.mjs +71 -0
- package/lib/filename.mjs +25 -5
- package/lib/import-de.mjs +160 -37
- package/lib/init-project.mjs +2 -1
- package/lib/log.mjs +56 -0
- package/lib/multi-bu-export.mjs +8 -3
- package/lib/read-rows.mjs +156 -27
- package/lib/row-count.mjs +2 -1
- package/package.json +2 -2
package/lib/filename.mjs
CHANGED
|
@@ -42,14 +42,20 @@ export function reverseFilterIllegalFilenames(filename) {
|
|
|
42
42
|
* @param {string} safeTs - filesystem-safe UTC timestamp (ignored when useGit is true)
|
|
43
43
|
* @param {'csv'|'tsv'|'json'} ext
|
|
44
44
|
* @param {boolean} [useGit] - stable `key.mcdata.ext` without timestamp
|
|
45
|
+
* @param {number} [partNumber] - when set, inserts `partN` before the timestamp (or before the extension in --git mode)
|
|
45
46
|
* @returns {string} basename without directory
|
|
46
47
|
*/
|
|
47
|
-
export function buildExportBasename(customerKey, safeTs, ext, useGit = false, partNumber) {
    const enc = filterIllegalFilenames(customerKey);
    // Built once and shared by both naming schemes; empty when no part number was requested.
    const partInfix =
        partNumber === undefined || partNumber === null ? '' : `part${Number(partNumber)}.`;
    if (useGit) {
        // --git mode: stable name without a timestamp, e.g. `key.mcdata.csv` or `key.mcdata.part2.csv`
        return `${enc}.mcdata.${partInfix}${ext}`;
    }
    // Timestamped name: the part infix (if any) is placed directly before the timestamp.
    return `${enc}${MCDATA_SEGMENT}${partInfix}${safeTs}.${ext}`;
}
|
|
54
60
|
|
|
55
61
|
/**
|
|
@@ -62,7 +68,7 @@ export function filesystemSafeTimestamp(d = new Date()) {
|
|
|
62
68
|
|
|
63
69
|
/**
|
|
64
70
|
* @param {string} basename - e.g. `encodedKey.mcdata.2026-04-06T15-00-00.000Z.csv` or `encodedKey.mcdata.csv`
|
|
65
|
-
* @returns {{ customerKey: string, timestampPart: string, ext: string }}
|
|
71
|
+
* @returns {{ customerKey: string, timestampPart: string, ext: string, partNumber?: number }}
|
|
66
72
|
*/
|
|
67
73
|
export function parseExportBasename(basename) {
|
|
68
74
|
const lastDot = basename.lastIndexOf('.');
|
|
@@ -72,11 +78,25 @@ export function parseExportBasename(basename) {
|
|
|
72
78
|
const idx = stem.indexOf(MCDATA_SEGMENT);
|
|
73
79
|
if (idx !== -1) {
|
|
74
80
|
const encodedKey = stem.slice(0, idx);
|
|
75
|
-
|
|
81
|
+
let rest = stem.slice(idx + MCDATA_SEGMENT.length);
|
|
82
|
+
/** @type {number|undefined} */
|
|
83
|
+
let partNumber;
|
|
84
|
+
const partTs = rest.match(/^part(\d+)\.(.+)$/);
|
|
85
|
+
if (partTs) {
|
|
86
|
+
partNumber = Number(partTs[1]);
|
|
87
|
+
rest = partTs[2];
|
|
88
|
+
} else {
|
|
89
|
+
const partOnly = rest.match(/^part(\d+)$/);
|
|
90
|
+
if (partOnly) {
|
|
91
|
+
partNumber = Number(partOnly[1]);
|
|
92
|
+
rest = '';
|
|
93
|
+
}
|
|
94
|
+
}
|
|
76
95
|
return {
|
|
77
96
|
customerKey: reverseFilterIllegalFilenames(encodedKey),
|
|
78
|
-
timestampPart,
|
|
97
|
+
timestampPart: rest,
|
|
79
98
|
ext,
|
|
99
|
+
...(typeof partNumber === 'number' ? { partNumber } : {}),
|
|
80
100
|
};
|
|
81
101
|
}
|
|
82
102
|
|
package/lib/import-de.mjs
CHANGED
|
@@ -1,9 +1,101 @@
|
|
|
1
1
|
import { RestError } from 'sfmc-sdk/util';
|
|
2
|
-
import { chunkItemsForPayload } from './batch.mjs';
|
|
2
|
+
import { chunkItemsForPayload, MAX_OBJECTS_PER_BATCH } from './batch.mjs';
|
|
3
3
|
import { formatFromExtension } from './file-resolve.mjs';
|
|
4
4
|
import { resolveImportRoute } from './import-routes.mjs';
|
|
5
5
|
import { withRetry429 } from './retry.mjs';
|
|
6
|
-
import {
|
|
6
|
+
import { countDataRowsFromImportPaths, streamRowsFromFile } from './read-rows.mjs';
|
|
7
|
+
import { log } from './log.mjs';
|
|
8
|
+
|
|
9
|
+
/**
 * Guards an import against an empty source: returns silently when at least one data row
 * was counted, otherwise throws an error explaining the likely causes.
 *
 * @param {number} rowCount - number of data rows found in the import source
 * @param {string} [emptySourceLabel] - quoted in the error message when provided (e.g. the file path)
 * @throws {Error} when `rowCount` is not greater than zero
 */
export function assertNonEmptyImportRowCount(rowCount, emptySourceLabel) {
    const hasRows = rowCount > 0;
    if (!hasRows) {
        const sentences = [
            emptySourceLabel
                ? `Import file contains no data rows: "${emptySourceLabel}".`
                : 'Import file contains no data rows.',
            'The file may be empty, contain only a BOM, or contain only a header row.',
            'Export the DE first to obtain a template with column names, then add rows.',
        ];
        throw new Error(sentences.join(' '));
    }
}
|
|
26
|
+
|
|
27
|
+
/**
 * Emits a warning when the row count after an import is unavailable or lower than the
 * minimum this function expects. Nothing is logged when the count before the import is
 * unknown, because no expectation can be derived in that case.
 *
 * Expected minimum: the rows present before the import (zero when the DE was cleared)
 * plus the imported rows for `insert` or an initially empty DE; otherwise just the
 * imported rows.
 *
 * @param {object} opts
 * @param {number|null} opts.countBefore - row count before any clear/import
 * @param {boolean} opts.cleared - whether the DE was emptied before the upload
 * @param {number|null} opts.countAfter - row count after the import
 * @param {number} opts.imported - number of rows sent to the API
 * @param {'upsert'|'insert'} opts.mode
 * @param {string} opts.label - text after "for" in messages, e.g. `DE "MyKey"` or `cred/bu DE "MyKey"`
 */
export function warnIfImportCountUnexpected(opts) {
    const { countBefore, cleared, countAfter, imported, mode, label } = opts;
    if (countAfter === null) {
        log.warn(`Could not verify import result for ${label}.`);
        return;
    }
    if (countBefore === null) {
        return;
    }
    // A cleared DE starts from zero regardless of what it held before.
    const baseline = cleared ? 0 : countBefore;
    let expected;
    if (mode === 'insert' || baseline === 0) {
        expected = baseline + imported;
    } else {
        expected = imported;
    }
    const looksTooLow = countAfter < expected;
    if (looksTooLow) {
        log.warn(
            `Import result for ${label} looks unexpected: expected at least ${expected} rows, got ${countAfter}.`,
        );
    }
}
|
|
65
|
+
|
|
66
|
+
/**
 * Sends a single payload chunk to the import route resolved for `mode` and returns the
 * request id reported by the API (or `null` when the response carries none).
 * An HTTP 400 `RestError` that includes `resultMessages` is rethrown as a readable
 * `Error` with the original attached as `cause`; every other error is rethrown as-is.
 *
 * @param {{ rest: { put: Function, post: Function } }} sdk
 * @param {string} deKey
 * @param {'upsert'|'insert'} mode
 * @param {object[]} chunk - rows for one HTTP request
 * @returns {Promise.<string|null>}
 */
async function postImportPayloadChunk(sdk, deKey, mode, chunk) {
    const route = resolveImportRoute(mode);
    const endpoint = route.path(deKey);
    const payload = { items: chunk };
    const send = () => {
        if (route.method === 'PUT') {
            return sdk.rest.put(endpoint, payload);
        }
        return sdk.rest.post(endpoint, payload);
    };
    try {
        const response = await withRetry429(send);
        return response?.requestId ?? null;
    } catch (ex) {
        const isBadRequest = ex instanceof RestError && ex.response?.status === 400;
        const resultMessages = isBadRequest ? ex.response?.data?.resultMessages : undefined;
        if (!Array.isArray(resultMessages) || resultMessages.length === 0) {
            throw ex;
        }
        const summary = resultMessages.map((entry) => entry.message ?? String(entry)).join('; ');
        throw new Error(`Import failed for DE "${deKey}" (HTTP 400): ${summary}`, {
            cause: ex,
        });
    }
}
|
|
7
99
|
|
|
8
100
|
/**
|
|
9
101
|
* @param {{ rest: { put: Function, post: Function } }} sdk
|
|
@@ -15,38 +107,71 @@ import { readRowsFromFile } from './read-rows.mjs';
|
|
|
15
107
|
*/
|
|
16
108
|
export async function importRowsForDe(sdk, params) {
    const { deKey, rows, mode } = params;
    const requestIds = [];
    // Payload chunks are posted strictly one after another; each request id is kept in order.
    for (const payload of chunkItemsForPayload(rows)) {
        const requestId = await postImportPayloadChunk(sdk, deKey, mode, payload);
        requestIds.push(requestId);
    }
    return { count: rows.length, requestIds };
}
|
|
49
117
|
|
|
118
|
+
/**
 * Imports rows from an async row source without holding the whole source in memory:
 * rows are collected into windows of at most `maxRowsPerBatch`, and each window is split
 * into HTTP payload chunks that are posted sequentially.
 *
 * @param {{ rest: { put: Function, post: Function } }} sdk
 * @param {object} params
 * @param {string} params.deKey
 * @param {AsyncIterable<object>} params.rowSource
 * @param {'upsert'|'insert'} params.mode
 * @param {number} [params.maxRowsPerBatch] - window size; defaults to MAX_OBJECTS_PER_BATCH
 * @param {number} params.totalMemoryBatches - upload windows (ceil(rowCount / maxRowsPerBatch)), used for progress logging
 * @param {string} [params.emptySourceLabel] - included in error when zero rows (e.g. file path)
 * @returns {Promise.<{ count: number, requestIds: (string|null)[] }>}
 */
export async function importRowsStreamingForDe(sdk, params) {
    const { deKey, rowSource, mode, totalMemoryBatches, emptySourceLabel } = params;
    const maxRowsPerBatch = params.maxRowsPerBatch ?? MAX_OBJECTS_PER_BATCH;

    /** @type {(string|null)[]} */
    const requestIds = [];
    let uploadedWindows = 0;
    let totalCount = 0;
    /** @type {object[]} */
    let pending = [];

    /**
     * Uploads one in-memory window; empty windows are ignored and do not advance the counter.
     *
     * @param {object[]} windowRows
     * @returns {Promise.<void>}
     */
    const uploadWindow = async (windowRows) => {
        if (windowRows.length === 0) {
            return;
        }
        uploadedWindows += 1;
        log.info(`Uploading batch ${uploadedWindows} of ${totalMemoryBatches}`);
        for (const payload of chunkItemsForPayload(windowRows)) {
            requestIds.push(await postImportPayloadChunk(sdk, deKey, mode, payload));
        }
    };

    for await (const row of rowSource) {
        totalCount += 1;
        pending.push(row);
        if (pending.length >= maxRowsPerBatch) {
            await uploadWindow(pending);
            pending = [];
        }
    }
    // Flush whatever is left over after the source is exhausted.
    await uploadWindow(pending);

    // Throws only when the source produced no rows at all.
    assertNonEmptyImportRowCount(totalCount, emptySourceLabel);

    return { count: totalCount, requestIds };
}
|
|
174
|
+
|
|
50
175
|
/**
|
|
51
176
|
* @param {{ rest: { put: Function, post: Function } }} sdk
|
|
52
177
|
* @param {object} params
|
|
@@ -63,17 +188,15 @@ export async function importFromFile(sdk, params) {
|
|
|
63
188
|
`Cannot determine format for file: ${params.filePath}. Use .csv, .tsv, or .json extension.`,
|
|
64
189
|
);
|
|
65
190
|
}
|
|
66
|
-
const
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
`Export the DE first to obtain a template with column names, then add rows.`,
|
|
72
|
-
);
|
|
73
|
-
}
|
|
74
|
-
return importRowsForDe(sdk, {
|
|
191
|
+
const rowCount = await countDataRowsFromImportPaths([params.filePath], format);
|
|
192
|
+
assertNonEmptyImportRowCount(rowCount, params.filePath);
|
|
193
|
+
const totalMemoryBatches = Math.max(1, Math.ceil(rowCount / MAX_OBJECTS_PER_BATCH));
|
|
194
|
+
const rowSource = streamRowsFromFile(params.filePath, format);
|
|
195
|
+
return importRowsStreamingForDe(sdk, {
|
|
75
196
|
deKey: params.deKey,
|
|
76
|
-
|
|
197
|
+
rowSource,
|
|
77
198
|
mode: params.mode,
|
|
199
|
+
totalMemoryBatches,
|
|
200
|
+
emptySourceLabel: params.filePath,
|
|
78
201
|
});
|
|
79
202
|
}
|
package/lib/init-project.mjs
CHANGED
|
@@ -4,6 +4,7 @@ import readline from 'node:readline/promises';
|
|
|
4
4
|
import { stdin as input, stdout as output } from 'node:process';
|
|
5
5
|
import { FILE_MCDEV_RC, FILE_MCDEV_AUTH, FILE_MCDATA_RC, FILE_MCDATA_AUTH } from './config.mjs';
|
|
6
6
|
import { fetchBusinessUnits } from './business-units.mjs';
|
|
7
|
+
import { log } from './log.mjs';
|
|
7
8
|
|
|
8
9
|
/**
|
|
9
10
|
* @typedef {object} InitOptions
|
|
@@ -68,7 +69,7 @@ function ensureGitignore(projectRoot) {
|
|
|
68
69
|
export async function runMcdataInit(opts) {
|
|
69
70
|
const { projectRoot, isTTY, yes = false, _buFetcher } = opts;
|
|
70
71
|
const out = opts.stdout ?? ((msg) => console.log(msg));
|
|
71
|
-
const err = opts.stderr ?? ((msg) =>
|
|
72
|
+
const err = opts.stderr ?? ((msg) => log.error(msg));
|
|
72
73
|
|
|
73
74
|
// Guard: do not init over an existing mcdev project (both files must be present)
|
|
74
75
|
if (
|
package/lib/log.mjs
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Timestamped logging for mcdata (local timezone via `Date`).
|
|
3
|
+
* Operational messages use info / warn / error levels.
|
|
4
|
+
* info goes to stdout; warn and error go to stderr.
|
|
5
|
+
* When a debug logger is registered via setDebugLogger(), every log.* call
|
|
6
|
+
* also appends the formatted line to the debug log file.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/** @type {import('./debug-logger.mjs').DebugLogger|null} */
|
|
10
|
+
let _debugLogger = null;
|
|
11
|
+
|
|
12
|
+
/**
 * Formats a point in time as a two-digit hour/minute/second string on a 24-hour clock,
 * in the local timezone and the runtime's default locale.
 *
 * @param {Date} [date] - moment to format; defaults to the current time
 * @returns {string}
 */
export function formatTime(date = new Date()) {
    return date.toLocaleTimeString([], {
        hour: '2-digit',
        minute: '2-digit',
        second: '2-digit',
        // `hourCycle: 'h23'` rather than `hour12: false`: some runtimes resolve
        // `hour12: false` to the `h24` cycle and render midnight as `24:xx:xx`.
        hourCycle: 'h23',
    });
}
|
|
23
|
+
|
|
24
|
+
/**
 * Installs the debug file logger for this module. After it is set, each message written
 * through `log.info` / `log.warn` / `log.error` is also handed to `logger.write`.
 *
 * @param {import('./debug-logger.mjs').DebugLogger} logger
 */
export function setDebugLogger(logger) {
    _debugLogger = logger;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Builds the `<time> <level>: <message>` line, forwards it to the registered debug logger
 * (if any), and prints it: `error` via console.error, `warn` via console.warn, anything
 * else via console.log.
 *
 * @param {'info'|'warn'|'error'} level
 * @param {string} message
 */
function write(level, message) {
    const line = `${formatTime()} ${level}: ${message}`;
    _debugLogger?.write(line);
    switch (level) {
        case 'error': {
            console.error(line);
            break;
        }
        case 'warn': {
            console.warn(line);
            break;
        }
        default: {
            console.log(line);
        }
    }
}
|
|
48
|
+
|
|
49
|
+
/** Level-specific logging entry points; each one delegates to `write` with its level. */
export const log = {
    /** @param {string} message */
    info(message) {
        return write('info', message);
    },
    /** @param {string} message */
    warn(message) {
        return write('warn', message);
    },
    /** @param {string} message */
    error(message) {
        return write('error', message);
    },
};
|
package/lib/multi-bu-export.mjs
CHANGED
|
@@ -2,6 +2,7 @@ import path from 'node:path';
|
|
|
2
2
|
import SDK from 'sfmc-sdk';
|
|
3
3
|
import { resolveCredentialAndMid, buildSdkAuthObject, buildSdkOptions } from './config.mjs';
|
|
4
4
|
import { exportDataExtensionToFile } from './export-de.mjs';
|
|
5
|
+
import { log } from './log.mjs';
|
|
5
6
|
|
|
6
7
|
/**
|
|
7
8
|
* @typedef {{ credential: string, bu: string }} CredBuSource
|
|
@@ -21,6 +22,7 @@ import { exportDataExtensionToFile } from './export-de.mjs';
|
|
|
21
22
|
* @param {'csv'|'tsv'|'json'} params.format
|
|
22
23
|
* @param {boolean} [params.jsonPretty]
|
|
23
24
|
* @param {boolean} [params.useGit]
|
|
25
|
+
* @param {number} [params.maxRowsPerFile]
|
|
24
26
|
* @param {import('./config.mjs').DebugLogger|null} [params.logger]
|
|
25
27
|
* @returns {Promise.<string[]>} Paths of all written files
|
|
26
28
|
*/
|
|
@@ -33,6 +35,7 @@ export async function multiBuExport({
|
|
|
33
35
|
format,
|
|
34
36
|
jsonPretty = false,
|
|
35
37
|
useGit = false,
|
|
38
|
+
maxRowsPerFile,
|
|
36
39
|
logger = null,
|
|
37
40
|
}) {
|
|
38
41
|
/** @type {string[]} */
|
|
@@ -41,7 +44,7 @@ export async function multiBuExport({
|
|
|
41
44
|
const { mid, authCred } = resolveCredentialAndMid(mcdevrc, mcdevAuth, credential, bu);
|
|
42
45
|
const sdk = new SDK(buildSdkAuthObject(authCred, mid), buildSdkOptions(logger));
|
|
43
46
|
for (const deKey of deKeys) {
|
|
44
|
-
const {
|
|
47
|
+
const { paths: outPaths, rowCount } = await exportDataExtensionToFile(sdk, {
|
|
45
48
|
projectRoot,
|
|
46
49
|
credentialName: credential,
|
|
47
50
|
buName: bu,
|
|
@@ -49,9 +52,11 @@ export async function multiBuExport({
|
|
|
49
52
|
format,
|
|
50
53
|
jsonPretty,
|
|
51
54
|
useGit,
|
|
55
|
+
maxRowsPerFile,
|
|
52
56
|
});
|
|
53
|
-
|
|
54
|
-
|
|
57
|
+
const label = outPaths.map((p) => `"${path.resolve(p)}"`).join(', ');
|
|
58
|
+
log.info(`Exported: ${label} (${rowCount} rows)`);
|
|
59
|
+
exported.push(...outPaths);
|
|
55
60
|
}
|
|
56
61
|
}
|
|
57
62
|
return exported;
|
package/lib/read-rows.mjs
CHANGED
|
@@ -1,12 +1,49 @@
|
|
|
1
1
|
import { createReadStream, promises as fsPromises } from 'node:fs';
|
|
2
2
|
import csv from 'csv-parser';
|
|
3
3
|
|
|
4
|
+
/**
 * Removes one pair of surrounding double quotes, if present.
 *
 * @param {string} text
 * @returns {string} `text` without its outer quotes, or unchanged when it is not quoted
 */
function stripSurroundingQuotes(text) {
    // Length check keeps a lone `"` from being treated as both the opening and closing quote.
    if (text.length >= 2 && text.startsWith('"') && text.endsWith('"')) {
        return text.slice(1, -1);
    }
    return text;
}

/**
 * Builds the option object passed to `csv-parser` for CSV/TSV files.
 *
 * @param {'csv'|'tsv'} format - `tsv` selects a tab separator, anything else a comma
 * @param {string[]} [columnHeaders] - when non-empty, set as the parser's `headers` option
 * @returns {object}
 */
function buildCsvParserOptions(format, columnHeaders) {
    /** @type {object} */
    const parserOptions = {
        separator: format === 'tsv' ? '\t' : ',',
        bom: true,
        mapHeaders: ({ header }) => {
            // Strip BOM if present (backup in case bom:true misses it)
            const withoutBom = header.startsWith('\uFEFF') ? header.slice(1) : header;
            // Strip surrounding quotes if present (non-standard quoted TSV)
            return stripSurroundingQuotes(withoutBom);
        },
        // Strip surrounding quotes from values if present
        mapValues: ({ value }) => stripSurroundingQuotes(value),
    };
    if (columnHeaders && columnHeaders.length > 0) {
        parserOptions.headers = columnHeaders;
    }
    return parserOptions;
}
|
|
37
|
+
|
|
4
38
|
/**
|
|
5
39
|
* @param {string} filePath
|
|
6
40
|
* @param {'csv'|'tsv'|'json'} format
|
|
41
|
+
* @param {object} [options]
|
|
42
|
+
* @param {string[]} [options.columnHeaders] - when set (CSV/TSV), the file has no header row; map columns by position
|
|
7
43
|
* @returns {Promise.<object[]>}
|
|
8
44
|
*/
|
|
9
|
-
export async function readRowsFromFile(filePath, format) {
|
|
45
|
+
export async function readRowsFromFile(filePath, format, options = {}) {
|
|
46
|
+
const { columnHeaders } = options;
|
|
10
47
|
if (format === 'json') {
|
|
11
48
|
const raw = await fsPromises.readFile(filePath, 'utf8');
|
|
12
49
|
const parsed = JSON.parse(raw);
|
|
@@ -18,37 +55,129 @@ export async function readRowsFromFile(filePath, format) {
|
|
|
18
55
|
}
|
|
19
56
|
throw new Error('JSON import must be an array of row objects or { "items": [...] }');
|
|
20
57
|
}
|
|
21
|
-
const
|
|
58
|
+
const parserOptions = buildCsvParserOptions(format, columnHeaders);
|
|
22
59
|
return new Promise((resolve, reject) => {
|
|
23
60
|
const rows = [];
|
|
24
61
|
createReadStream(filePath)
|
|
25
|
-
.pipe(
|
|
26
|
-
csv({
|
|
27
|
-
separator: delimiter,
|
|
28
|
-
bom: true,
|
|
29
|
-
mapHeaders: ({ header }) => {
|
|
30
|
-
let h = header;
|
|
31
|
-
// Strip BOM if present (backup in case bom:true misses it)
|
|
32
|
-
if (h.startsWith('\uFEFF')) {
|
|
33
|
-
h = h.slice(1);
|
|
34
|
-
}
|
|
35
|
-
// Strip surrounding quotes if present (non-standard quoted TSV)
|
|
36
|
-
if (h.startsWith('"') && h.endsWith('"') && h.length >= 2) {
|
|
37
|
-
h = h.slice(1, -1);
|
|
38
|
-
}
|
|
39
|
-
return h;
|
|
40
|
-
},
|
|
41
|
-
mapValues: ({ value }) => {
|
|
42
|
-
// Strip surrounding quotes from values if present
|
|
43
|
-
if (value.startsWith('"') && value.endsWith('"') && value.length >= 2) {
|
|
44
|
-
return value.slice(1, -1);
|
|
45
|
-
}
|
|
46
|
-
return value;
|
|
47
|
-
},
|
|
48
|
-
}),
|
|
49
|
-
)
|
|
62
|
+
.pipe(csv(parserOptions))
|
|
50
63
|
.on('data', (row) => rows.push(row))
|
|
51
64
|
.on('end', () => resolve(rows))
|
|
52
65
|
.on('error', reject);
|
|
53
66
|
});
|
|
54
67
|
}
|
|
68
|
+
|
|
69
|
+
/**
 * Streams CSV/TSV rows from disk (one row at a time). JSON reads the full file then yields each row.
 *
 * Read errors on the underlying file stream (e.g. a missing file) are surfaced to the
 * consumer as a rejection of the iteration, and the file stream is closed when the
 * consumer stops iterating early.
 *
 * @param {string} filePath
 * @param {'csv'|'tsv'|'json'} format
 * @param {object} [options]
 * @param {string[]} [options.columnHeaders] - passed to the CSV/TSV parser options; ignored for JSON
 * @yields {object} one row object per iteration
 * @returns {AsyncGenerator<object, void, void>}
 */
export async function* streamRowsFromFile(filePath, format, options = {}) {
    const { columnHeaders } = options;
    if (format === 'json') {
        const rows = await readRowsFromFile(filePath, 'json');
        for (const row of rows) {
            yield row;
        }
        return;
    }
    const source = createReadStream(filePath);
    const parser = csv(buildCsvParserOptions(format, columnHeaders));
    // `pipe()` does not forward source errors to the destination; without this handler a
    // failing read emits an unhandled 'error' event instead of rejecting the iteration.
    source.once('error', (err) => parser.destroy(err));
    source.pipe(parser);
    try {
        for await (const row of parser) {
            yield row;
        }
    } finally {
        // Leaving the loop early destroys only the parser; close the file stream as well.
        source.destroy();
    }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Reads one or more export parts in order. CSV/TSV multi-part exports from mcdata include a header
 * row only in the first file; continuation files are data-only, so their columns are mapped using
 * the keys of the first row of the first file.
 *
 * @param {string[]} filePaths
 * @param {'csv'|'tsv'|'json'} format
 * @returns {Promise.<object[]>}
 * @throws {Error} when `filePaths` is empty, or when a multi-part CSV/TSV import has no row in the
 * first file to infer the column names from
 */
export async function readRowsFromImportPaths(filePaths, format) {
    if (filePaths.length === 0) {
        throw new Error('readRowsFromImportPaths requires at least one file path');
    }
    /** @type {object[]} */
    const all = [];
    // Rows are appended one by one: `all.push(...rows)` passes every row as a call argument
    // and can throw a RangeError once a file holds more rows than the engine's argument limit.
    const appendRows = (rows) => {
        for (const row of rows) {
            all.push(row);
        }
    };
    if (format === 'json') {
        for (const fp of filePaths) {
            appendRows(await readRowsFromFile(fp, 'json'));
        }
        return all;
    }
    const firstRows = await readRowsFromFile(filePaths[0], format);
    if (filePaths.length === 1) {
        return firstRows;
    }
    const headers = firstRows.length > 0 ? Object.keys(firstRows[0]) : [];
    if (headers.length === 0) {
        throw new Error(
            `Cannot import multi-part ${format}: the first file has no data rows to infer columns from.`,
        );
    }
    appendRows(firstRows);
    for (const fp of filePaths.slice(1)) {
        appendRows(await readRowsFromFile(fp, format, { columnHeaders: headers }));
    }
    return all;
}
|
|
134
|
+
|
|
135
|
+
/**
 * Tallies the data rows of the given import files by streaming them, so no row objects are
 * kept in memory. An import that uses this reads its files twice: once to count, once to upload.
 *
 * @param {string[]} filePaths
 * @param {'csv'|'tsv'|'json'} format
 * @returns {Promise.<number>}
 */
export async function countDataRowsFromImportPaths(filePaths, format) {
    const rowStream = streamRowsFromImportPaths(filePaths, format);
    let rowTotal = 0;
    for await (const _ignored of rowStream) {
        rowTotal++;
    }
    return rowTotal;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Streams the rows of one or more import files in order. For JSON every file is streamed
 * as-is. For CSV/TSV the first file is streamed normally and the keys of its first row are
 * used as column names for all following files.
 *
 * @param {string[]} filePaths
 * @param {'csv'|'tsv'|'json'} format
 * @yields {object} one row object per iteration
 * @returns {AsyncGenerator<object, void, void>}
 * @throws {Error} when `filePaths` is empty, or (after the first file has been streamed) when
 * more files follow but the first one yielded no row to infer the column names from
 */
export async function* streamRowsFromImportPaths(filePaths, format) {
    if (filePaths.length === 0) {
        throw new Error('streamRowsFromImportPaths requires at least one file path');
    }
    if (format === 'json') {
        for (const fp of filePaths) {
            yield* streamRowsFromFile(fp, 'json');
        }
        return;
    }
    const [firstPath, ...continuationPaths] = filePaths;
    /** @type {string[]|null} */
    let headers = null;
    for await (const row of streamRowsFromFile(firstPath, format)) {
        // Only the first row determines the column names.
        if (headers === null) {
            headers = Object.keys(row);
        }
        yield row;
    }
    if (continuationPaths.length === 0) {
        return;
    }
    if (headers === null || headers.length === 0) {
        throw new Error(
            `Cannot import multi-part ${format}: the first file has no data rows to infer columns from.`,
        );
    }
    for (const fp of continuationPaths) {
        yield* streamRowsFromFile(fp, format, { columnHeaders: headers });
    }
}
|
package/lib/row-count.mjs
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { rowsetGetPath } from './import-routes.mjs';
|
|
2
|
+
import { log } from './log.mjs';
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
5
|
* Fetch the total row count for a Data Extension without downloading all rows.
|
|
@@ -14,7 +15,7 @@ export async function getDeRowCount(sdk, deKey) {
|
|
|
14
15
|
const result = await sdk.rest.get(`${rowsetGetPath(deKey)}?$page=1&$pagesize=1`);
|
|
15
16
|
return result?.count ?? 0;
|
|
16
17
|
} catch (ex) {
|
|
17
|
-
|
|
18
|
+
log.warn(`Could not retrieve row count for DE "${deKey}": ${ex.message}`);
|
|
18
19
|
return null;
|
|
19
20
|
}
|
|
20
21
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sfmc-dataloader",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.7.0",
|
|
4
4
|
"description": "SFMC Data Loader CLI (mcdata) — standalone export/import of Marketing Cloud Data Extension rows; optional mcdev integration",
|
|
5
5
|
"author": "Jörn Berkefeld <joern.berkefeld@gmail.com>",
|
|
6
6
|
"license": "MIT",
|
|
@@ -38,7 +38,7 @@
|
|
|
38
38
|
"dependencies": {
|
|
39
39
|
"csv-parser": "3.2.0",
|
|
40
40
|
"csv-stringify": "6.7.0",
|
|
41
|
-
"sfmc-sdk": "3.
|
|
41
|
+
"sfmc-sdk": "^3.2.1"
|
|
42
42
|
},
|
|
43
43
|
"scripts": {
|
|
44
44
|
"test": "node --test test/*.test.js",
|