@engine9/input-tools 2.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.prettierrc +7 -0
- package/ForEachEntry.js +194 -0
- package/LICENSE +674 -0
- package/README.md +6 -0
- package/ValidatingReadable.js +21 -0
- package/buildSamplePackets.js +13 -0
- package/eslint.config.mjs +17 -0
- package/file/FileUtilities.js +1076 -0
- package/file/GoogleDrive.js +39 -0
- package/file/Parquet.js +137 -0
- package/file/R2.js +32 -0
- package/file/S3.js +329 -0
- package/file/tools.js +359 -0
- package/index.js +426 -0
- package/package.json +54 -0
- package/skills/transaction-mapping/SKILL.md +105 -0
- package/skills/transaction-mapping/reference.md +72 -0
- package/test/cli.js +9 -0
- package/test/file.js +23 -0
- package/test/processing/bigDataMessage.js +52 -0
- package/test/processing/forEach.js +53 -0
- package/test/processing/forEachResume.js +54 -0
- package/test/processing/message.js +40 -0
- package/test/processing/zip.js +21 -0
- package/test/sample/1000_message.packet.zip +0 -0
- package/test/sample/5_message.packet.zip +0 -0
- package/test/sample/fileWithHead.csv +3 -0
- package/test/sample/fileWithoutHead.csv +2 -0
- package/test/sample/message/1000_fake_people.csv +1001 -0
- package/test/sample/message/5_fake_people.csv +6 -0
- package/test/sample/message/message.json5 +41 -0
- package/test/uuid.js +20 -0
- package/timelineTypes.js +139 -0
package/index.js
ADDED
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import dayjs from 'dayjs';
|
|
4
|
+
import debug$0 from 'debug';
|
|
5
|
+
import unzipper from 'unzipper';
|
|
6
|
+
import { v4 as uuidv4, v5 as uuidv5, v7 as uuidv7, validate as uuidIsValid } from 'uuid';
|
|
7
|
+
import archiver from 'archiver';
|
|
8
|
+
import handlebars from 'handlebars';
|
|
9
|
+
import FileUtilities from './file/FileUtilities.js';
|
|
10
|
+
import tools from './file/tools.js';
|
|
11
|
+
import ForEachEntry from './ForEachEntry.js';
|
|
12
|
+
import { TIMELINE_ENTRY_TYPES } from './timelineTypes.js';
|
|
13
|
+
const debug = debug$0('@engine9/input-tools');
|
|
14
|
+
|
|
15
|
+
const {
|
|
16
|
+
appendPostfix,
|
|
17
|
+
bool,
|
|
18
|
+
getBatchTransform,
|
|
19
|
+
getDebatchTransform,
|
|
20
|
+
getFile,
|
|
21
|
+
getManifest,
|
|
22
|
+
getPacketFiles,
|
|
23
|
+
getStringArray,
|
|
24
|
+
downloadFile,
|
|
25
|
+
getTempFilename,
|
|
26
|
+
getTempDir,
|
|
27
|
+
isValidDate,
|
|
28
|
+
parseJSON5,
|
|
29
|
+
relativeDate,
|
|
30
|
+
streamPacket,
|
|
31
|
+
makeStrings,
|
|
32
|
+
writeTempFile
|
|
33
|
+
} = tools;
|
|
34
|
+
/**
 * Formats a date via dayjs.
 * @param {Date|string|number} dateObject - value to format; the literal string
 *   'now' means the current time.
 * @param {string} [format='MMM DD,YYYY'] - dayjs format string.
 * @returns {string} formatted date, or '' for any falsy input.
 */
function getFormattedDate(dateObject, format = 'MMM DD,YYYY') {
  const value = dateObject === 'now' ? new Date() : dateObject;
  if (!value) return '';
  return dayjs(value).format(format);
}
|
|
40
|
+
// Handlebars template helpers used when rendering packet/message templates.
// {{date d [format]}} -- formats a date via getFormattedDate. The typeof check
// filters out the trailing options object handlebars passes to every helper,
// so a missing format falls back to getFormattedDate's default.
handlebars.registerHelper('date', (d, f) => {
  let format;
  if (typeof f === 'string') format = f;
  return getFormattedDate(d, format);
});
// {{json d}} -- serializes any value to a JSON string.
handlebars.registerHelper('json', (d) => JSON.stringify(d));
// {{uuid}} -- a fresh time-ordered v7 UUID on every invocation.
handlebars.registerHelper('uuid', () => uuidv7());
// {{percent a b}} -- renders a/b as a percentage with two decimal places.
handlebars.registerHelper('percent', (a, b) => `${((100 * a) / b).toFixed(2)}%`);
// {{or a b c}} -- first truthy of up to three values (third may be the
// handlebars options object when only two are supplied in the template).
handlebars.registerHelper('or', (a, b, c) => a || b || c);
|
|
49
|
+
/**
 * Lists the entry paths contained in a zip packet.
 * @param {string} _path - filesystem path to the zip file.
 * @returns {Promise<string[]>} paths of all entries in the archive.
 */
async function list(_path) {
  const directory = await unzipper.Open.file(_path);
  // BUG FIX: the previous implementation streamed the first entry into a
  // hard-coded file named 'firstFile' in the working directory and resolved
  // undefined -- leftover debug code. A list() should return the entry paths
  // (the same mapping appears commented out in extract()).
  return directory.files.map((f) => f.path);
}
|
|
55
|
+
/**
 * Extracts a single named entry from a zip archive to a temp file.
 * @param {string} _path - filesystem path to the zip file.
 * @param {string} _file - entry path within the archive.
 * @returns {Promise<string>} the temp filename the entry was written to.
 * @throws if the archive has no entry with that path.
 */
async function extract(_path, _file) {
  // BUG FIX: unzipper.Open is a namespace object, not a function -- opening a
  // file on disk requires unzipper.Open.file(...). The original
  // `await unzipper.Open(_path)` threw "unzipper.Open is not a function".
  const directory = await unzipper.Open.file(_path);
  const file = directory.files.find((d) => d.path === _file);
  // Guard: previously a missing entry surfaced as an opaque TypeError when
  // calling file.stream() on undefined.
  if (!file) throw new Error(`extract: no entry named '${_file}' in archive ${_path}`);
  const tempFilename = await getTempFilename({ source: _file });
  return new Promise((resolve, reject) => {
    file
      .stream()
      .pipe(fs.createWriteStream(tempFilename))
      .on('error', reject)
      // Resolve with the destination path so callers can locate the extracted
      // file (previously resolved undefined).
      .on('finish', () => resolve(tempFilename));
  });
}
|
|
64
|
+
/**
 * Normalizes new file entries and appends them to existingFiles (mutated in
 * place), assigning each a packet-relative path of `<type>/<basename>`.
 * @param {Object[]} existingFiles - accumulator array; new items are pushed on.
 * @param {string|string[]|Object[]} _newFiles - filenames (or objects with an
 *   originalFilename property) to add.
 * @param {Object} [options] - { type, dateCreated } applied to every entry.
 * @throws if two entries would map to the same packet path.
 */
function appendFiles(existingFiles, _newFiles, options) {
  const newFiles = getStringArray(_newFiles);
  if (newFiles.length === 0) return;
  let { type, dateCreated } = options || {};
  if (!type) type = 'unknown';
  if (!dateCreated) dateCreated = new Date().toISOString();
  let arr = newFiles;
  if (!Array.isArray(newFiles)) arr = [arr];
  arr.forEach((p) => {
    const item = {
      type,
      originalFilename: '',
      isNew: true,
      dateCreated
    };
    if (typeof p === 'string') {
      item.originalFilename = path.resolve(process.cwd(), p);
    } else {
      // BUG FIX: the object branch previously resolved the empty placeholder
      // '' (which yields process.cwd() itself); use the filename carried on
      // the object entry instead.
      item.originalFilename = path.resolve(process.cwd(), p.originalFilename || '');
    }
    const file = item.originalFilename.split(path.sep).pop();
    item.path = `${type}/${file}`;
    const existingFile = existingFiles.find((f) => f.path === item.path);
    // BUG FIX: the original passed the path as a second Error argument with a
    // unary plus (`new Error('...:', +item.path)`), which produced a message
    // without the path; interpolate it instead.
    if (existingFile) throw new Error(`Error adding files, duplicate path found for path: ${item.path}`);
    existingFiles.push(item);
  });
}
|
|
91
|
+
/**
 * Creates a zipped "packet" file containing the given message/person/timeline/
 * statistics files plus a generated manifest.json describing them.
 * @param {Object} options
 * @param {string} [options.accountId='engine9'] - recorded in the manifest.
 * @param {string} [options.pluginId=''] - recorded as manifest.source.pluginId.
 * @param {string} [options.target] - output zip filename; a temp
 *   *.packet.zip is generated when omitted.
 * @param {string[]} [options.messageFiles] - message content files.
 * @param {string[]} [options.personFiles] - files with data on people.
 * @param {string[]} [options.timelineFiles] - activity entry files.
 * @param {string[]} [options.statisticsFiles] - aggregate statistics files.
 * @returns {Promise<{filename: string, bytes: number}>} output path and size.
 * @throws on archiver errors/warnings, or on the common peopleFiles typo.
 */
async function create(options) {
  const {
    accountId = 'engine9',
    pluginId = '',
    target = '', // target filename, creates one if not specified
    messageFiles = [], // file with contents of message, used for delivery
    personFiles = [], // files with data on people
    timelineFiles = [], // activity entry
    statisticsFiles = [] // files with aggregate statistics
  } = options;
  if (options.peopleFiles) throw new Error('Unknown option: peopleFiles, did you mean personFiles?');
  const files = [];
  const dateCreated = new Date().toISOString();
  // appendFiles mutates `files`, assigning each entry a `<type>/<basename>` path.
  appendFiles(files, messageFiles, { type: 'message', dateCreated });
  appendFiles(files, personFiles, { type: 'person', dateCreated });
  appendFiles(files, timelineFiles, { type: 'timeline', dateCreated });
  appendFiles(files, statisticsFiles, { type: 'statistics', dateCreated });
  const zipFilename = target || (await getTempFilename({ postfix: '.packet.zip' }));
  // `files` is shared with the archive loop below; working fields are stripped
  // from it before the manifest is serialized.
  const manifest = {
    accountId,
    source: {
      pluginId
    },
    dateCreated,
    files
  };
  // create a file to stream archive data to.
  const output = fs.createWriteStream(zipFilename);
  const archive = archiver('zip', {
    zlib: { level: 9 } // Sets the compression level.
  });
  return new Promise((resolve, reject) => {
    debug(`Setting up write stream to ${zipFilename}`);
    // listen for all archive data to be written
    // 'close' event is fired only when a file descriptor is involved
    output.on('close', () => {
      debug('archiver has been finalized and the output file descriptor has closed, calling success');
      debug(zipFilename);
      return resolve({
        filename: zipFilename,
        bytes: archive.pointer()
      });
    });
    // This event is fired when the data source is drained no matter what was the data source.
    // It is not part of this library but rather from the NodeJS Stream API.
    // @see: https://nodejs.org/api/stream.html#stream_event_end
    output.on('end', () => {
      // debug('end event -- Data has been drained');
    });
    // warnings could be file not founds, etc, but we error even on those
    archive.on('warning', (err) => {
      reject(err);
    });
    // good practice to catch this error explicitly
    archive.on('error', (err) => {
      reject(err);
    });
    archive.pipe(output);
    // Queue each source file under its packet-relative name. archive.file
    // captures the originalFilename string here, so it is safe to strip the
    // property from the shared objects immediately afterwards.
    files.forEach(({ path: name, originalFilename }) => archive.file(originalFilename, { name }));
    // Strip working-only fields so the manifest records just type/path/dateCreated.
    files.forEach((f) => {
      delete f.originalFilename;
      delete f.isNew;
    });
    archive.append(Buffer.from(JSON.stringify(manifest, null, 4), 'utf8'), { name: 'manifest.json' });
    archive.finalize();
  });
}
|
|
158
|
+
/**
 * Converts a non-negative integer into its little-endian 8-byte representation.
 * @param {number} _v - integer to convert (expected to be a safe positive int,
 *   e.g. an epoch-millisecond timestamp).
 * @returns {number[]} 8 bytes, least-significant first.
 */
function intToByteArray(_v) {
  const bytes = [];
  let remaining = _v;
  for (let i = 0; i < 8; i += 1) {
    const low = remaining & 0xff;
    bytes.push(low);
    // Shift right by one byte without 32-bit truncation.
    remaining = (remaining - low) / 256;
  }
  return bytes;
}
|
|
169
|
+
/**
 * Derives a deterministic v5 UUID for a plugin value within a named namespace.
 * The fixed namespace UUID is arbitrary -- it only needs to be stable and
 * unique, not cryptographically secure.
 * @param {string} uniqueNamespaceLikeDomainName - e.g. a reverse-domain name.
 * @param {string} valueWithinNamespace - identifier within that namespace.
 * @returns {string} deterministic UUID.
 */
function getPluginUUID(uniqueNamespaceLikeDomainName, valueWithinNamespace) {
  const name = `${uniqueNamespaceLikeDomainName}::${valueWithinNamespace}`;
  return uuidv5(name, 'f9e1024d-21ac-473c-bac6-64796dd771dd');
}
|
|
173
|
+
/**
 * Derives a deterministic v5 UUID for an input, namespaced by plugin.
 * Callable as getInputUUID(pluginId, remoteInputId) or
 * getInputUUID({ pluginId, remoteInputId }).
 * @param {string|Object} a - pluginId (UUID string) or an options object.
 * @param {string} [b] - remoteInputId when a is a string.
 * @returns {string} deterministic UUID.
 * @throws on blank/invalid pluginId or blank remoteInputId.
 */
function getInputUUID(a, b) {
  const opts = typeof a === 'object' ? a : { pluginId: a, remoteInputId: b };
  const { pluginId, remoteInputId } = opts;
  if (!pluginId) throw new Error('getInputUUID: Cowardly rejecting a blank plugin_id');
  if (!uuidIsValid(pluginId)) throw new Error(`Invalid pluginId:${pluginId}, should be a UUID`);
  const rid = (remoteInputId || '').trim();
  if (!rid) throw new Error('getInputUUID: Cowardly rejecting a blank remote_input_id, set a default');
  // Random custom namespace for inputs -- not secure, just a namespace:
  // 3d0e5d99-6ba9-4fab-9bb2-c32304d3df8e
  return uuidv5(`${pluginId}:${rid}`, '3d0e5d99-6ba9-4fab-9bb2-c32304d3df8e');
}
|
|
188
|
+
/** Matches a 13-digit string: epoch milliseconds written as text. */
const timestampMatch = /^\d{13}$/;
/**
 * Converts a number or string into a Date.
 * Numbers are treated as epoch milliseconds; 13-digit numeric strings are
 * parsed as epoch milliseconds; anything else falls through to new Date(s).
 * @param {number|string} s
 * @returns {Date} may be an Invalid Date if s is unparseable.
 */
function dateFromString(s) {
  if (typeof s === 'number') return new Date(s);
  // FIX (idiom): parseInt was called without a radix; always pass 10.
  if (typeof s === 'string' && timestampMatch.test(s)) {
    return new Date(Number.parseInt(s, 10));
  }
  return new Date(s);
}
|
|
196
|
+
/**
 * Builds a time-sortable UUID. With no arguments it returns a fresh v7 UUID.
 * When a date is supplied, the leading 6 bytes of the (given or generated)
 * UUID are overwritten with the date's 48-bit epoch-millisecond timestamp so
 * lexical order tracks time order, mirroring the UUIDv7 layout.
 * @param {number|string|Date} [date] - optional timestamp to embed.
 * @param {string} [inputUuid] - optional base UUID to stamp the time into.
 * @returns {string} UUID string.
 * @throws if date is provided but unparseable.
 */
function getUUIDv7(date, inputUuid) {
  /* optional date and input UUID */
  const uuid = inputUuid || uuidv7();
  // Raw 16 bytes of the UUID (hex with dashes stripped).
  const bytes = Buffer.from(uuid.replace(/-/g, ''), 'hex');
  if (date !== undefined) {
    const d = dateFromString(date);
    // isNaN behaves differently than Number.isNaN -- we're actually going for the
    // attempted conversion here
    if (isNaN(d)) throw new Error(`getUUIDv7 got an invalid date:${date || '<blank>'}`);
    // intToByteArray is little-endian; reversing gives big-endian, and
    // slice(2, 8) keeps the low 6 bytes (the 48-bit ms timestamp), which are
    // written over bytes 0-5 of the UUID.
    const dateBytes = intToByteArray(d.getTime()).reverse();
    dateBytes.slice(2, 8).forEach((b, i) => {
      bytes[i] = b;
    });
  }
  // NOTE(review): uuidv4({ random }) re-encodes these bytes but stamps v4
  // version/variant bits, so the result is v7-sortable without being a strict
  // RFC v7 UUID -- confirm this is intentional.
  return uuidv4({ random: bytes });
}
|
|
212
|
+
/**
 * Returns the timestamp encoded in a UUID (assumed to be v7; for any other
 * version the result is meaningless).
 * The first 12 hex digits of a v7 UUID are the epoch-millisecond timestamp.
 * @param {string} uuid
 * @returns {Date}
 */
function getUUIDTimestamp(uuid) {
  const hex = String(uuid).replace(/-/g, '');
  const millis = Number.parseInt(hex.substring(0, 12), 16);
  return new Date(millis);
}
|
|
217
|
+
/**
 * Resolves the numeric entry_type_id for a timeline entry.
 * Precedence: o.entry_type_id, defaults.entry_type_id, then the
 * TIMELINE_ENTRY_TYPES lookup of o.entry_type / defaults.entry_type.
 * @param {Object} o - entry fields.
 * @param {Object} [options] - { defaults } with fallback fields.
 * @returns {number} entry type id.
 * @throws if neither an id nor a recognized entry_type is available.
 */
function getEntryTypeId(o, { defaults = {} } = {}) {
  const explicitId = o.entry_type_id || defaults.entry_type_id;
  if (explicitId) return explicitId;
  const etype = o.entry_type || defaults.entry_type;
  if (!etype) {
    throw new Error('No entry_type, nor entry_type_id specified, specify one to generate a timeline suitable ID');
  }
  const mappedId = TIMELINE_ENTRY_TYPES[etype];
  if (mappedId === undefined) throw new Error(`Invalid entry_type: ${etype}`);
  return mappedId;
}
|
|
228
|
+
/**
 * Resolves the entry_type string for a timeline entry.
 * Precedence: o.entry_type, defaults.entry_type, then a TIMELINE_ENTRY_TYPES
 * lookup of o.entry_type_id / defaults.entry_type_id.
 * @param {Object} o - entry fields.
 * @param {Object} [defaults] - fallback fields.
 * @returns {string} entry type name.
 * @throws if no entry_type is available and the id cannot be mapped.
 */
function getEntryType(o, defaults = {}) {
  const explicit = o.entry_type || defaults.entry_type;
  if (explicit) return explicit;
  const id = o.entry_type_id || defaults.entry_type_id;
  // NOTE(review): this assumes TIMELINE_ENTRY_TYPES maps ids back to names as
  // well as names to ids -- confirm timelineTypes.js exports both directions.
  const etype = TIMELINE_ENTRY_TYPES[id];
  // BUG FIX: the error previously interpolated `etype`, which is always
  // undefined on this path ("Invalid entry_type: undefined"); report the id
  // that failed to map instead.
  if (etype === undefined) throw new Error(`Invalid entry_type_id: ${id}`);
  return etype;
}
|
|
236
|
+
// Fields that must be present to build the composite fallback id below.
const requiredTimelineEntryFields = ['ts', 'entry_type_id', 'input_id', 'person_id'];
/**
 * Derives a deterministic, time-sortable UUID for a timeline entry.
 * Precedence:
 *   1. remote_entry_uuid (used verbatim after validation)
 *   2. remote_entry_id, namespaced by input_id via uuidv5
 *   3. a composite of ts/person_id/entry_type_id/source_code_id, namespaced
 *      by input_id via uuidv5
 * In cases 2 and 3 the leading bytes are overwritten with the ts via
 * getUUIDv7 so ids sort roughly by time.
 * @param {Object} inputObject - entry fields; merged over defaults.
 * @param {Object} [options] - { defaults } supplying fallback fields.
 * @returns {string} UUID.
 * @throws on invalid UUIDs, missing required fields, or an unparseable ts.
 */
function getTimelineEntryUUID(inputObject, { defaults = {} } = {}) {
  const o = { ...defaults, ...inputObject };
  /*
  Outside systems CAN specify a unique UUID as remote_entry_uuid,
  which will be used for updates, etc.
  If not, it will be generated using whatever info we have
  */
  if (o.remote_entry_uuid) {
    if (!uuidIsValid(o.remote_entry_uuid)) throw new Error('Invalid remote_entry_uuid, it must be a UUID');
    return o.remote_entry_uuid;
  }
  /*
  Outside systems CAN specify a unique remote_entry_id
  If not, it will be generated using whatever info we have
  */
  if (o.remote_entry_id) {
    // get a temp ID
    if (!o.input_id)
      throw new Error('Error generating timeline entry uuid -- remote_entry_id specified, but no input_id');
    try {
      const uuid = uuidv5(String(o.remote_entry_id), o.input_id);
      // Change out the ts to match the v7 sorting.
      // But because outside specified remote_entry_uuid
      // may not match this standard, uuid sorting isn't guaranteed
      return getUUIDv7(o.ts, uuid);
    } catch (e) {
      debug('Error getting uuid with object:', o);
      throw e;
    }
  }
  // Fallback: build a composite id from the entry's own fields.
  o.entry_type_id = getEntryTypeId(o);
  const missing = requiredTimelineEntryFields.filter((d) => o[d] === undefined); // 0 could be an entry type value
  if (missing.length > 0) throw new Error(`Missing required fields to append an entry_id:${missing.join(',')}`);
  const ts = new Date(o.ts);
  // isNaN behaves differently than Number.isNaN -- we're actually going for the
  // attempted conversion here
  if (isNaN(ts)) throw new Error(`getTimelineEntryUUID got an invalid date:${o.ts || '<blank>'}`);
  const idString = `${ts.toISOString()}-${o.person_id}-${o.entry_type_id}-${o.source_code_id || 0}`;
  if (!uuidIsValid(o.input_id)) {
    throw new Error(`Invalid input_id:'${o.input_id}', type ${typeof o.input_id} -- should be a uuid`);
  }
  // get a temp ID
  const uuid = uuidv5(idString, o.input_id);
  // Change out the ts to match the v7 sorting.
  // But because outside specified remote_entry_uuid
  // may not match this standard, uuid sorting isn't guaranteed
  return getUUIDv7(ts, uuid);
}
|
|
285
|
+
/**
 * Produces an array of Dates spanning [startDate, endDate] with a step size
 * chosen from the overall span: daily (<10 days), every 3 days (<32 days),
 * weekly (<4 months), monthly (<2 years), quarterly (<4 years), else yearly.
 * The final element is always exactly endDate.
 * @param {Date|string|number} startDate
 * @param {Date|string|number} endDate
 * @returns {Date[]}
 */
function getDateRangeArray(startDate, endDate) {
  const start = new Date(startDate);
  const end = new Date(endDate);
  const MS_PER_DAY = 24 * 60 * 60 * 1000;

  // Applies exactly one of days/months/years to a copy of the date.
  const shift = (date, { days = 0, months = 0, years = 0 }) => {
    const d = new Date(date);
    if (days) d.setDate(d.getDate() + days);
    if (months) d.setMonth(d.getMonth() + months);
    if (years) d.setFullYear(d.getFullYear() + years);
    return d;
  };

  const diffDays = Math.floor((end - start) / MS_PER_DAY);
  const diffMonths = (end.getFullYear() - start.getFullYear()) * 12 + (end.getMonth() - start.getMonth());
  const diffYears = end.getFullYear() - start.getFullYear();

  let step;
  if (diffDays < 10) step = { days: 1 };
  else if (diffDays < 32) step = { days: 3 };
  else if (diffMonths < 4) step = { days: 7 };
  else if (diffYears < 2) step = { months: 1 };
  else if (diffYears < 4) step = { months: 3 };
  else step = { years: 1 };

  const result = [];
  let cursor = new Date(start);
  while (cursor <= end) {
    result.push(new Date(cursor));
    const next = shift(cursor, step);
    if (next > end) break;
    cursor = next;
  }
  // Ensure the last date is exactly the end date
  if (result.length === 0 || result[result.length - 1].getTime() !== end.getTime()) {
    result.push(new Date(end));
  }
  return result;
}
|
|
335
|
+
/**
 * Error that can carry arbitrary extra properties.
 * Accepts either a plain message string, or an object whose `message` becomes
 * the Error message and whose remaining enumerable keys are copied onto the
 * instance (a `status` own property is always set, even if undefined).
 */
class ObjectError extends Error {
  constructor(data) {
    if (typeof data === 'string') {
      // normal behavior
      super(data);
      return;
    }
    if (typeof data !== 'object') {
      super('(No error message)');
      return;
    }
    super(data.message);
    for (const [key, value] of Object.entries(data)) {
      this[key] = value;
    }
    this.status = data.status;
  }
}
|
|
351
|
+
export { appendPostfix };
|
|
352
|
+
export { bool };
|
|
353
|
+
export { create };
|
|
354
|
+
export { list };
|
|
355
|
+
export { downloadFile };
|
|
356
|
+
export { extract };
|
|
357
|
+
export { ForEachEntry };
|
|
358
|
+
export { FileUtilities };
|
|
359
|
+
export { getBatchTransform };
|
|
360
|
+
export { getDateRangeArray };
|
|
361
|
+
export { getDebatchTransform };
|
|
362
|
+
export { getEntryType };
|
|
363
|
+
export { getEntryTypeId };
|
|
364
|
+
export { getFile };
|
|
365
|
+
export { getManifest };
|
|
366
|
+
export { getStringArray };
|
|
367
|
+
export { getTempDir };
|
|
368
|
+
export { getTempFilename };
|
|
369
|
+
export { getTimelineEntryUUID };
|
|
370
|
+
export { getPacketFiles };
|
|
371
|
+
export { getPluginUUID };
|
|
372
|
+
export { getInputUUID };
|
|
373
|
+
export { getUUIDv7 };
|
|
374
|
+
export { getUUIDTimestamp };
|
|
375
|
+
export { handlebars };
|
|
376
|
+
export { isValidDate };
|
|
377
|
+
export { makeStrings };
|
|
378
|
+
export { ObjectError };
|
|
379
|
+
export { parseJSON5 };
|
|
380
|
+
export { relativeDate };
|
|
381
|
+
export { streamPacket };
|
|
382
|
+
export { TIMELINE_ENTRY_TYPES };
|
|
383
|
+
export { writeTempFile };
|
|
384
|
+
export { uuidIsValid };
|
|
385
|
+
export { uuidv4 };
|
|
386
|
+
export { uuidv5 };
|
|
387
|
+
export { uuidv7 };
|
|
388
|
+
export default {
|
|
389
|
+
appendPostfix,
|
|
390
|
+
bool,
|
|
391
|
+
create,
|
|
392
|
+
list,
|
|
393
|
+
downloadFile,
|
|
394
|
+
extract,
|
|
395
|
+
ForEachEntry,
|
|
396
|
+
FileUtilities,
|
|
397
|
+
getBatchTransform,
|
|
398
|
+
getDateRangeArray,
|
|
399
|
+
getDebatchTransform,
|
|
400
|
+
getEntryType,
|
|
401
|
+
getEntryTypeId,
|
|
402
|
+
getFile,
|
|
403
|
+
getManifest,
|
|
404
|
+
getStringArray,
|
|
405
|
+
getTempDir,
|
|
406
|
+
getTempFilename,
|
|
407
|
+
getTimelineEntryUUID,
|
|
408
|
+
getPacketFiles,
|
|
409
|
+
getPluginUUID,
|
|
410
|
+
getInputUUID,
|
|
411
|
+
getUUIDv7,
|
|
412
|
+
getUUIDTimestamp,
|
|
413
|
+
handlebars,
|
|
414
|
+
isValidDate,
|
|
415
|
+
makeStrings,
|
|
416
|
+
ObjectError,
|
|
417
|
+
parseJSON5,
|
|
418
|
+
relativeDate,
|
|
419
|
+
streamPacket,
|
|
420
|
+
TIMELINE_ENTRY_TYPES,
|
|
421
|
+
writeTempFile,
|
|
422
|
+
uuidIsValid,
|
|
423
|
+
uuidv4,
|
|
424
|
+
uuidv5,
|
|
425
|
+
uuidv7
|
|
426
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@engine9/input-tools",
|
|
3
|
+
"version": "2.0.7",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Tools for dealing with Engine9 inputs",
|
|
6
|
+
"main": "index.js",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"test-big-data": "export DEBUG=*;node --test --test-name-pattern=big-data",
|
|
9
|
+
"test": "export DEBUG=*;node --test --test-skip-pattern=big-data"
|
|
10
|
+
},
|
|
11
|
+
"author": "Engine9",
|
|
12
|
+
"license": "GPL-3.0-or-later",
|
|
13
|
+
"devDependencies": {
|
|
14
|
+
"eslint": "^9.33.0"
|
|
15
|
+
},
|
|
16
|
+
"dependencies": {
|
|
17
|
+
"@aws-sdk/client-s3": "^3.893.0",
|
|
18
|
+
"@dsnp/parquetjs": "^1.8.7",
|
|
19
|
+
"archiver": "^7.0.1",
|
|
20
|
+
"async-mutex": "^0.5.0",
|
|
21
|
+
"csv": "^6.3.11",
|
|
22
|
+
"dayjs": "^1.11.13",
|
|
23
|
+
"debug": "^4.3.4",
|
|
24
|
+
"detect-file-encoding-and-language": "^2.4.0",
|
|
25
|
+
"googleapis": "^148.0.0",
|
|
26
|
+
"handlebars": "^4.7.8",
|
|
27
|
+
"json5": "^2.2.3",
|
|
28
|
+
"mime-type": "^5.0.3",
|
|
29
|
+
"mkdirp": "^3.0.1",
|
|
30
|
+
"p-limit": "^7.1.1",
|
|
31
|
+
"parallel-transform": "^1.2.0",
|
|
32
|
+
"throttle-debounce": "^5.0.2",
|
|
33
|
+
"unzipper": "^0.12.1",
|
|
34
|
+
"uuid": "^11.1.0",
|
|
35
|
+
"xlstream": "^2.5.5",
|
|
36
|
+
"yargs": "^17.7.2"
|
|
37
|
+
},
|
|
38
|
+
"directories": {
|
|
39
|
+
"test": "test"
|
|
40
|
+
},
|
|
41
|
+
"repository": {
|
|
42
|
+
"type": "git",
|
|
43
|
+
"url": "git+https://github.com/engine9-io/input-tools.git"
|
|
44
|
+
},
|
|
45
|
+
"keywords": [
|
|
46
|
+
"Engine9",
|
|
47
|
+
"CRM",
|
|
48
|
+
"CDP"
|
|
49
|
+
],
|
|
50
|
+
"bugs": {
|
|
51
|
+
"url": "https://github.com/engine9-io/input-tools/issues"
|
|
52
|
+
},
|
|
53
|
+
"homepage": "https://github.com/engine9-io/input-tools#readme"
|
|
54
|
+
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: transaction-mapping
|
|
3
|
+
description: Assists in writing JavaScript mapping functions that map 3rd-party payment/transaction data into the standard Transaction schema (Frakture Transactions Data). Use when mapping Stripe, PayPal, donor databases, CSV exports, or any external payment data into engine9 transaction records.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Transaction Mapping
|
|
7
|
+
|
|
8
|
+
Use this skill when writing a JavaScript function that maps 3rd-party payment or donation data into the standard Transaction schema. The canonical schema is defined in [Frakture Transactions Data](https://frakture.notion.site/Frakture-Transactions-Data-442349ac436a4f7db8e7d732359e7d8f). The codebase implements this in `interfaces/transaction/schema.js` and processes mapped rows via `interfaces/transaction/transforms/inbound/upsert_tables.js`.
|
|
9
|
+
|
|
10
|
+
## Mapping workflow
|
|
11
|
+
|
|
12
|
+
1. **Inspect the source** – Identify which 3rd-party fields correspond to Transaction schema fields (amount, date, person identifier, recurring vs one-time, refunds, etc.).
|
|
13
|
+
2. **Implement a pure mapping function** – `(rawRow) => transactionRow`. Do not set `id` or `entry_type_id` in the mapper; the inbound transform derives them from `getEntryTypeId` and `getTimelineEntryUUID`.
|
|
14
|
+
3. **Return a single object per payment** – One 3rd-party record (or one logical payment) → one Transaction-shaped object. For refunds that are separate rows, map each and use `entry_type: 'TRANSACTION_REFUND'` and `refund_amount` where appropriate.
|
|
15
|
+
4. **Validate required fields** – Ensure every returned object has `ts`, `amount`, `remote_person_id`, and either `entry_type` or `entry_type_id`. The pipeline will throw if any of these are missing.
|
|
16
|
+
|
|
17
|
+
## Required fields (must be present on every mapped row)
|
|
18
|
+
|
|
19
|
+
| Field | Type | Notes |
|
|
20
|
+
| ------------------ | ------ | -------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
21
|
+
| `ts` | Date | Transaction date/time. Accept ISO string or number (ms); converted with `new Date(ts)`. |
|
|
22
|
+
| `amount` | number | Transaction amount (currency units). |
|
|
23
|
+
| `remote_person_id` | string | Payer/donor identifier from the 3rd-party source (e.g. donor_id, customer_id). The pipeline resolves this to the in-house `person_id`. |
|
|
24
|
+
| `entry_type` | string | Or `entry_type_id`. Use a value from [Transaction entry types](#transaction-entry-types) (e.g. `'TRANSACTION_ONE_TIME'`, `'TRANSACTION_INITIAL'`). |
|
|
25
|
+
|
|
26
|
+
**Note:** Do not set `input_id` in the mapper. It is calculated by the pipeline from `remote_page_name` or `remote_input_id`.
|
|
27
|
+
|
|
28
|
+
## Optional but important fields
|
|
29
|
+
|
|
30
|
+
| Field | Type | Notes |
|
|
31
|
+
| ------------------------------------------------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------- |
|
|
32
|
+
| `refund_amount` | number | For refunds; use with `entry_type: 'TRANSACTION_REFUND'` when applicable. |
|
|
33
|
+
| `remote_transaction_id` | string | Idempotency key from 3rd-party system; used for upserts and UUID derivation. |
|
|
34
|
+
| `remote_page_name` | string | e.g. campaign or form name; used (with `remote_input_id`) to derive `input_id`. |
|
|
35
|
+
| `remote_recurring_id` | string | External subscription/recurring series id. |
|
|
36
|
+
| `recurs` | string | Frequency: `'daily'`, `'weekly'`, `'monthly'`, `'quarterly'`, `'annually'`, `'semi-annually'`. Inbound transform maps to `recurs_id`. |
|
|
37
|
+
| `recurring_number` | number | Occurrence index in a series (e.g. 1st, 2nd donation in a recurring set). |
|
|
38
|
+
| `given_name`, `family_name`, `email` | string | Payer/donor info; useful for matching or display. |
|
|
39
|
+
| `source_code_id`, `override_source_code_id`, `final_source_code_id` | number | Attribution/source. |
|
|
40
|
+
| `recommended_message_id`, `override_message_id`, `final_message_id` | UUID | Message attribution. |
|
|
41
|
+
| `extra` | object | JSON blob for vendor-specific data that doesn’t fit the schema. |
|
|
42
|
+
| `remote_entry_uuid` | UUID | If the 3rd-party system provides a stable UUID, set this and it will be used as the record id. |
|
|
43
|
+
|
|
44
|
+
## Transaction entry types
|
|
45
|
+
|
|
46
|
+
Use exactly these `entry_type` string values (or the numeric `entry_type_id`). Import `TIMELINE_ENTRY_TYPES` from `@engine9/input-tools` for the full map.
|
|
47
|
+
|
|
48
|
+
| entry_type | entry_type_id | Use when |
|
|
49
|
+
| ------------------------ | ------------- | ------------------------------------------------------------ |
|
|
50
|
+
| `TRANSACTION` | 10 | Generic; prefer a more specific type when known. |
|
|
51
|
+
| `TRANSACTION_ONE_TIME` | 11 | Single, non-recurring payment. |
|
|
52
|
+
| `TRANSACTION_INITIAL` | 12 | First payment in a recurring series. |
|
|
53
|
+
| `TRANSACTION_SUBSEQUENT` | 13 | Later payment in a recurring series. |
|
|
54
|
+
| `TRANSACTION_RECURRING` | 14 | Recurring payment, order unknown. |
|
|
55
|
+
| `TRANSACTION_REFUND` | 15 | Refund; set `refund_amount` and optionally link to original. |
|
|
56
|
+
|
|
57
|
+
## Mapping function template
|
|
58
|
+
|
|
59
|
+
```javascript
|
|
60
|
+
/**
|
|
61
|
+
* Maps a single 3rd-party payment record to the Transaction schema.
|
|
62
|
+
* @param {Object} row - Raw record from the payment system (e.g. Stripe charge, CSV row).
|
|
63
|
+
* @returns {Object} Transaction-shaped object (required: ts, amount, remote_person_id, entry_type).
|
|
64
|
+
*/
|
|
65
|
+
function mapPaymentToTransaction(row) {
|
|
66
|
+
return {
|
|
67
|
+
ts: row.created_at ?? row.date ?? row.timestamp,
|
|
68
|
+
amount: parseFloat(row.amount ?? row.total ?? 0),
|
|
69
|
+
remote_person_id: row.donor_id ?? row.customer_id ?? row.external_id,
|
|
70
|
+
entry_type: row.recurring ? 'TRANSACTION_SUBSEQUENT' : 'TRANSACTION_ONE_TIME',
|
|
71
|
+
remote_transaction_id: row.id ?? row.transaction_id,
|
|
72
|
+
remote_page_name: row.campaign ?? row.form_name ?? null,
|
|
73
|
+
email: row.email ?? null,
|
|
74
|
+
given_name: row.first_name ?? null,
|
|
75
|
+
family_name: row.last_name ?? null,
|
|
76
|
+
remote_recurring_id: row.subscription_id ?? null,
|
|
77
|
+
recurs: row.interval === 'month' ? 'monthly' : null,
|
|
78
|
+
recurring_number: row.occurrence ?? null,
|
|
79
|
+
extra: row.raw ? { raw: row.raw } : undefined
|
|
80
|
+
};
|
|
81
|
+
}
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
- **Do not** set `id`, `entry_type_id`, or `input_id` in the mapper; the inbound transform/pipeline sets them (e.g. `input_id` from `remote_page_name` or `remote_input_id`).
|
|
85
|
+
- **Do** normalize dates to something `new Date(ts)` can parse (ISO string or ms).
|
|
86
|
+
- **Do** use `remote_person_id` for the 3rd-party payer/donor identifier; the pipeline resolves it to in-house `person_id`.
|
|
87
|
+
- **Do** use `remote_transaction_id` when the source has a stable id for idempotent upserts.
|
|
88
|
+
- **Do** use `entry_type` (string) unless you have a reason to use numeric `entry_type_id`.
|
|
89
|
+
|
|
90
|
+
## Refunds
|
|
91
|
+
|
|
92
|
+
- If the source has a separate refund row: set `entry_type: 'TRANSACTION_REFUND'`, `refund_amount`, and `ts`; keep `amount` as 0 or the original amount depending on product rules.
|
|
93
|
+
- If the source only has a flag: you may emit one row with `amount` and optionally `refund_amount`, and still use `TRANSACTION_REFUND` for the entry_type when it’s a refund.
|
|
94
|
+
|
|
95
|
+
## Validation
|
|
96
|
+
|
|
97
|
+
- After writing the mapper, verify that sample rows include `ts`, `amount`, `remote_person_id`, and `entry_type` (or `entry_type_id`). The pipeline will throw on the first row missing any of these.
|
|
98
|
+
- Ensure `remote_person_id` is the identifier from the 3rd-party source (e.g. donor_id, customer_id); the pipeline resolves it to the in-house `person_id`.
|
|
99
|
+
|
|
100
|
+
## Additional reference
|
|
101
|
+
|
|
102
|
+
- Full schema and column types: [reference.md](reference.md)
|
|
103
|
+
- Schema in code: `interfaces/transaction/schema.js`
|
|
104
|
+
- Inbound transform (sets `id`, `entry_type_id`, `recurs_id`): `interfaces/transaction/transforms/inbound/upsert_tables.js`
|
|
105
|
+
- Entry type constants: `input-tools/timelineTypes.js` (`TIMELINE_ENTRY_TYPES`)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Transaction schema reference
|
|
2
|
+
|
|
3
|
+
Canonical documentation: [Frakture Transactions Data](https://frakture.notion.site/Frakture-Transactions-Data-442349ac436a4f7db8e7d732359e7d8f).
|
|
4
|
+
|
|
5
|
+
This file summarizes the schema as implemented in `interfaces/transaction/schema.js` and how the inbound transform and input-tools use it.
|
|
6
|
+
|
|
7
|
+
## Table: transaction
|
|
8
|
+
|
|
9
|
+
| Column | DB type | Required for mapping? | Notes |
|
|
10
|
+
| ----------------------- | -------------- | ------------------------- | ---------------------------------------------------------------------------------------------------------------- |
|
|
11
|
+
| id | id_uuid | No (set by transform) | Set by `getTimelineEntryUUID` in inbound transform. |
|
|
12
|
+
| ts | datetime | **Yes** | Transaction date/time. |
|
|
13
|
+
| input_id | uuid | No (calculated) | Derived by pipeline from `remote_page_name` or `remote_input_id`; do not set in mapper. |
|
|
14
|
+
| entry_type_id | int | **Yes** (or entry_type) | Set by transform from `entry_type` string if not provided. |
|
|
15
|
+
| person_id | person_id | No (resolved by pipeline) | In-house person id; resolved from `remote_person_id` by the pipeline. |
|
|
16
|
+
| remote_person_id | string | **Yes** | Payer/donor identifier from 3rd-party source; pipeline resolves to `person_id`. |
|
|
17
|
+
| amount | currency | **Yes** | Transaction amount (currency units). |
|
|
18
|
+
| remote_transaction_id | string | No | External id; used for upserts and id derivation. |
|
|
19
|
+
| remote_page_name | string | No | e.g. campaign/form name; used (with remote_input_id) to derive input_id. |
|
|
20
|
+
| remote_recurring_id | string | No | External recurring/subscription id. |
|
|
21
|
+
| recurs_id | int | No | Set by transform from `recurs` (daily=1, weekly=2, monthly=3, quarterly=4, annually=5, semi-annually=6, else 0). |
|
|
22
|
+
| recurring_number | int | No | Occurrence index in series. |
|
|
23
|
+
| refund_amount | currency | No | For refunds. |
|
|
24
|
+
| given_name | string | No | Payer first name. |
|
|
25
|
+
| family_name | string | No | Payer last name. |
|
|
26
|
+
| email | string | No | Payer email. |
|
|
27
|
+
| source_code_id | source_code_id | No | Attribution. |
|
|
28
|
+
| override_source_code_id | source_code_id | No | |
|
|
29
|
+
| final_source_code_id | source_code_id | No | |
|
|
30
|
+
| recommended_message_id | uuid | No | |
|
|
31
|
+
| override_message_id | uuid | No | |
|
|
32
|
+
| final_message_id | uuid | No | |
|
|
33
|
+
| extra | json | No | Arbitrary vendor-specific data. |
|
|
34
|
+
|
|
35
|
+
## recurs → recurs_id (inbound transform)
|
|
36
|
+
|
|
37
|
+
The transform maps `recurs` (string) to `recurs_id` (int) when not already set:
|
|
38
|
+
|
|
39
|
+
| recurs | recurs_id |
|
|
40
|
+
| ----------------------------- | --------- |
|
|
41
|
+
| 'daily' | 1 |
|
|
42
|
+
| 'weekly' | 2 |
|
|
43
|
+
| 'monthly' | 3 |
|
|
44
|
+
| 'quarterly' | 4 |
|
|
45
|
+
| 'annually' | 5 |
|
|
46
|
+
| 'semi-annually' | 6 |
|
|
47
|
+
| (none) + recurring_number > 1 | 3 |
|
|
48
|
+
| (none) | 0 |
|
|
49
|
+
|
|
50
|
+
## Transaction entry_type → entry_type_id
|
|
51
|
+
|
|
52
|
+
From `input-tools/timelineTypes.js`:
|
|
53
|
+
|
|
54
|
+
| entry_type | entry_type_id |
|
|
55
|
+
| ---------------------- | ------------- |
|
|
56
|
+
| TRANSACTION | 10 |
|
|
57
|
+
| TRANSACTION_ONE_TIME | 11 |
|
|
58
|
+
| TRANSACTION_INITIAL | 12 |
|
|
59
|
+
| TRANSACTION_SUBSEQUENT | 13 |
|
|
60
|
+
| TRANSACTION_RECURRING | 14 |
|
|
61
|
+
| TRANSACTION_REFUND | 15 |
|
|
62
|
+
|
|
63
|
+
## Required fields from the mapper
|
|
64
|
+
|
|
65
|
+
The mapper must provide (the pipeline derives or resolves the rest):
|
|
66
|
+
|
|
67
|
+
- `ts`
|
|
68
|
+
- `amount`
|
|
69
|
+
- `remote_person_id` (3rd-party payer/donor identifier; pipeline resolves to `person_id`)
|
|
70
|
+
- `entry_type` or `entry_type_id`
|
|
71
|
+
|
|
72
|
+
The pipeline calculates `input_id` from `remote_page_name` or `remote_input_id`. The inbound transform uses `input_id` and `person_id` (resolved from `remote_person_id`) with `getTimelineEntryUUID` to set `id`. If `remote_entry_uuid` is set and valid, it is used as `id` and the composite is not needed.
|
package/test/cli.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import yargs from 'yargs/yargs';
import methods from '../index.js';

const argv = yargs(process.argv.slice(2)).parse();

/**
 * Invokes the exported method named by the first positional CLI argument,
 * passing the full parsed argv, and prints the result.
 * @throws if the named export is not a function.
 */
async function run() {
  const methodName = argv._[0];
  if (typeof methods[methodName] !== 'function') throw new Error(`${methodName} is not a function`);
  const output = await methods[methodName](argv);
  console.log(output);
}

// BUG FIX: run() was a floating promise -- any rejection (e.g. an unknown
// method name) surfaced as an unhandled rejection instead of a clean error
// message and a nonzero exit code.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|