meadow-integration 1.0.19 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -2
- package/source/Meadow-Integration.js +15 -1
- package/source/services/parser/Service-FileParser-CSV.js +263 -0
- package/source/services/parser/Service-FileParser-FixedWidth.js +158 -0
- package/source/services/parser/Service-FileParser-JSON.js +255 -0
- package/source/services/parser/Service-FileParser-XLSX.js +194 -0
- package/source/services/parser/Service-FileParser-XML.js +190 -0
- package/source/services/parser/Service-FileParser.js +142 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "meadow-integration",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.20",
|
|
4
4
|
"description": "Meadow Data Integration",
|
|
5
5
|
"bin": {
|
|
6
6
|
"mdwint": "source/cli/Meadow-Integration-CLI-Run.js"
|
|
@@ -39,12 +39,14 @@
|
|
|
39
39
|
"dependencies": {
|
|
40
40
|
"fable": "^3.1.63",
|
|
41
41
|
"fable-serviceproviderbase": "^3.0.19",
|
|
42
|
+
"fast-xml-parser": "^4.4.1",
|
|
42
43
|
"meadow": "^2.0.33",
|
|
43
44
|
"meadow-connection-mssql": "^1.0.16",
|
|
44
45
|
"meadow-connection-mysql": "^1.0.14",
|
|
45
46
|
"orator": "^6.0.4",
|
|
46
47
|
"orator-serviceserver-restify": "^2.0.9",
|
|
47
48
|
"pict-service-commandlineutility": "^1.0.19",
|
|
48
|
-
"pict-sessionmanager": "^1.0.2"
|
|
49
|
+
"pict-sessionmanager": "^1.0.2",
|
|
50
|
+
"xlsx": "^0.18.5"
|
|
49
51
|
}
|
|
50
52
|
}
|
|
@@ -11,6 +11,13 @@ const libOperation = require(`./services/clone/Meadow-Service-Operation.js`);
|
|
|
11
11
|
const libIntegrationAdapter = require(`./Meadow-Service-Integration-Adapter.js`);
|
|
12
12
|
const libGUIDMap = require(`./Meadow-Service-Integration-GUIDMap.js`);
|
|
13
13
|
|
|
14
|
+
const libFileParser = require(`./services/parser/Service-FileParser.js`);
|
|
15
|
+
const libFileParserCSV = require(`./services/parser/Service-FileParser-CSV.js`);
|
|
16
|
+
const libFileParserJSON = require(`./services/parser/Service-FileParser-JSON.js`);
|
|
17
|
+
const libFileParserXLSX = require(`./services/parser/Service-FileParser-XLSX.js`);
|
|
18
|
+
const libFileParserXML = require(`./services/parser/Service-FileParser-XML.js`);
|
|
19
|
+
const libFileParserFixedWidth = require(`./services/parser/Service-FileParser-FixedWidth.js`);
|
|
20
|
+
|
|
14
21
|
module.exports = (
|
|
15
22
|
{
|
|
16
23
|
TabularCheck: libTabularCheck,
|
|
@@ -24,5 +31,12 @@ module.exports = (
|
|
|
24
31
|
Operation: libOperation,
|
|
25
32
|
|
|
26
33
|
IntegrationAdapter: libIntegrationAdapter,
|
|
27
|
-
GUIDMap: libGUIDMap
|
|
34
|
+
GUIDMap: libGUIDMap,
|
|
35
|
+
|
|
36
|
+
FileParser: libFileParser,
|
|
37
|
+
FileParserCSV: libFileParserCSV,
|
|
38
|
+
FileParserJSON: libFileParserJSON,
|
|
39
|
+
FileParserXLSX: libFileParserXLSX,
|
|
40
|
+
FileParserXML: libFileParserXML,
|
|
41
|
+
FileParserFixedWidth: libFileParserFixedWidth
|
|
28
42
|
});
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const libFableServiceProviderBase = require('fable-serviceproviderbase');
|
|
4
|
+
const libFS = require('fs');
|
|
5
|
+
const libReadline = require('readline');
|
|
6
|
+
|
|
7
|
+
/**
 * Default options for the CSV parser.  Frozen so that the shared defaults
 * cannot be modified by accident; the parser always copies them with
 * Object.assign before use.
 */
const defaultCSVParserOptions = Object.freeze(
	{
		// Field delimiter character
		delimiter: ',',
		// Character used to quote fields; a doubled quote inside a quoted field is an escaped quote
		quoteChar: '"',
		// When not false, the first non-skipped, non-comment line supplies the record keys
		hasHeaders: true,
		// Number of leading (non-comment) lines to discard before parsing
		skipRows: 0,
		// Lines starting with this prefix are ignored; empty string disables comment handling
		commentPrefix: '',
		// When not false, each field value is trimmed
		trim: true,
		// Number of records handed to the chunk callback at a time by parseFile
		chunkSize: 100
	});
|
|
17
|
+
|
|
18
|
+
/**
 * CSV file parser service.
 *
 * Offers a streaming, chunked parseFile() and an in-memory parseContent().
 * Both share one line-handling routine so they treat comments, skipped rows,
 * blank lines and headers identically.
 */
class MeadowIntegrationFileParserCSV extends libFableServiceProviderBase
{
	/**
	 * @param {object} pFable - Fable instance handed to the service provider base
	 * @param {object} pOptions - Overrides merged on top of defaultCSVParserOptions
	 * @param {string} pServiceHash - Service hash handed to the service provider base
	 */
	constructor(pFable, pOptions, pServiceHash)
	{
		let tmpOptions = Object.assign({}, defaultCSVParserOptions, pOptions);
		super(pFable, tmpOptions, pServiceHash);

		this.serviceType = 'MeadowIntegrationFileParserCSV';

		// Headers captured by the most recent parseFile() call
		this._headers = null;
	}

	/**
	 * Parse a single CSV line into an array of values.
	 * Handles quoted fields (including embedded delimiters and doubled,
	 * i.e. escaped, quote characters).
	 *
	 * @param {string} pLine - Raw CSV line
	 * @param {string} pDelimiter - Field delimiter character (default ',')
	 * @param {string} pQuoteChar - Quote character (default '"')
	 * @param {boolean} pTrim - Whether to trim field values
	 * @returns {Array<string>} Parsed field values
	 */
	_parseCSVLine(pLine, pDelimiter, pQuoteChar, pTrim)
	{
		const tmpDelimiter = pDelimiter || ',';
		const tmpQuoteChar = pQuoteChar || '"';
		// A missing line is treated as an empty line rather than throwing
		const tmpLine = (pLine === null || pLine === undefined) ? '' : String(pLine);

		const tmpValues = [];
		let tmpCurrent = '';
		let tmpInQuotes = false;

		for (let i = 0; i < tmpLine.length; i++)
		{
			const tmpChar = tmpLine[i];

			if (tmpChar === tmpQuoteChar)
			{
				if (tmpInQuotes && tmpLine[i + 1] === tmpQuoteChar)
				{
					// Doubled quote inside a quoted field is a literal quote; consume both
					tmpCurrent += tmpQuoteChar;
					i++;
				}
				else
				{
					tmpInQuotes = !tmpInQuotes;
				}
			}
			else if (tmpChar === tmpDelimiter && !tmpInQuotes)
			{
				tmpValues.push(pTrim ? tmpCurrent.trim() : tmpCurrent);
				tmpCurrent = '';
			}
			else
			{
				tmpCurrent += tmpChar;
			}
		}

		// The final field has no trailing delimiter
		tmpValues.push(pTrim ? tmpCurrent.trim() : tmpCurrent);
		return tmpValues;
	}

	/**
	 * Build a stateful line processor for one parse run.
	 *
	 * The returned object exposes `headers` (null until a header line has been
	 * consumed) and `processLine(pLine)`, which returns a record for data
	 * lines and null for lines that produce no record (comments, skipped
	 * rows, blank lines and the header line).
	 *
	 * @param {object} pOptions - Merged parser options
	 * @returns {{headers: (Array<string>|null), processLine: function}} Line processor
	 */
	_createLineProcessor(pOptions)
	{
		const tmpHasHeaders = pOptions.hasHeaders !== false;
		const tmpSkipRows = parseInt(pOptions.skipRows, 10) || 0;
		const tmpCommentPrefix = pOptions.commentPrefix || '';
		const tmpTrim = pOptions.trim !== false;
		const tmpDelimiter = pOptions.delimiter || ',';
		const tmpQuoteChar = pOptions.quoteChar || '"';

		let tmpLineIndex = 0;

		const tmpProcessor = (
			{
				headers: null,
				processLine: (pLine) =>
				{
					// Comment lines are dropped first and do not count toward skipRows
					if (tmpCommentPrefix && pLine.startsWith(tmpCommentPrefix))
					{
						return null;
					}

					// Every other line advances the row counter, whatever happens to it
					const tmpCurrentIndex = tmpLineIndex++;

					// Preamble rows
					if (tmpCurrentIndex < tmpSkipRows)
					{
						return null;
					}

					// Blank lines never become headers or records
					if (pLine.trim().length === 0)
					{
						return null;
					}

					const tmpValues = this._parseCSVLine(pLine, tmpDelimiter, tmpQuoteChar, tmpTrim);

					// First surviving line becomes the header row
					if (tmpHasHeaders && !tmpProcessor.headers)
					{
						tmpProcessor.headers = tmpValues;
						return null;
					}

					// Without headers a record is just the array of values
					if (!tmpProcessor.headers)
					{
						return tmpValues;
					}

					const tmpRecord = {};
					for (let i = 0; i < tmpProcessor.headers.length; i++)
					{
						// Short rows are padded with empty strings; extra values are dropped
						tmpRecord[tmpProcessor.headers[i]] = (tmpValues[i] !== undefined) ? tmpValues[i] : '';
					}
					return tmpRecord;
				}
			});

		return tmpProcessor;
	}

	/**
	 * Parse a CSV file using streaming readline.
	 * Fires chunkCallback with arrays of records as they accumulate.
	 * Fires completionCallback exactly once: with an error if the file cannot
	 * be read, otherwise with the total record count once the file is consumed.
	 *
	 * @param {string} pFilePath - Path to the CSV file
	 * @param {object} pOptions - Parser options (overrides instance options)
	 * @param {function} pChunkCallback - Called with (pError, pRecords) per chunk
	 * @param {function} pCompletionCallback - Called with (pError, pTotalCount) when done
	 */
	parseFile(pFilePath, pOptions, pChunkCallback, pCompletionCallback)
	{
		const tmpOptions = Object.assign({}, this.options, pOptions);
		const tmpChunkSize = parseInt(tmpOptions.chunkSize, 10) || 100;
		const tmpProcessor = this._createLineProcessor(tmpOptions);

		this._headers = null;

		let tmpRecordCount = 0;
		let tmpFinished = false;
		const tmpChunkBuffer = [];

		// Single exit point so the completion callback can never fire twice
		const fFinish = (pError) =>
		{
			if (tmpFinished)
			{
				return;
			}
			tmpFinished = true;

			if (pError)
			{
				return pCompletionCallback(pError);
			}

			// Flush whatever did not fill a whole chunk
			if (tmpChunkBuffer.length > 0)
			{
				pChunkCallback(null, tmpChunkBuffer.splice(0, tmpChunkBuffer.length));
			}
			return pCompletionCallback(null, tmpRecordCount);
		};

		const tmpStream = libFS.createReadStream(pFilePath);
		const tmpReadline = libReadline.createInterface(
			{
				input: tmpStream,
				crlfDelay: Infinity
			});

		// Listen on the stream itself so a failed open/read (e.g. a missing file)
		// is reported through pCompletionCallback whether or not the readline
		// interface re-emits the error.
		tmpStream.on('error',
			(pError) =>
			{
				fFinish(pError);
				tmpReadline.close();
			});

		tmpReadline.on('line',
			(pLine) =>
			{
				if (tmpFinished)
				{
					return;
				}

				const tmpRecord = tmpProcessor.processLine(pLine);
				// Keep the instance-level headers in step with this parse run
				this._headers = tmpProcessor.headers;

				if (tmpRecord === null)
				{
					return;
				}

				tmpChunkBuffer.push(tmpRecord);
				tmpRecordCount++;

				if (tmpChunkBuffer.length >= tmpChunkSize)
				{
					pChunkCallback(null, tmpChunkBuffer.splice(0, tmpChunkBuffer.length));
				}
			});

		tmpReadline.on('close',
			() =>
			{
				return fFinish();
			});

		tmpReadline.on('error',
			(pError) =>
			{
				return fFinish(pError);
			});
	}

	/**
	 * Parse CSV content string into a full array of records.
	 * Lines are split on '\n'; a trailing '\r' (Windows line ending) is removed.
	 *
	 * @param {string} pContent - Raw CSV text
	 * @param {object} pOptions - Parser options
	 * @param {function} fCallback - Called with (pError, pRecords)
	 */
	parseContent(pContent, pOptions, fCallback)
	{
		if (typeof pContent !== 'string')
		{
			return fCallback(new Error('CSV parser requires content to be a string'));
		}

		const tmpOptions = Object.assign({}, this.options, pOptions);
		const tmpProcessor = this._createLineProcessor(tmpOptions);
		const tmpLines = pContent.split('\n');
		const tmpRecords = [];

		for (let i = 0; i < tmpLines.length; i++)
		{
			let tmpLine = tmpLines[i];

			// Strip trailing \r for Windows line endings
			if (tmpLine.endsWith('\r'))
			{
				tmpLine = tmpLine.slice(0, -1);
			}

			const tmpRecord = tmpProcessor.processLine(tmpLine);
			if (tmpRecord !== null)
			{
				tmpRecords.push(tmpRecord);
			}
		}

		return fCallback(null, tmpRecords);
	}
}
|
|
262
|
+
|
|
263
|
+
module.exports = MeadowIntegrationFileParserCSV;
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const libFableServiceProviderBase = require('fable-serviceproviderbase');
|
|
4
|
+
const libFS = require('fs');
|
|
5
|
+
const libReadline = require('readline');
|
|
6
|
+
|
|
7
|
+
/**
 * Default options for the fixed-width parser.  Frozen so that the shared
 * defaults cannot be modified by accident; the parser always copies them
 * with Object.assign before use.
 */
const defaultFixedWidthParserOptions = Object.freeze(
	{
		// Number of leading lines to discard before parsing
		skipLines: 0,
		// Column definitions as {name, start, width}; start is 1-based.  Parsing fails while this is empty.
		columns: Object.freeze([])
	});
|
|
12
|
+
|
|
13
|
+
/**
 * Fixed-width text file parser service.
 *
 * Each record is cut out of a line using a list of {name, start, width}
 * column definitions.  Offers a streaming, chunked parseFile() and an
 * in-memory parseContent().
 */
class MeadowIntegrationFileParserFixedWidth extends libFableServiceProviderBase
{
	/**
	 * @param {object} pFable - Fable instance handed to the service provider base
	 * @param {object} pOptions - Overrides merged on top of defaultFixedWidthParserOptions
	 * @param {string} pServiceHash - Service hash handed to the service provider base
	 */
	constructor(pFable, pOptions, pServiceHash)
	{
		let tmpOptions = Object.assign({}, defaultFixedWidthParserOptions, pOptions);
		super(pFable, tmpOptions, pServiceHash);

		this.serviceType = 'MeadowIntegrationFileParserFixedWidth';
	}

	/**
	 * Extract fields from a fixed-width line using a columns definition.
	 * Column start positions are 1-based.  Every value is trimmed.
	 *
	 * @param {string} pLine - Raw text line
	 * @param {Array} pColumns - Array of {name, start, width}
	 * @returns {object} Extracted record keyed by column name
	 */
	_parseLine(pLine, pColumns)
	{
		const tmpRecord = {};

		for (let i = 0; i < pColumns.length; i++)
		{
			const tmpColumn = pColumns[i];
			// start is 1-based; a missing or unparsable start falls back to the first character
			const tmpStartIndex = (parseInt(tmpColumn.start, 10) || 1) - 1;
			// a missing or unparsable width yields an empty value
			const tmpWidth = parseInt(tmpColumn.width, 10) || 0;

			tmpRecord[tmpColumn.name] = pLine.substring(tmpStartIndex, tmpStartIndex + tmpWidth).trim();
		}

		return tmpRecord;
	}

	/**
	 * Parse a fixed-width file using streaming readline.
	 * Fires completionCallback exactly once: with an error if the options are
	 * invalid or the file cannot be read, otherwise with the total record count.
	 *
	 * @param {string} pFilePath - Path to the fixed-width file
	 * @param {object} pOptions - Parser options: skipLines, columns, chunkSize
	 * @param {function} pChunkCallback - Called with (pError, pRecords) per chunk
	 * @param {function} pCompletionCallback - Called with (pError, pTotalCount) when done
	 */
	parseFile(pFilePath, pOptions, pChunkCallback, pCompletionCallback)
	{
		const tmpOptions = Object.assign({}, this.options, pOptions);
		const tmpColumns = tmpOptions.columns || [];
		const tmpSkipLines = parseInt(tmpOptions.skipLines, 10) || 0;
		const tmpChunkSize = parseInt(tmpOptions.chunkSize, 10) || 100;

		if (tmpColumns.length === 0)
		{
			return pCompletionCallback(new Error('FixedWidth parser requires options.columns array of {name, start, width}'));
		}

		let tmpLineIndex = 0;
		let tmpRecordCount = 0;
		let tmpFinished = false;
		const tmpChunkBuffer = [];

		// Single exit point so the completion callback can never fire twice
		const fFinish = (pError) =>
		{
			if (tmpFinished)
			{
				return;
			}
			tmpFinished = true;

			if (pError)
			{
				return pCompletionCallback(pError);
			}

			// Flush whatever did not fill a whole chunk
			if (tmpChunkBuffer.length > 0)
			{
				pChunkCallback(null, tmpChunkBuffer.splice(0, tmpChunkBuffer.length));
			}
			return pCompletionCallback(null, tmpRecordCount);
		};

		const tmpStream = libFS.createReadStream(pFilePath);
		const tmpReadline = libReadline.createInterface(
			{
				input: tmpStream,
				crlfDelay: Infinity
			});

		// Listen on the stream itself so a failed open/read (e.g. a missing file)
		// is reported through pCompletionCallback whether or not the readline
		// interface re-emits the error.
		tmpStream.on('error',
			(pError) =>
			{
				fFinish(pError);
				tmpReadline.close();
			});

		tmpReadline.on('line',
			(pLine) =>
			{
				if (tmpFinished)
				{
					return;
				}

				// Every line counts toward skipLines, including blank ones
				const tmpCurrentIndex = tmpLineIndex++;

				if (tmpCurrentIndex < tmpSkipLines)
				{
					return;
				}

				// Skip blank lines
				if (!pLine || pLine.trim().length === 0)
				{
					return;
				}

				tmpChunkBuffer.push(this._parseLine(pLine, tmpColumns));
				tmpRecordCount++;

				if (tmpChunkBuffer.length >= tmpChunkSize)
				{
					pChunkCallback(null, tmpChunkBuffer.splice(0, tmpChunkBuffer.length));
				}
			});

		tmpReadline.on('close',
			() =>
			{
				return fFinish();
			});

		tmpReadline.on('error',
			(pError) =>
			{
				return fFinish(pError);
			});
	}

	/**
	 * Parse fixed-width content string into a full array of records.
	 * Lines are split on '\n'; blank lines are skipped.
	 *
	 * @param {string} pContent - Raw fixed-width text
	 * @param {object} pOptions - Parser options: skipLines, columns
	 * @param {function} fCallback - Called with (pError, pRecords)
	 */
	parseContent(pContent, pOptions, fCallback)
	{
		const tmpOptions = Object.assign({}, this.options, pOptions);
		const tmpColumns = tmpOptions.columns || [];
		const tmpSkipLines = parseInt(tmpOptions.skipLines, 10) || 0;

		if (tmpColumns.length === 0)
		{
			return fCallback(new Error('FixedWidth parser requires options.columns array of {name, start, width}'));
		}

		if (typeof pContent !== 'string')
		{
			return fCallback(new Error('FixedWidth parser requires content to be a string'));
		}

		const tmpLines = pContent.split('\n');
		const tmpRecords = [];

		for (let i = tmpSkipLines; i < tmpLines.length; i++)
		{
			const tmpLine = tmpLines[i];

			// Skip blank lines (a lone '\r' from a Windows line ending also counts as blank)
			if (!tmpLine || tmpLine.trim().length === 0)
			{
				continue;
			}

			tmpRecords.push(this._parseLine(tmpLine, tmpColumns));
		}

		return fCallback(null, tmpRecords);
	}
}
|
|
157
|
+
|
|
158
|
+
module.exports = MeadowIntegrationFileParserFixedWidth;
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const libFableServiceProviderBase = require('fable-serviceproviderbase');
|
|
4
|
+
const libFS = require('fs');
|
|
5
|
+
|
|
6
|
+
/**
 * Default options for the JSON parser.  Frozen so that the shared defaults
 * cannot be modified by accident; the parser always copies them with
 * Object.assign before use.
 */
const defaultJSONParserOptions = Object.freeze(
	{
		// Dot-separated path (segments may carry [n]) to the records inside the document; empty means the document root
		rootPath: '',
		// When truthy, nested objects in each record are collapsed into single-level keys
		flattenNested: false,
		// Separator placed between key segments when flattening
		flattenDelimiter: '_'
	});
|
|
12
|
+
|
|
13
|
+
/**
 * JSON file parser service.
 *
 * Turns a JSON document into an array of records, optionally navigating to
 * a nested location first (rootPath) and optionally flattening nested
 * objects in each record.
 */
class MeadowIntegrationFileParserJSON extends libFableServiceProviderBase
{
	/**
	 * @param {object} pFable - Fable instance handed to the service provider base
	 * @param {object} pOptions - Overrides merged on top of defaultJSONParserOptions
	 * @param {string} pServiceHash - Service hash handed to the service provider base
	 */
	constructor(pFable, pOptions, pServiceHash)
	{
		let tmpOptions = Object.assign({}, defaultJSONParserOptions, pOptions);
		super(pFable, tmpOptions, pServiceHash);

		this.serviceType = 'MeadowIntegrationFileParserJSON';
	}

	/**
	 * Navigate a nested object using a dot-separated path with optional
	 * array index notation (e.g. "Results.series[0].data").
	 *
	 * Only own properties are followed, so inherited members such as
	 * "constructor" or "toString" are never mistaken for data.
	 *
	 * @param {object} pObject - The object to navigate
	 * @param {string} pPath - Dot-separated path, segments may include [n]
	 * @returns {*} The resolved value, or null if the path is invalid
	 */
	_resolveDataPath(pObject, pPath)
	{
		if (!pPath || typeof pPath !== 'string')
		{
			return pObject;
		}

		const fHasOwn = (pTarget, pKey) => Object.prototype.hasOwnProperty.call(pTarget, pKey);
		const tmpSegments = pPath.split('.');
		let tmpCurrent = pObject;

		for (let i = 0; i < tmpSegments.length; i++)
		{
			// Cannot descend into a primitive or a missing value
			if (tmpCurrent === null || tmpCurrent === undefined || typeof tmpCurrent !== 'object')
			{
				return null;
			}

			const tmpSegment = tmpSegments[i];
			// Array index notation: name[index]
			const tmpMatch = tmpSegment.match(/^([^\[]+)\[(\d+)\]$/);

			if (tmpMatch)
			{
				const tmpKey = tmpMatch[1];
				const tmpIndex = parseInt(tmpMatch[2], 10);
				if (!fHasOwn(tmpCurrent, tmpKey) || !Array.isArray(tmpCurrent[tmpKey]))
				{
					return null;
				}
				tmpCurrent = tmpCurrent[tmpKey][tmpIndex];
			}
			else
			{
				if (!fHasOwn(tmpCurrent, tmpSegment))
				{
					return null;
				}
				tmpCurrent = tmpCurrent[tmpSegment];
			}
		}

		return tmpCurrent;
	}

	/**
	 * Flatten a nested object into a single-level object using a delimiter.
	 * Arrays and null values are kept as they are; only plain nested
	 * objects are expanded.
	 *
	 * @param {object} pObject - Nested object
	 * @param {string} pDelimiter - Key delimiter (default '_')
	 * @param {string} pPrefix - Key prefix for recursion
	 * @returns {object} Flat object
	 */
	_flattenObject(pObject, pDelimiter, pPrefix)
	{
		const tmpDelimiter = pDelimiter || '_';
		const tmpPrefix = pPrefix || '';
		const tmpResult = {};

		const tmpKeys = Object.keys(pObject);
		for (let i = 0; i < tmpKeys.length; i++)
		{
			const tmpKey = tmpKeys[i];
			const tmpFullKey = tmpPrefix ? `${tmpPrefix}${tmpDelimiter}${tmpKey}` : tmpKey;
			const tmpValue = pObject[tmpKey];

			if (tmpValue !== null && typeof tmpValue === 'object' && !Array.isArray(tmpValue))
			{
				// Nested keys already carry the full prefix, so they can be merged straight in
				Object.assign(tmpResult, this._flattenObject(tmpValue, tmpDelimiter, tmpFullKey));
			}
			else
			{
				tmpResult[tmpFullKey] = tmpValue;
			}
		}

		return tmpResult;
	}

	/**
	 * Resolve parsed JSON to a records array, applying rootPath navigation.
	 *
	 * An array is used directly.  An object is searched for the envelope
	 * keys `data`, `records` and `rows` (in that order); if none holds an
	 * array the object itself becomes the single record.
	 *
	 * @param {*} pParsed - Parsed JSON value
	 * @param {object} pOptions - Parser options
	 * @returns {Array|null} Array of records or null on failure
	 */
	_resolveRecords(pParsed, pOptions)
	{
		let tmpData = pParsed;

		if (pOptions && pOptions.rootPath)
		{
			tmpData = this._resolveDataPath(pParsed, pOptions.rootPath);
		}

		if (Array.isArray(tmpData))
		{
			return tmpData;
		}

		// Primitives, null and undefined cannot be turned into records
		if (tmpData === null || tmpData === undefined || typeof tmpData !== 'object')
		{
			return null;
		}

		// Common envelope keys
		const tmpEnvelopeKeys = ['data', 'records', 'rows'];
		for (let i = 0; i < tmpEnvelopeKeys.length; i++)
		{
			if (Array.isArray(tmpData[tmpEnvelopeKeys[i]]))
			{
				return tmpData[tmpEnvelopeKeys[i]];
			}
		}

		return [tmpData];
	}

	/**
	 * Parse a JSON file into an array of records.
	 * Reads the entire file into memory.
	 *
	 * @param {string} pFilePath - Path to the JSON file
	 * @param {object} pOptions - Parser options
	 * @param {function} pChunkCallback - Called with (pError, pRecords) once with all records
	 * @param {function} pCompletionCallback - Called with (pError, pTotalCount) when done
	 */
	parseFile(pFilePath, pOptions, pChunkCallback, pCompletionCallback)
	{
		const tmpOptions = Object.assign({}, this.options, pOptions);

		let tmpContent;
		try
		{
			tmpContent = libFS.readFileSync(pFilePath, 'utf8');
		}
		catch (pError)
		{
			return pCompletionCallback(new Error(`JSON file read error: ${pError.message}`));
		}

		this.parseContent(tmpContent, tmpOptions,
			(pError, pRecords) =>
			{
				if (pError)
				{
					return pCompletionCallback(pError);
				}
				pChunkCallback(null, pRecords);
				return pCompletionCallback(null, pRecords.length);
			});
	}

	/**
	 * Parse JSON content string into a full array of records.
	 * A leading byte order mark (U+FEFF) is removed before parsing because
	 * JSON.parse rejects it.
	 *
	 * @param {string} pContent - Raw JSON text
	 * @param {object} pOptions - Parser options
	 * @param {function} fCallback - Called with (pError, pRecords)
	 */
	parseContent(pContent, pOptions, fCallback)
	{
		const tmpOptions = Object.assign({}, this.options, pOptions);
		const tmpFlattenNested = tmpOptions.flattenNested || false;
		const tmpFlattenDelimiter = tmpOptions.flattenDelimiter || '_';

		let tmpText = pContent;
		if (typeof tmpText === 'string' && tmpText.charCodeAt(0) === 0xFEFF)
		{
			tmpText = tmpText.slice(1);
		}

		let tmpParsed;
		try
		{
			tmpParsed = JSON.parse(tmpText);
		}
		catch (pError)
		{
			return fCallback(new Error(`JSON parse error: ${pError.message}`));
		}

		const tmpRecords = this._resolveRecords(tmpParsed, tmpOptions);
		if (tmpRecords === null)
		{
			if (tmpOptions.rootPath)
			{
				return fCallback(new Error(`rootPath '${tmpOptions.rootPath}' not found in JSON content`));
			}
			return fCallback(new Error(`Could not resolve records from JSON content`));
		}

		if (!tmpFlattenNested)
		{
			return fCallback(null, tmpRecords);
		}

		const tmpFlattened = [];
		for (let i = 0; i < tmpRecords.length; i++)
		{
			const tmpRecord = tmpRecords[i];
			// Primitive and null entries have nothing to flatten and pass through untouched
			if (tmpRecord !== null && typeof tmpRecord === 'object')
			{
				tmpFlattened.push(this._flattenObject(tmpRecord, tmpFlattenDelimiter));
			}
			else
			{
				tmpFlattened.push(tmpRecord);
			}
		}
		return fCallback(null, tmpFlattened);
	}
}
|
|
254
|
+
|
|
255
|
+
module.exports = MeadowIntegrationFileParserJSON;
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const libFableServiceProviderBase = require('fable-serviceproviderbase');
|
|
4
|
+
const libFS = require('fs');
|
|
5
|
+
|
|
6
|
+
/**
 * Default options for the XLSX parser.  Frozen so that the shared defaults
 * cannot be modified by accident; the parser always copies them with
 * Object.assign before use.
 */
const defaultXLSXParserOptions = Object.freeze(
	{
		// Name of the worksheet to read; when empty, sheetIndex is used instead
		sheetName: '',
		// Zero-based position of the worksheet in the workbook's SheetNames list
		sheetIndex: 0,
		// 1-based row that holds the column headers
		headerRow: 1,
		// 1-based row where the data starts
		dataStartRow: 2,
		// parseFile refuses files larger than this many megabytes
		maxFileSizeMB: 50
	});
|
|
14
|
+
|
|
15
|
+
/**
 * XLSX file parser service.
 *
 * Reads a whole workbook into memory with the `xlsx` library (loaded
 * lazily) and converts one worksheet into an array of records.
 */
class MeadowIntegrationFileParserXLSX extends libFableServiceProviderBase
{
	/**
	 * @param {object} pFable - Fable instance handed to the service provider base
	 * @param {object} pOptions - Overrides merged on top of defaultXLSXParserOptions
	 * @param {string} pServiceHash - Service hash handed to the service provider base
	 */
	constructor(pFable, pOptions, pServiceHash)
	{
		let tmpOptions = Object.assign({}, defaultXLSXParserOptions, pOptions);
		super(pFable, tmpOptions, pServiceHash);

		this.serviceType = 'MeadowIntegrationFileParserXLSX';
	}

	/**
	 * Parse an XLSX file into an array of records.
	 * Entire file is read into memory. Enforces maxFileSizeMB guard.
	 *
	 * @param {string} pFilePath - Path to the XLSX file
	 * @param {object} pOptions - Parser options
	 * @param {function} pChunkCallback - Called with (pError, pRecords) once with all records
	 * @param {function} pCompletionCallback - Called with (pError, pTotalCount) when done
	 */
	parseFile(pFilePath, pOptions, pChunkCallback, pCompletionCallback)
	{
		const tmpOptions = Object.assign({}, this.options, pOptions);
		const tmpMaxFileSizeMB = parseFloat(tmpOptions.maxFileSizeMB) || 50;
		const tmpMaxBytes = tmpMaxFileSizeMB * 1024 * 1024;

		// Check the size before reading so an oversized file is never loaded
		let tmpStat;
		try
		{
			tmpStat = libFS.statSync(pFilePath);
		}
		catch (pError)
		{
			return pCompletionCallback(new Error(`XLSX file stat error: ${pError.message}`));
		}

		if (tmpStat.size > tmpMaxBytes)
		{
			return pCompletionCallback(new Error(`XLSX file size ${(tmpStat.size / 1024 / 1024).toFixed(1)}MB exceeds maxFileSizeMB limit of ${tmpMaxFileSizeMB}MB`));
		}

		let tmpBuffer;
		try
		{
			tmpBuffer = libFS.readFileSync(pFilePath);
		}
		catch (pError)
		{
			return pCompletionCallback(new Error(`XLSX file read error: ${pError.message}`));
		}

		this._parseBuffer(tmpBuffer, tmpOptions,
			(pError, pRecords) =>
			{
				if (pError)
				{
					return pCompletionCallback(pError);
				}
				pChunkCallback(null, pRecords);
				return pCompletionCallback(null, pRecords.length);
			});
	}

	/**
	 * Parse XLSX content into a full array of records.
	 * Content is either a Buffer of xlsx file bytes or a base64 string of
	 * them.  Anything that cannot be turned into a Buffer is reported
	 * through the callback instead of being thrown.
	 *
	 * @param {Buffer|string} pContent - XLSX file as Buffer (or base64 string)
	 * @param {object} pOptions - Parser options
	 * @param {function} fCallback - Called with (pError, pRecords)
	 */
	parseContent(pContent, pOptions, fCallback)
	{
		const tmpOptions = Object.assign({}, this.options, pOptions);

		let tmpBuffer;
		try
		{
			tmpBuffer = Buffer.isBuffer(pContent) ? pContent : Buffer.from(pContent, 'base64');
		}
		catch (pError)
		{
			return fCallback(new Error(`XLSX content error: ${pError.message}`));
		}

		return this._parseBuffer(tmpBuffer, tmpOptions, fCallback);
	}

	/**
	 * Internal: turn raw worksheet rows (arrays of cell values) into records
	 * using custom header and data row positions.
	 *
	 * Blank rows produce no record, and header cells without a value do not
	 * produce a column.
	 *
	 * @param {Array<Array>} pRawRows - Rows as arrays of cell values
	 * @param {number} pHeaderRow - 1-based row holding the headers
	 * @param {number} pDataStartRow - 1-based row where data starts
	 * @returns {Array<object>|null} Records, or null when the header row is outside pRawRows
	 */
	_mapRowsToRecords(pRawRows, pHeaderRow, pDataStartRow)
	{
		// Row numbers are 1-based; convert to 0-based indexes
		const tmpHeaders = pRawRows[pHeaderRow - 1];
		if (!Array.isArray(tmpHeaders))
		{
			return null;
		}

		const tmpRecords = [];

		for (let i = Math.max(0, pDataStartRow - 1); i < pRawRows.length; i++)
		{
			const tmpRow = pRawRows[i];

			// Skip blank rows
			if (!tmpRow || tmpRow.length === 0)
			{
				continue;
			}

			const tmpRecord = {};
			for (let j = 0; j < tmpHeaders.length; j++)
			{
				// A column without a header value has no usable key
				if (tmpHeaders[j] === undefined || tmpHeaders[j] === null)
				{
					continue;
				}
				tmpRecord[tmpHeaders[j]] = (tmpRow[j] !== undefined) ? tmpRow[j] : '';
			}
			tmpRecords.push(tmpRecord);
		}

		return tmpRecords;
	}

	/**
	 * Internal: parse an xlsx Buffer into records using the xlsx library.
	 *
	 * @param {Buffer} pBuffer - XLSX bytes
	 * @param {object} pOptions - Merged options
	 * @param {function} fCallback - Called with (pError, pRecords)
	 */
	_parseBuffer(pBuffer, pOptions, fCallback)
	{
		// Loaded on demand so a missing xlsx package is reported through the callback
		let tmpXLSX;
		try
		{
			tmpXLSX = require('xlsx');
		}
		catch (pError)
		{
			return fCallback(new Error(`xlsx library not available: ${pError.message}`));
		}

		let tmpWorkbook;
		try
		{
			tmpWorkbook = tmpXLSX.read(pBuffer, { type: 'buffer' });
		}
		catch (pError)
		{
			return fCallback(new Error(`XLSX parse error: ${pError.message}`));
		}

		// Determine sheet to use: an explicit name wins over the index
		const tmpUseSheetName = (typeof pOptions.sheetName === 'string' && pOptions.sheetName.length > 0);
		const tmpSheetIndex = parseInt(pOptions.sheetIndex, 10) || 0;
		const tmpSheetName = tmpUseSheetName ? pOptions.sheetName : tmpWorkbook.SheetNames[tmpSheetIndex];

		if (!tmpSheetName || !tmpWorkbook.Sheets[tmpSheetName])
		{
			if (tmpUseSheetName)
			{
				return fCallback(new Error(`XLSX sheet '${tmpSheetName}' not found in workbook`));
			}
			// No name was resolved, so report the index that was asked for
			return fCallback(new Error(`XLSX sheet at index ${tmpSheetIndex} not found in workbook`));
		}

		const tmpSheet = tmpWorkbook.Sheets[tmpSheetName];
		const tmpHeaderRow = parseInt(pOptions.headerRow, 10) || 1;
		const tmpDataStartRow = parseInt(pOptions.dataStartRow, 10) || 2;

		// When headerRow and dataStartRow are at their defaults (1 and 2),
		// use xlsx's built-in sheet_to_json which handles this automatically
		if (tmpHeaderRow === 1 && tmpDataStartRow === 2)
		{
			try
			{
				return fCallback(null, tmpXLSX.utils.sheet_to_json(tmpSheet));
			}
			catch (pError)
			{
				return fCallback(new Error(`XLSX sheet_to_json error: ${pError.message}`));
			}
		}

		// Custom header/data row offsets: read as raw arrays first.  Blank rows
		// are kept at this stage so that row numbers still line up.
		let tmpRecords;
		try
		{
			const tmpRawRows = tmpXLSX.utils.sheet_to_json(tmpSheet, { header: 1 });
			tmpRecords = this._mapRowsToRecords(tmpRawRows, tmpHeaderRow, tmpDataStartRow);
		}
		catch (pError)
		{
			return fCallback(new Error(`XLSX custom row parse error: ${pError.message}`));
		}

		if (tmpRecords === null)
		{
			return fCallback(new Error(`XLSX headerRow ${tmpHeaderRow} is beyond sheet row count`));
		}

		return fCallback(null, tmpRecords);
	}
}
|
|
193
|
+
|
|
194
|
+
module.exports = MeadowIntegrationFileParserXLSX;
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const libFableServiceProviderBase = require('fable-serviceproviderbase');
|
|
4
|
+
const libFS = require('fs');
|
|
5
|
+
|
|
6
|
+
// Defaults merged under any per-instance or per-call options.
const defaultXMLParserOptions = (
	{
		// Dot-separated path to the records; empty means "find them automatically"
		recordPath: '',
		// Prefix placed on attribute names in the parsed output
		attributePrefix: '@_',
		// When true, XML attributes are dropped from the parsed output
		ignoreAttributes: false
	});

/**
 * File parser service that turns XML text into an array of plain-object records
 * using fast-xml-parser.
 */
class MeadowIntegrationFileParserXML extends libFableServiceProviderBase
{
	/**
	 * @param {object} pFable - Fable instance passed through to the service base
	 * @param {object} pOptions - Overrides for defaultXMLParserOptions
	 * @param {string} pServiceHash - Service hash passed through to the service base
	 */
	constructor(pFable, pOptions, pServiceHash)
	{
		let tmpOptions = Object.assign({}, defaultXMLParserOptions, pOptions);
		super(pFable, tmpOptions, pServiceHash);

		this.serviceType = 'MeadowIntegrationFileParserXML';
	}

	/**
	 * Walk an XML-parsed object looking for the first array of object records.
	 * At each node the direct children are checked first; only if none of them
	 * is a suitable array are the object-valued children searched recursively.
	 * Arrays themselves are never descended into.
	 *
	 * @param {object} pObject - Parsed XML object node
	 * @returns {Array|null} First array of objects found, or null
	 */
	_extractXMLRecords(pObject)
	{
		if (!pObject || typeof pObject !== 'object')
		{
			return null;
		}

		let tmpKeys = Object.keys(pObject);

		// First pass: look for array-valued keys whose elements are objects
		for (let i = 0; i < tmpKeys.length; i++)
		{
			let tmpValue = pObject[tmpKeys[i]];
			// typeof null is 'object', so null has to be excluded explicitly.
			if (Array.isArray(tmpValue) && tmpValue.length > 0 && tmpValue[0] !== null && typeof tmpValue[0] === 'object')
			{
				return tmpValue;
			}
		}

		// Second pass: recurse into object-valued keys
		for (let i = 0; i < tmpKeys.length; i++)
		{
			let tmpValue = pObject[tmpKeys[i]];
			if (typeof tmpValue === 'object' && !Array.isArray(tmpValue))
			{
				// A null child is harmless here: the guard at the top returns null.
				let tmpResult = this._extractXMLRecords(tmpValue);
				if (tmpResult)
				{
					return tmpResult;
				}
			}
		}

		return null;
	}

	/**
	 * Navigate a parsed XML object using a dot-separated recordPath.
	 * Only own properties are followed, so a segment such as "toString" or
	 * "constructor" cannot resolve to something inherited from a prototype.
	 * A value found at the path that is not an array is wrapped in a
	 * one-element array.
	 *
	 * @param {object} pParsed - Parsed XML object
	 * @param {string} pRecordPath - Dot-separated path to the records array
	 * @returns {Array|null} Records array, or null when the path is not a
	 *     non-empty string or any segment is missing
	 */
	_resolveRecordPath(pParsed, pRecordPath)
	{
		if (typeof pRecordPath !== 'string' || pRecordPath.length === 0)
		{
			return null;
		}

		let tmpParts = pRecordPath.split('.');
		let tmpCurrent = pParsed;

		for (let i = 0; i < tmpParts.length; i++)
		{
			if (!tmpCurrent || typeof tmpCurrent !== 'object' || !Object.prototype.hasOwnProperty.call(tmpCurrent, tmpParts[i]))
			{
				return null;
			}
			tmpCurrent = tmpCurrent[tmpParts[i]];
		}

		return Array.isArray(tmpCurrent) ? tmpCurrent : [tmpCurrent];
	}

	/**
	 * Parse an XML file into an array of records.
	 * Reads the entire file into memory (synchronously, as UTF-8).
	 *
	 * @param {string} pFilePath - Absolute path to the XML file
	 * @param {object} pOptions - Parser options
	 * @param {function} pChunkCallback - Called with (pError, pRecords) once with all records
	 * @param {function} pCompletionCallback - Called with (pError, pTotalCount) when done
	 */
	parseFile(pFilePath, pOptions, pChunkCallback, pCompletionCallback)
	{
		let tmpOptions = Object.assign({}, this.options, pOptions);

		let tmpContent;
		try
		{
			tmpContent = libFS.readFileSync(pFilePath, 'utf8');
		}
		catch (pError)
		{
			return pCompletionCallback(new Error(`XML file read error: ${pError.message}`));
		}

		this.parseContent(tmpContent, tmpOptions,
			(pError, pRecords) =>
			{
				// Errors go to the completion callback only; the chunk callback
				// is invoked solely on success.
				if (pError)
				{
					return pCompletionCallback(pError);
				}
				pChunkCallback(null, pRecords);
				return pCompletionCallback(null, pRecords.length);
			});
	}

	/**
	 * Parse XML content string into a full array of records.
	 * With a recordPath the records are taken from that path (and a missing
	 * path is an error); without one the first array of objects found in the
	 * tree is used, and if there is none the whole parsed document is returned
	 * as a single record.
	 *
	 * @param {string} pContent - Raw XML text
	 * @param {object} pOptions - Parser options
	 * @param {function} fCallback - Called with (pError, pRecords)
	 */
	parseContent(pContent, pOptions, fCallback)
	{
		let tmpOptions = Object.assign({}, this.options, pOptions);
		let tmpRecordPath = tmpOptions.recordPath || '';
		// An empty attributePrefix also falls back to '@_' here.
		let tmpAttributePrefix = tmpOptions.attributePrefix || '@_';
		// Attributes are kept unless ignoreAttributes is exactly true.
		let tmpIgnoreAttributes = tmpOptions.ignoreAttributes === true;

		// fast-xml-parser is required lazily so a missing install is reported
		// through the callback rather than when this module is loaded.
		let tmpXMLParser;
		try
		{
			let libFastXMLParser = require('fast-xml-parser');
			tmpXMLParser = new libFastXMLParser.XMLParser(
				{
					ignoreAttributes: tmpIgnoreAttributes,
					attributeNamePrefix: tmpAttributePrefix
				});
		}
		catch (pError)
		{
			return fCallback(new Error(`fast-xml-parser library not available: ${pError.message}`));
		}

		let tmpParsed;
		try
		{
			tmpParsed = tmpXMLParser.parse(pContent);
		}
		catch (pError)
		{
			return fCallback(new Error(`XML parse error: ${pError.message}`));
		}

		let tmpRecords;

		if (tmpRecordPath)
		{
			tmpRecords = this._resolveRecordPath(tmpParsed, tmpRecordPath);
			if (!tmpRecords)
			{
				return fCallback(new Error(`recordPath '${tmpRecordPath}' not found in XML`));
			}
		}
		else
		{
			// Smart extraction: walk tree looking for first array of objects
			tmpRecords = this._extractXMLRecords(tmpParsed);
		}

		if (!tmpRecords)
		{
			// Wrap the entire parsed result as a single record
			tmpRecords = [tmpParsed];
		}

		return fCallback(null, tmpRecords);
	}
}
|
|
189
|
+
|
|
190
|
+
module.exports = MeadowIntegrationFileParserXML;
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const libFableServiceProviderBase = require('fable-serviceproviderbase');
|
|
4
|
+
const libPath = require('path');
|
|
5
|
+
|
|
6
|
+
// Options used when the caller supplies none; an empty format means "auto-detect".
const defaultFileParserOptions = (
	{
		format: ''
	});

// Extension to format mapping
const EXTENSION_FORMAT_MAP = (
	{
		'.csv': 'csv',
		'.tsv': 'csv',
		'.txt': 'csv',
		'.json': 'json',
		'.jsonl': 'json',
		'.xlsx': 'xlsx',
		'.xlsm': 'xlsx',
		'.xls': 'xlsx',
		'.xml': 'xml',
		'.fw': 'fixedwidth',
		'.dat': 'fixedwidth'
	});

// How many leading characters/bytes are inspected for content-based detection.
const CONTENT_PREFIX_LENGTH = 100;

/**
 * Front-door parser service: works out which format a file or content blob is
 * in and hands the work to the matching format-specific sub-parser service.
 */
class MeadowIntegrationFileParser extends libFableServiceProviderBase
{
	/**
	 * @param {object} pFable - Fable instance passed through to the service base
	 * @param {object} pOptions - Overrides for defaultFileParserOptions
	 * @param {string} pServiceHash - Service hash passed through to the service base
	 */
	constructor(pFable, pOptions, pServiceHash)
	{
		let tmpOptions = Object.assign({}, defaultFileParserOptions, pOptions);
		super(pFable, tmpOptions, pServiceHash);

		this.serviceType = 'MeadowIntegrationFileParser';

		// Register sub-parser service types
		this.fable.addAndInstantiateServiceTypeIfNotExists('MeadowIntegrationFileParserCSV', require('./Service-FileParser-CSV.js'));
		this.fable.addAndInstantiateServiceTypeIfNotExists('MeadowIntegrationFileParserJSON', require('./Service-FileParser-JSON.js'));
		this.fable.addAndInstantiateServiceTypeIfNotExists('MeadowIntegrationFileParserXLSX', require('./Service-FileParser-XLSX.js'));
		this.fable.addAndInstantiateServiceTypeIfNotExists('MeadowIntegrationFileParserXML', require('./Service-FileParser-XML.js'));
		this.fable.addAndInstantiateServiceTypeIfNotExists('MeadowIntegrationFileParserFixedWidth', require('./Service-FileParser-FixedWidth.js'));
	}

	/**
	 * Normalise an explicitly requested format.
	 *
	 * @param {object} pOptions - Merged options
	 * @returns {string} Trimmed, lower-cased format, or '' when options.format
	 *     is not a non-blank string (meaning: detect it instead)
	 */
	_explicitFormat(pOptions)
	{
		if (!pOptions || typeof pOptions.format !== 'string')
		{
			return '';
		}
		return pOptions.format.trim().toLowerCase();
	}

	/**
	 * Look a file path's extension up in EXTENSION_FORMAT_MAP.
	 *
	 * @param {string} pFilePath - File path
	 * @returns {string} Mapped format, or '' when the extension is absent or unknown
	 */
	_formatFromExtension(pFilePath)
	{
		if (!pFilePath || typeof pFilePath !== 'string')
		{
			return '';
		}

		let tmpExt = libPath.extname(pFilePath).toLowerCase();
		if (tmpExt && Object.prototype.hasOwnProperty.call(EXTENSION_FORMAT_MAP, tmpExt))
		{
			return EXTENSION_FORMAT_MAP[tmpExt];
		}
		return '';
	}

	/**
	 * Recognise binary containers from their leading bytes.
	 *
	 * @param {Buffer} pBytes - Leading bytes of the content
	 * @returns {string} 'xlsx' for a ZIP local-file header ("PK\x03\x04", the
	 *     container .xlsx files are stored in), otherwise ''
	 */
	_formatFromBytes(pBytes)
	{
		if (Buffer.isBuffer(pBytes) && pBytes.length >= 4
			&& pBytes[0] === 0x50 && pBytes[1] === 0x4B && pBytes[2] === 0x03 && pBytes[3] === 0x04)
		{
			return 'xlsx';
		}
		return '';
	}

	/**
	 * Read up to pLength bytes from the start of a file.
	 * Best effort only: any failure yields an empty Buffer, because the
	 * sub-parser that ends up reading the file reports read errors itself.
	 *
	 * @param {string} pFilePath - File to sample
	 * @param {number} pLength - Maximum number of bytes to read
	 * @returns {Buffer} The bytes read (possibly empty)
	 */
	_readContentPrefix(pFilePath, pLength)
	{
		let tmpFS;
		let tmpDescriptor = null;
		try
		{
			tmpFS = require('fs');
			tmpDescriptor = tmpFS.openSync(pFilePath, 'r');
			let tmpBuffer = Buffer.alloc(pLength);
			let tmpBytesRead = tmpFS.readSync(tmpDescriptor, tmpBuffer, 0, pLength, 0);
			return tmpBuffer.subarray(0, tmpBytesRead);
		}
		catch (pError)
		{
			return Buffer.alloc(0);
		}
		finally
		{
			if (tmpDescriptor !== null)
			{
				try
				{
					tmpFS.closeSync(tmpDescriptor);
				}
				catch (pCloseError)
				{
					// Nothing useful can be done about a failed close of a read-only handle.
				}
			}
		}
	}

	/**
	 * Detect the format of a file from its extension, then from a content prefix.
	 * A recognised extension always wins; otherwise a prefix starting (after
	 * trimming) with '[' or '{' is JSON and one starting with '<' is XML.
	 * Anything else is treated as CSV.
	 *
	 * @param {string} pFilePath - File path (used for extension detection)
	 * @param {string} [pContentPrefix] - First bytes of content for content-based detection
	 * @returns {string} Format string: 'csv', 'json', 'xlsx', 'xml', 'fixedwidth'
	 */
	detectFormat(pFilePath, pContentPrefix)
	{
		// Extension-based detection
		let tmpExtensionFormat = this._formatFromExtension(pFilePath);
		if (tmpExtensionFormat)
		{
			return tmpExtensionFormat;
		}

		// Content-based detection
		if (pContentPrefix && typeof pContentPrefix === 'string')
		{
			let tmpTrimmed = pContentPrefix.trim();
			if (tmpTrimmed.startsWith('[') || tmpTrimmed.startsWith('{'))
			{
				return 'json';
			}
			// Covers "<?xml ..." declarations as well as a bare root element.
			if (tmpTrimmed.startsWith('<'))
			{
				return 'xml';
			}
		}

		return 'csv';
	}

	/**
	 * Get the appropriate sub-parser service for a given format.
	 * Unrecognised formats fall back to the CSV parser.
	 *
	 * @param {string} pFormat - Format string
	 * @returns {object} Sub-parser service instance
	 */
	_getParser(pFormat)
	{
		switch (pFormat)
		{
			case 'json':
				return this.fable.MeadowIntegrationFileParserJSON;
			case 'xlsx':
				return this.fable.MeadowIntegrationFileParserXLSX;
			case 'xml':
				return this.fable.MeadowIntegrationFileParserXML;
			case 'fixedwidth':
				return this.fable.MeadowIntegrationFileParserFixedWidth;
			case 'csv':
			default:
				return this.fable.MeadowIntegrationFileParserCSV;
		}
	}

	/**
	 * Parse a file using streaming, dispatching to the appropriate sub-parser.
	 * Format is determined from options.format, then from file extension, then content.
	 * The content step samples the first bytes of the file and only runs when
	 * the extension is not recognised.
	 *
	 * @param {string} pFilePath - Absolute path to the file
	 * @param {object} pOptions - Parser options; pOptions.format overrides detection
	 * @param {function} pChunkCallback - Called with (pError, pRecords) as records are ready
	 * @param {function} pCompletionCallback - Called with (pError, pTotalCount) when done
	 */
	parseFile(pFilePath, pOptions, pChunkCallback, pCompletionCallback)
	{
		let tmpOptions = Object.assign({}, this.options, pOptions);
		let tmpFormat = this._explicitFormat(tmpOptions) || this._formatFromExtension(pFilePath);

		if (!tmpFormat)
		{
			// Unknown extension: fall back to sniffing the start of the file.
			let tmpPrefix = (pFilePath && typeof pFilePath === 'string')
				? this._readContentPrefix(pFilePath, CONTENT_PREFIX_LENGTH)
				: Buffer.alloc(0);
			tmpFormat = this._formatFromBytes(tmpPrefix) || this.detectFormat('', tmpPrefix.toString('utf8'));
		}

		let tmpParser = this._getParser(tmpFormat);

		this.fable.log.info(`FileParser: parsing [${pFilePath}] as format [${tmpFormat}]`);
		return tmpParser.parseFile(pFilePath, tmpOptions, pChunkCallback, pCompletionCallback);
	}

	/**
	 * Parse content using a full-array (non-streaming) interface.
	 * Format is determined from options.format, then from content prefix detection.
	 * String content is sniffed for JSON/XML; Buffer content is only checked
	 * for the ZIP signature (xlsx). Everything else is treated as CSV.
	 *
	 * @param {string|Buffer} pContent - Raw file content
	 * @param {object} pOptions - Parser options; pOptions.format overrides detection
	 * @param {function} fCallback - Called with (pError, pRecords)
	 */
	parseContent(pContent, pOptions, fCallback)
	{
		let tmpOptions = Object.assign({}, this.options, pOptions);
		let tmpFormat = this._explicitFormat(tmpOptions);

		if (!tmpFormat)
		{
			// Only strings have a text prefix; Buffers and anything else get ''.
			let tmpContentPrefix = (typeof pContent === 'string') ? pContent.substring(0, CONTENT_PREFIX_LENGTH) : '';
			tmpFormat = this._formatFromBytes(pContent) || this.detectFormat('', tmpContentPrefix);
		}

		let tmpParser = this._getParser(tmpFormat);

		return tmpParser.parseContent(pContent, tmpOptions, fCallback);
	}
}
|
|
141
|
+
|
|
142
|
+
module.exports = MeadowIntegrationFileParser;
|