meadow-integration 1.0.18 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,190 @@
1
+ 'use strict';
2
+
3
+ const libFableServiceProviderBase = require('fable-serviceproviderbase');
4
+ const libFS = require('fs');
5
+
6
/**
 * Baseline options for the XML parser service; constructor options and
 * per-call options are layered on top of these with Object.assign.
 */
const defaultXMLParserOptions = {
	// Dot-separated path to the records; empty means "search the parsed tree".
	recordPath: '',
	// Forwarded to the XML parser as its attribute name prefix.
	attributePrefix: '@_',
	// Only a strict `true` switches attribute parsing off.
	ignoreAttributes: false,
};
12
+
13
/**
 * Service that parses XML text (or an XML file) into an array of records.
 *
 * Records are located either through an explicit dot-separated `recordPath`
 * option or, when no path is given, by searching the parsed tree for the
 * first array of objects. The `fast-xml-parser` library is required lazily
 * inside parseContent() so that a missing dependency is reported through the
 * callback instead of failing when this module is loaded.
 */
class MeadowIntegrationFileParserXML extends libFableServiceProviderBase
{
	/**
	 * @param {object} pFable - Fable instance, passed through to the base service provider
	 * @param {object} [pOptions] - Option overrides, merged over defaultXMLParserOptions
	 * @param {string} [pServiceHash] - Service hash, passed through to the base service provider
	 */
	constructor(pFable, pOptions, pServiceHash)
	{
		let tmpOptions = Object.assign({}, defaultXMLParserOptions, pOptions);
		super(pFable, tmpOptions, pServiceHash);

		this.serviceType = 'MeadowIntegrationFileParserXML';
	}

	/**
	 * Translate this service's option names into the options object handed to
	 * the fast-xml-parser XMLParser constructor.
	 *
	 * An empty-string attributePrefix is honored (attributes then keep their
	 * bare names); only a missing or non-string prefix falls back to '@_'.
	 *
	 * @param {object} [pOptions] - Merged parser options
	 * @returns {{ignoreAttributes: boolean, attributeNamePrefix: string}} XMLParser options
	 */
	_buildParserOptions(pOptions)
	{
		let tmpOptions = pOptions || {};

		return (
			{
				// Anything other than a strict `true` keeps attributes enabled.
				ignoreAttributes: tmpOptions.ignoreAttributes === true,
				attributeNamePrefix: (typeof tmpOptions.attributePrefix === 'string') ? tmpOptions.attributePrefix : '@_'
			});
	}

	/**
	 * Walk an XML-parsed object looking for the first array of object records.
	 * Checks the direct children of a node first, then recurses into the
	 * object-valued children in key order.
	 *
	 * @param {object} pObject - Parsed XML object node
	 * @returns {Array|null} First array of objects found, or null
	 */
	_extractXMLRecords(pObject)
	{
		if (!pObject || typeof pObject !== 'object')
		{
			return null;
		}

		let tmpKeys = Object.keys(pObject);

		// First pass: look for array-valued keys whose elements are objects.
		// typeof null is 'object', so null is excluded explicitly.
		for (let i = 0; i < tmpKeys.length; i++)
		{
			let tmpValue = pObject[tmpKeys[i]];
			if (Array.isArray(tmpValue) && tmpValue.length > 0 && tmpValue[0] !== null && typeof tmpValue[0] === 'object')
			{
				return tmpValue;
			}
		}

		// Second pass: recurse into object-valued (non-array) keys
		for (let i = 0; i < tmpKeys.length; i++)
		{
			let tmpValue = pObject[tmpKeys[i]];
			if (tmpValue !== null && typeof tmpValue === 'object' && !Array.isArray(tmpValue))
			{
				let tmpResult = this._extractXMLRecords(tmpValue);
				if (tmpResult)
				{
					return tmpResult;
				}
			}
		}

		return null;
	}

	/**
	 * Navigate a parsed XML object using a dot-separated recordPath.
	 * Only own properties are followed, so inherited names such as
	 * 'toString' or 'constructor' are never mistaken for XML nodes.
	 *
	 * @param {object} pParsed - Parsed XML object
	 * @param {string} pRecordPath - Dot-separated path to the records array
	 * @returns {Array|null} The value at the path (wrapped in an array when it
	 *   is not already one), or null when any path segment is missing
	 */
	_resolveRecordPath(pParsed, pRecordPath)
	{
		if (typeof pRecordPath !== 'string')
		{
			return null;
		}

		let tmpParts = pRecordPath.split('.');
		let tmpCurrent = pParsed;

		for (let i = 0; i < tmpParts.length; i++)
		{
			if (!tmpCurrent || typeof tmpCurrent !== 'object' || !Object.prototype.hasOwnProperty.call(tmpCurrent, tmpParts[i]))
			{
				return null;
			}
			tmpCurrent = tmpCurrent[tmpParts[i]];
		}

		// A single (non-repeated) element is returned as a one-record array.
		return Array.isArray(tmpCurrent) ? tmpCurrent : [tmpCurrent];
	}

	/**
	 * Parse an XML file into an array of records.
	 * Reads the entire file into memory synchronously, then delegates to
	 * parseContent().
	 *
	 * @param {string} pFilePath - Absolute path to the XML file
	 * @param {object} pOptions - Parser options
	 * @param {function} pChunkCallback - Called with (pError, pRecords) once with all records
	 * @param {function} pCompletionCallback - Called with (pError, pTotalCount) when done
	 */
	parseFile(pFilePath, pOptions, pChunkCallback, pCompletionCallback)
	{
		let tmpOptions = Object.assign({}, this.options, pOptions);

		let tmpContent;
		try
		{
			tmpContent = libFS.readFileSync(pFilePath, 'utf8');
		}
		catch (pError)
		{
			return pCompletionCallback(new Error(`XML file read error: ${pError.message}`));
		}

		this.parseContent(tmpContent, tmpOptions,
			(pError, pRecords) =>
			{
				if (pError)
				{
					// On failure the chunk callback is never invoked.
					return pCompletionCallback(pError);
				}
				pChunkCallback(null, pRecords);
				return pCompletionCallback(null, pRecords.length);
			});
	}

	/**
	 * Parse XML content string into a full array of records.
	 *
	 * When options.recordPath is set and cannot be resolved, the callback
	 * receives an error. Without a recordPath, the first array of objects in
	 * the parsed tree is used; if there is none, the whole parsed document is
	 * returned as a single record.
	 *
	 * @param {string} pContent - Raw XML text
	 * @param {object} pOptions - Parser options
	 * @param {function} fCallback - Called with (pError, pRecords)
	 */
	parseContent(pContent, pOptions, fCallback)
	{
		let tmpOptions = Object.assign({}, this.options, pOptions);
		let tmpRecordPath = tmpOptions.recordPath || '';

		let tmpXMLParser;
		try
		{
			// Required here rather than at module scope so that a missing
			// library surfaces as a callback error.
			let libFastXMLParser = require('fast-xml-parser');
			tmpXMLParser = new libFastXMLParser.XMLParser(this._buildParserOptions(tmpOptions));
		}
		catch (pError)
		{
			return fCallback(new Error(`fast-xml-parser library not available: ${pError.message}`));
		}

		let tmpParsed;
		try
		{
			tmpParsed = tmpXMLParser.parse(pContent);
		}
		catch (pError)
		{
			return fCallback(new Error(`XML parse error: ${pError.message}`));
		}

		let tmpRecords;

		if (tmpRecordPath)
		{
			tmpRecords = this._resolveRecordPath(tmpParsed, tmpRecordPath);
			if (!tmpRecords)
			{
				return fCallback(new Error(`recordPath '${tmpRecordPath}' not found in XML`));
			}
		}
		else
		{
			// Smart extraction: walk tree looking for first array of objects
			tmpRecords = this._extractXMLRecords(tmpParsed);
		}

		if (!tmpRecords)
		{
			// Wrap the entire parsed result as a single record
			tmpRecords = [tmpParsed];
		}

		return fCallback(null, tmpRecords);
	}
}
189
+
190
+ module.exports = MeadowIntegrationFileParserXML;
@@ -0,0 +1,142 @@
1
+ 'use strict';
2
+
3
+ const libFableServiceProviderBase = require('fable-serviceproviderbase');
4
+ const libPath = require('path');
5
+
6
/**
 * Baseline options for the file parser dispatcher. An empty `format` means
 * no format is forced, so the format is detected instead.
 */
const defaultFileParserOptions = {
	format: '',
};
10
+
11
// Extension to format mapping. Extensions are listed per parser format and
// flattened into a lookup keyed by lowercase extension (leading dot included).
const EXTENSION_FORMAT_MAP = {};
[
	['csv', ['.csv', '.tsv', '.txt']],
	['json', ['.json', '.jsonl']],
	['xlsx', ['.xlsx', '.xlsm', '.xls']],
	['xml', ['.xml']],
	['fixedwidth', ['.fw', '.dat']]
].forEach(
	(pGroup) =>
	{
		pGroup[1].forEach(
			(pExtension) =>
			{
				EXTENSION_FORMAT_MAP[pExtension] = pGroup[0];
			});
	});
26
+
27
/**
 * Dispatching file parser service: works out which format a file or content
 * blob is in and forwards the call to the matching format-specific sub-parser
 * service looked up on the fable instance.
 */
class MeadowIntegrationFileParser extends libFableServiceProviderBase
{
	/**
	 * @param {object} pFable - Fable instance, passed through to the base service provider
	 * @param {object} [pOptions] - Option overrides, merged over defaultFileParserOptions
	 * @param {string} [pServiceHash] - Service hash, passed through to the base service provider
	 */
	constructor(pFable, pOptions, pServiceHash)
	{
		let tmpOptions = Object.assign({}, defaultFileParserOptions, pOptions);
		super(pFable, tmpOptions, pServiceHash);

		this.serviceType = 'MeadowIntegrationFileParser';

		// Register sub-parser service types; _getParser() later reads them
		// back from this.fable under these same names.
		this.fable.addAndInstantiateServiceTypeIfNotExists('MeadowIntegrationFileParserCSV', require('./Service-FileParser-CSV.js'));
		this.fable.addAndInstantiateServiceTypeIfNotExists('MeadowIntegrationFileParserJSON', require('./Service-FileParser-JSON.js'));
		this.fable.addAndInstantiateServiceTypeIfNotExists('MeadowIntegrationFileParserXLSX', require('./Service-FileParser-XLSX.js'));
		this.fable.addAndInstantiateServiceTypeIfNotExists('MeadowIntegrationFileParserXML', require('./Service-FileParser-XML.js'));
		this.fable.addAndInstantiateServiceTypeIfNotExists('MeadowIntegrationFileParserFixedWidth', require('./Service-FileParser-FixedWidth.js'));
	}

	/**
	 * Detect the format of a file from its extension, then from a content prefix.
	 * Falls back to 'csv' when neither gives an answer.
	 *
	 * @param {string} pFilePath - File path (used for extension detection)
	 * @param {string} [pContentPrefix] - First characters of content for content-based detection
	 * @returns {string} Format string: 'csv', 'json', 'xlsx', 'xml', 'fixedwidth'
	 */
	detectFormat(pFilePath, pContentPrefix)
	{
		// Extension-based detection
		if (pFilePath && typeof pFilePath === 'string')
		{
			let tmpExt = libPath.extname(pFilePath).toLowerCase();
			if (tmpExt && EXTENSION_FORMAT_MAP[tmpExt])
			{
				return EXTENSION_FORMAT_MAP[tmpExt];
			}
		}

		// Content-based detection
		if (pContentPrefix && typeof pContentPrefix === 'string')
		{
			let tmpTrimmed = pContentPrefix.trim();
			// ZIP local-file-header signature: XLSX workbooks are ZIP containers.
			if (tmpTrimmed.startsWith('PK\u0003\u0004'))
			{
				return 'xlsx';
			}
			if (tmpTrimmed.startsWith('[') || tmpTrimmed.startsWith('{'))
			{
				return 'json';
			}
			// Covers both an '<?xml' declaration and a bare root element.
			if (tmpTrimmed.startsWith('<'))
			{
				return 'xml';
			}
		}

		return 'csv';
	}

	/**
	 * Get the appropriate sub-parser service for a given format.
	 * Unrecognized formats fall through to the CSV parser.
	 *
	 * @param {string} pFormat - Format string
	 * @returns {object} Sub-parser service instance
	 */
	_getParser(pFormat)
	{
		switch (pFormat)
		{
			case 'json':
				return this.fable.MeadowIntegrationFileParserJSON;
			case 'xlsx':
				return this.fable.MeadowIntegrationFileParserXLSX;
			case 'xml':
				return this.fable.MeadowIntegrationFileParserXML;
			case 'fixedwidth':
				return this.fable.MeadowIntegrationFileParserFixedWidth;
			case 'csv':
			default:
				return this.fable.MeadowIntegrationFileParserCSV;
		}
	}

	/**
	 * Pick the format to use: an explicit non-empty string in options.format
	 * wins (lowercased); anything else falls back to detectFormat().
	 *
	 * @param {object} pOptions - Merged parser options
	 * @param {string} pFilePath - File path for extension detection ('' when unknown)
	 * @param {string} [pContentPrefix] - Leading content for content detection
	 * @returns {string} Format string
	 */
	_resolveFormat(pOptions, pFilePath, pContentPrefix)
	{
		if (pOptions && typeof pOptions.format === 'string' && pOptions.format)
		{
			return pOptions.format.toLowerCase();
		}

		return this.detectFormat(pFilePath, pContentPrefix);
	}

	/**
	 * Parse a file by dispatching to the appropriate sub-parser.
	 * Format is determined from options.format, then from the file extension;
	 * the file content is not inspected here, so an unrecognized extension is
	 * parsed as CSV.
	 *
	 * @param {string} pFilePath - Absolute path to the file
	 * @param {object} pOptions - Parser options; pOptions.format overrides detection
	 * @param {function} pChunkCallback - Called with (pError, pRecords) as records are ready
	 * @param {function} pCompletionCallback - Called with (pError, pTotalCount) when done
	 */
	parseFile(pFilePath, pOptions, pChunkCallback, pCompletionCallback)
	{
		let tmpOptions = Object.assign({}, this.options, pOptions);
		let tmpFormat = this._resolveFormat(tmpOptions, pFilePath);
		let tmpParser = this._getParser(tmpFormat);

		this.fable.log.info(`FileParser: parsing [${pFilePath}] as format [${tmpFormat}]`);
		return tmpParser.parseFile(pFilePath, tmpOptions, pChunkCallback, pCompletionCallback);
	}

	/**
	 * Parse content using a full-array (non-streaming) interface.
	 * Format is determined from options.format, then from content prefix
	 * detection. Buffers are sniffed too, by decoding their first 100 bytes
	 * as UTF-8.
	 *
	 * @param {string|Buffer} pContent - Raw file content
	 * @param {object} pOptions - Parser options; pOptions.format overrides detection
	 * @param {function} fCallback - Called with (pError, pRecords)
	 */
	parseContent(pContent, pOptions, fCallback)
	{
		let tmpOptions = Object.assign({}, this.options, pOptions);

		let tmpContentPrefix = '';
		if (Buffer.isBuffer(pContent))
		{
			tmpContentPrefix = pContent.toString('utf8', 0, 100);
		}
		else if (typeof pContent === 'string')
		{
			tmpContentPrefix = pContent.substring(0, 100);
		}

		let tmpFormat = this._resolveFormat(tmpOptions, '', tmpContentPrefix);
		let tmpParser = this._getParser(tmpFormat);

		return tmpParser.parseContent(pContent, tmpOptions, fCallback);
	}
}
141
+
142
+ module.exports = MeadowIntegrationFileParser;