befly 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,425 @@
1
+ 'use strict';
2
+
3
+ import {getAllMatches, isName} from './util.js';
4
+
5
/**
 * Baseline settings for `validate`; caller-supplied options are merged over these.
 */
const defaultOptions = {
  // When false, an attribute that appears without any value is reported as an error.
  allowBooleanAttributes: false,
  // Tag names that are never pushed onto the open-tag stack (no closing tag expected).
  unpairedTags: [],
};
9
+
10
+ //const tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g");
11
/**
 * Walks through `xmlData` once and checks tag nesting, tag names, attribute
 * strings, entity references in text, and the single-root rule.
 *
 * @param {string} xmlData - XML text to validate; a leading BOM is skipped.
 * @param {object} [options] - overrides merged on top of `defaultOptions`.
 * @returns {true|{err: {code: string, msg: string, line: number, col: number}}}
 *   `true` when no problem was found, otherwise the error object built by
 *   `getErrorObject` for the first problem encountered.
 */
export function validate(xmlData, options) {
  const settings = Object.assign({}, defaultOptions, options);

  // Stack of tags that are currently open: {tagName, tagStartPos}.
  const openTags = [];
  let tagFound = false;

  // Becomes true once the root tag has been closed (depth 0 reached again).
  let reachedRoot = false;

  if (xmlData[0] === '\ufeff') {
    // Skip the byte order mark (BOM).
    xmlData = xmlData.slice(1);
  }

  for (let i = 0; i < xmlData.length; i++) {
    // Outside of any tag only whitespace is tolerated.
    if (xmlData[i] !== '<') {
      if (isWhiteSpace(xmlData[i])) {
        continue;
      }
      return getErrorObject('InvalidChar', `char '${xmlData[i]}' is not expected.`, getLineNumberForPosition(xmlData, i));
    }

    // Processing instruction: readPI yields either the new index or an error object.
    if (xmlData[i + 1] === '?') {
      i = readPI(xmlData, i + 2);
      if (i.err) return i;
      continue;
    }

    // Start of a tag: read up to the matching '>' while ignoring any '>' inside attribute values.
    const tagStartPos = i;
    i++;

    if (xmlData[i] === '!') {
      i = readCommentAndCDATA(xmlData, i);
      continue;
    }

    const closingTag = xmlData[i] === '/';
    if (closingTag) {
      i++;
    }

    // Tag name: everything up to '>' or the first whitespace character.
    const nameStart = i;
    while (i < xmlData.length && xmlData[i] !== '>' && !isWhiteSpace(xmlData[i])) {
      i++;
    }
    let tagName = xmlData.substring(nameStart, i).trim();

    if (tagName.endsWith('/')) {
      // Self-closing tag without attributes: step back so the '/' is read as the attribute string.
      tagName = tagName.slice(0, -1);
      i--;
    }

    if (!validateTagName(tagName)) {
      const msg = tagName.trim().length === 0
        ? "Invalid space after '<'."
        : `Tag '${tagName}' is an invalid name.`;
      return getErrorObject('InvalidTag', msg, getLineNumberForPosition(xmlData, i));
    }

    const result = readAttributeStr(xmlData, i);
    if (result === false) {
      return getErrorObject('InvalidAttr', `Attributes for '${tagName}' have open quote.`, getLineNumberForPosition(xmlData, i));
    }
    const attrStr = result.value;
    i = result.index;

    if (attrStr.endsWith('/')) {
      // Self-closing tag; text may still follow it, so fall through to the text scan below.
      const attrStrStart = i - attrStr.length;
      const attrCheck = validateAttributeString(attrStr.slice(0, -1), settings);
      if (attrCheck !== true) {
        // attrCheck.err.line is an offset inside the attribute string; adding the
        // attribute string's start gives the absolute index used for the line lookup.
        return getErrorObject(attrCheck.err.code, attrCheck.err.msg, getLineNumberForPosition(xmlData, attrStrStart + attrCheck.err.line));
      }
      tagFound = true;
    } else if (closingTag) {
      if (!result.tagClosed) {
        return getErrorObject('InvalidTag', `Closing tag '${tagName}' doesn't have proper closing.`, getLineNumberForPosition(xmlData, i));
      }
      if (attrStr.trim().length > 0) {
        return getErrorObject('InvalidTag', `Closing tag '${tagName}' can't have attributes or invalid starting.`, getLineNumberForPosition(xmlData, tagStartPos));
      }
      if (openTags.length === 0) {
        return getErrorObject('InvalidTag', `Closing tag '${tagName}' has not been opened.`, getLineNumberForPosition(xmlData, tagStartPos));
      }

      const opened = openTags.pop();
      if (tagName !== opened.tagName) {
        const openPos = getLineNumberForPosition(xmlData, opened.tagStartPos);
        return getErrorObject(
          'InvalidTag',
          `Expected closing tag '${opened.tagName}' (opened in line ${openPos.line}, col ${openPos.col}) instead of closing tag '${tagName}'.`,
          getLineNumberForPosition(xmlData, tagStartPos)
        );
      }

      // No open tags left means the root level has been reached.
      if (openTags.length === 0) {
        reachedRoot = true;
      }
    } else {
      const attrCheck = validateAttributeString(attrStr, settings);
      if (attrCheck !== true) {
        // Same offset-to-absolute-index conversion as for self-closing tags.
        return getErrorObject(attrCheck.err.code, attrCheck.err.msg, getLineNumberForPosition(xmlData, i - attrStr.length + attrCheck.err.line));
      }

      // An opening tag after the root was already closed means a second root.
      if (reachedRoot === true) {
        return getErrorObject('InvalidXml', 'Multiple possible root nodes found.', getLineNumberForPosition(xmlData, i));
      }
      if (settings.unpairedTags.indexOf(tagName) === -1) {
        // Unpaired tags are not tracked because no closing tag is expected for them.
        openTags.push({tagName, tagStartPos});
      }
      tagFound = true;
    }

    // Skip the text that follows the tag; it may contain comments, CDATA and PIs.
    for (i++; i < xmlData.length; i++) {
      const ch = xmlData[i];
      if (ch === '<') {
        const next = xmlData[i + 1];
        if (next === '!') {
          i = readCommentAndCDATA(xmlData, i + 1);
        } else if (next === '?') {
          i = readPI(xmlData, i + 1);
          if (i.err) return i;
        } else {
          // A real tag starts here; hand it back to the outer loop.
          break;
        }
      } else if (ch === '&') {
        const afterAmp = validateAmpersand(xmlData, i);
        if (afterAmp === -1) {
          return getErrorObject('InvalidChar', "char '&' is not expected.", getLineNumberForPosition(xmlData, i));
        }
        i = afterAmp;
      } else if (reachedRoot === true && !isWhiteSpace(ch)) {
        return getErrorObject('InvalidXml', "Extra text at the end", getLineNumberForPosition(xmlData, i));
      }
    }
    if (xmlData[i] === '<') {
      // The outer loop increments i, so step back to land on this '<' again.
      i--;
    }
  }

  if (!tagFound) {
    return getErrorObject('InvalidXml', 'Start tag expected.', 1);
  }
  if (openTags.length === 1) {
    return getErrorObject('InvalidTag', `Unclosed tag '${openTags[0].tagName}'.`, getLineNumberForPosition(xmlData, openTags[0].tagStartPos));
  }
  if (openTags.length > 0) {
    const unclosedNames = JSON.stringify(openTags.map((t) => t.tagName), null, 4).replace(/\r?\n/g, '');
    return getErrorObject('InvalidXml', `Invalid '${unclosedNames}' found.`, {line: 1, col: 1});
  }

  return true;
}
192
+
193
/**
 * Tells whether `char` is a space, tab, line feed or carriage return.
 * @param {string} char - value to test (compared strictly, so only these exact one-character strings match)
 * @returns {boolean}
 */
function isWhiteSpace(char) {
  switch (char) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
196
/**
 * Reads a processing instruction and skips over it.
 *
 * @param {string} xmlData - full XML text
 * @param {number} i - index where scanning starts; the PI target is measured from here
 * @returns {number|object} index of the '>' that ends the instruction,
 *   `xmlData.length` when no "?>" is found, or an error object when the target
 *   read from `i` is exactly "xml" and its delimiter sits beyond index 5.
 */
function readPI(xmlData, i) {
  const targetStart = i;
  while (i < xmlData.length) {
    const ch = xmlData[i];
    if (ch === '?' || ch === ' ') {
      // Text between the start index and this delimiter.
      const target = xmlData.slice(targetStart, i);
      if (i > 5 && target === 'xml') {
        return getErrorObject('InvalidXml', 'XML declaration allowed only at the start of the document.', getLineNumberForPosition(xmlData, i));
      }
      if (ch === '?' && xmlData[i + 1] === '>') {
        // Report the position of the closing '>'.
        return i + 1;
      }
    }
    i++;
  }
  return i;
}
220
+
221
/**
 * Skips a comment, a DOCTYPE declaration or a CDATA section.
 *
 * @param {string} xmlData - full XML text
 * @param {number} i - index of the '!' that follows '<'
 * @returns {number} index of the '>' that ends the construct, `xmlData.length`
 *   when the construct is never terminated, or `i` unchanged when none of the
 *   three constructs is recognised at this position.
 */
function readCommentAndCDATA(xmlData, i) {
  const total = xmlData.length;

  if (total > i + 5 && xmlData.startsWith('--', i + 1)) {
    // Comment: ends at the first "-->" found after the opening dashes.
    const end = xmlData.indexOf('-->', i + 3);
    return end === -1 ? total : end + 2;
  }

  if (total > i + 8 && xmlData.startsWith('DOCTYPE', i + 1)) {
    // DOCTYPE may contain nested '<...>' declarations, so balance the brackets.
    let depth = 1;
    let pos = i + 8;
    for (; pos < total; pos++) {
      const ch = xmlData[pos];
      if (ch === '<') {
        depth++;
      } else if (ch === '>') {
        depth--;
        if (depth === 0) {
          break;
        }
      }
    }
    return pos;
  }

  if (total > i + 9 && xmlData.startsWith('[CDATA[', i + 1)) {
    // CDATA: ends at the first "]]>" after the opening marker.
    const end = xmlData.indexOf(']]>', i + 8);
    return end === -1 ? total : end + 2;
  }

  return i;
}
271
+
272
const doubleQuote = '"';
const singleQuote = "'";

/**
 * Collects the raw attribute text of a tag: reads from `i` until a '>' is
 * found outside a quoted attribute value, or until the input ends.
 *
 * @param {string} xmlData - full XML text
 * @param {number} i - index right after the tag name
 * @returns {false|{value: string, index: number, tagClosed: boolean}}
 *   `false` when a quote is still open at the end of the input; otherwise the
 *   text read, the index where reading stopped, and whether a closing '>' was hit.
 */
function readAttributeStr(xmlData, i) {
  const from = i;
  let openQuote = '';
  let tagClosed = false;

  for (; i < xmlData.length; i++) {
    const ch = xmlData[i];
    if (ch === doubleQuote || ch === singleQuote) {
      if (openQuote === '') {
        openQuote = ch;
      } else if (openQuote === ch) {
        openQuote = '';
      }
      // The other kind of quote inside an open value is ordinary content.
    } else if (ch === '>' && openQuote === '') {
      tagClosed = true;
      break;
    }
  }

  if (openQuote !== '') {
    return false;
  }

  return {
    value: xmlData.substring(from, i),
    index: i,
    tagClosed,
  };
}
311
+
312
/**
 * Select all the attributes whether valid or invalid.
 * Capture groups: 1 = whitespace before the name, 2 = attribute name,
 * 3 = the '=' (with any whitespace before it), 4 = quoted value including
 * leading whitespace, 5 = quote character, 6 = value text.
 */
const validAttrStrRegxp = new RegExp('(\\s*)([^\\s=]+)(\\s*=)?(\\s*([\'"])(([\\s\\S])*?)\\5)?', 'g');

//attr, ="sd", a="amit's", a="sd"b="saf", ab cd=""

/**
 * Validates the raw attribute text of a single tag.
 *
 * @param {string} attrStr - attribute text as returned by `readAttributeStr`
 * @param {object} options - merged validator options; only `allowBooleanAttributes` is read here
 * @returns {true|object} `true` when every attribute is acceptable, otherwise
 *   an error object from `getErrorObject` whose position argument is the offset
 *   of the offending attribute inside `attrStr` (not a document line number).
 */
function validateAttributeString(attrStr, options) {
  const matches = getAllMatches(attrStr, validAttrStrRegxp);
  // A Set is used instead of a plain object: attribute names come from the
  // document, and names such as "hasOwnProperty" or "__proto__" would otherwise
  // shadow or bypass the own-property check used to detect duplicates.
  const seenNames = new Set();

  for (let i = 0; i < matches.length; i++) {
    const match = matches[i];
    if (match[1].length === 0) {
      //no space before attribute name: a="sd"b="saf"
      return getErrorObject('InvalidAttr', "Attribute '"+match[2]+"' has no space in starting.", getPositionFromMatch(match));
    } else if (match[3] !== undefined && match[4] === undefined) {
      //'=' present but no quoted value follows
      return getErrorObject('InvalidAttr', "Attribute '"+match[2]+"' is without value.", getPositionFromMatch(match));
    } else if (match[3] === undefined && !options.allowBooleanAttributes) {
      //independent attribute: ab
      return getErrorObject('InvalidAttr', "boolean attribute '"+match[2]+"' is not allowed.", getPositionFromMatch(match));
    }

    const attrName = match[2];
    if (!validateAttrName(attrName)) {
      return getErrorObject('InvalidAttr', "Attribute '"+attrName+"' is an invalid name.", getPositionFromMatch(match));
    }
    //check for duplicate attribute.
    if (seenNames.has(attrName)) {
      return getErrorObject('InvalidAttr', "Attribute '"+attrName+"' is repeated.", getPositionFromMatch(match));
    }
    seenNames.add(attrName);
  }

  return true;
}
354
+
355
/**
 * Validates the digits of a numeric character reference ("&#123;" or "&#x1F;").
 *
 * @param {string} xmlData - full XML text
 * @param {number} i - index of the first character after "&#"
 * @returns {number} index of the terminating ';', or -1 when the reference is
 *   malformed: a character outside the allowed digit set, no ';' before the end
 *   of the input, or no digits at all.
 */
function validateNumberAmpersand(xmlData, i) {
  let digitPattern = /\d/;
  if (xmlData[i] === 'x') {
    // Hexadecimal form: "&#x...;"
    i++;
    digitPattern = /[\da-fA-F]/;
  }
  const firstDigitPos = i;
  for (; i < xmlData.length; i++) {
    if (xmlData[i] === ';') {
      // The XML CharRef production requires at least one digit, so "&#;" and "&#x;" are rejected.
      return i > firstDigitPos ? i : -1;
    }
    if (!digitPattern.test(xmlData[i])) {
      break;
    }
  }
  return -1;
}
369
+
370
/**
 * Validates the entity or character reference that starts at an '&'.
 * See https://www.w3.org/TR/xml/#dt-charref
 *
 * @param {string} xmlData - full XML text
 * @param {number} i - index of the '&'
 * @returns {number} -1 when the reference is rejected; otherwise the index
 *   where scanning stopped: the terminating ';', or the end of the input when
 *   only word characters follow. Numeric references ("&#...") are delegated to
 *   `validateNumberAmpersand`.
 */
function validateAmpersand(xmlData, i) {
  const nameStart = i + 1;
  const firstChar = xmlData[nameStart];

  if (firstChar === ';') {
    return -1;
  }
  if (firstChar === '#') {
    return validateNumberAmpersand(xmlData, nameStart + 1);
  }

  let pos = nameStart;
  for (; pos < xmlData.length; pos++) {
    const ch = xmlData[pos];
    // Only the first 20 characters after '&' may be word characters.
    const withinLimit = pos - nameStart < 20;
    if (withinLimit && /\w/.test(ch)) {
      continue;
    }
    return ch === ';' ? pos : -1;
  }
  return pos;
}
389
+
390
/**
 * Wraps error details in the `{err: {...}}` shape returned by the validator.
 *
 * @param {string} code - error code, e.g. 'InvalidTag'
 * @param {string} message - human readable description
 * @param {number|{line: number, col: number}} lineNumber - either a position
 *   object or a bare number; a bare number is stored as `line` and `col` is
 *   left undefined.
 * @returns {{err: {code: string, msg: string, line: *, col: *}}}
 */
function getErrorObject(code, message, lineNumber) {
  const err = {
    code,
    msg: message,
    // Falls back to the argument itself when it has no truthy `line` property.
    line: lineNumber.line || lineNumber,
    col: lineNumber.col,
  };
  return { err };
}
400
+
401
/**
 * Checks an attribute name by delegating to `isName` from './util.js'.
 * @param {string} attrName - attribute name as captured from the tag
 * @returns {*} whatever `isName` returns for this name
 */
function validateAttrName(attrName) {
  const nameCheck = isName(attrName);
  return nameCheck;
}
404
+
405
+ // const startsWithXML = /^xml/i;
406
+
407
/**
 * Checks a tag name by delegating to `isName` from './util.js'.
 * The additional "must not start with xml" test is commented out in this file,
 * so it is not applied.
 * @param {string} tagname - tag name as read from the document
 * @returns {*} whatever `isName` returns for this name
 */
function validateTagName(tagname) {
  const nameCheck = isName(tagname);
  return nameCheck;
}
410
+
411
/**
 * Converts an absolute character index into a 1-based line/column pair.
 * Lines are separated by "\n" or "\r\n".
 *
 * @param {string} xmlData - full XML text
 * @param {number} index - character index to locate
 * @returns {{line: number, col: number}}
 */
function getLineNumberForPosition(xmlData, index) {
  const precedingLines = xmlData.substring(0, index).split(/\r?\n/);
  const lineCount = precedingLines.length;
  const currentLine = precedingLines[lineCount - 1];
  // Columns start at 1, hence the +1 on the length of the text before the index.
  return { line: lineCount, col: currentLine.length + 1 };
}
421
+
422
/**
 * Gives the offset, within the attribute string, of the first character of a
 * match after its leading whitespace.
 *
 * @param {Array} match - match array carrying a `startIndex` property, with the
 *   leading whitespace in element 1
 * @returns {number}
 */
function getPositionFromMatch(match) {
  const leadingWhitespace = match[1];
  return match.startIndex + leadingWhitespace.length;
}
@@ -0,0 +1,40 @@
1
+ 'use strict';
2
+
3
// Key under which per-child metadata is stored; a plain string is used only
// when `Symbol` is not available as a function.
const METADATA_SYMBOL = typeof Symbol === 'function'
  ? Symbol('XML Node Metadata')
  : '@@xmlMetadata';

/**
 * A node of the parsed XML tree: a tag name, an ordered list of child entries
 * and an attribute map stored under the ':@' key.
 */
export default class XmlNode {
  /**
   * @param {string} tagname - name of the tag this node represents
   */
  constructor(tagname) {
    this.tagname = tagname;
    // Nested tags, text, cdata and comments, in document order.
    this.child = [];
    // Attributes map.
    this[':@'] = {};
  }

  /**
   * Appends a single `{key: val}` entry to the children.
   * A key of '__proto__' is stored as '#__proto__'.
   * @param {string} key
   * @param {*} val
   */
  add(key, val) {
    const safeKey = key === '__proto__' ? '#__proto__' : key;
    this.child.push({ [safeKey]: val });
  }

  /**
   * Appends another node as a child entry keyed by its tag name, carrying its
   * attribute map along when that map is non-empty.
   * Note: a tag name of '__proto__' is rewritten on `node` itself to '#__proto__'.
   * @param {XmlNode} node - node to append
   * @param {number} [startIndex] - when given, stored as `{ startIndex }` under the metadata key
   */
  addChild(node, startIndex) {
    if (node.tagname === '__proto__') {
      node.tagname = '#__proto__';
    }

    const entry = { [node.tagname]: node.child };
    const attributes = node[':@'];
    if (attributes && Object.keys(attributes).length > 0) {
      entry[':@'] = attributes;
    }
    if (startIndex !== undefined) {
      // The metadata object is replaced wholesale; there is nothing to merge with yet.
      entry[METADATA_SYMBOL] = { startIndex };
    }
    this.child.push(entry);
  }

  /** symbol used for metadata */
  static getMetaDataSymbol() {
    return METADATA_SYMBOL;
  }
}