fast-xml-parser 4.3.4 → 4.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +7 -14
- package/package.json +1 -1
- package/src/v5/CharsSymbol.js +16 -0
- package/src/v5/EntitiesParser.js +107 -0
- package/src/v5/OptionsBuilder.js +64 -0
- package/src/v5/OutputBuilders/BaseOutputBuilder.js +71 -0
- package/src/v5/OutputBuilders/JsArrBuilder.js +103 -0
- package/src/v5/OutputBuilders/JsMinArrBuilder.js +102 -0
- package/src/v5/OutputBuilders/JsObjBuilder.js +156 -0
- package/src/v5/OutputBuilders/ParserOptionsBuilder.js +96 -0
- package/src/v5/Report.js +0 -0
- package/src/v5/TagPath.js +81 -0
- package/src/v5/TagPathMatcher.js +15 -0
- package/src/v5/XMLParser.js +85 -0
- package/src/v5/Xml2JsParser.js +237 -0
- package/src/v5/XmlPartReader.js +212 -0
- package/src/v5/XmlSpecialTagsReader.js +118 -0
- package/src/v5/inputSource/BufferSource.js +118 -0
- package/src/v5/inputSource/StringSource.js +123 -0
- package/src/v5/valueParsers/EntitiesParser.js +107 -0
- package/src/v5/valueParsers/booleanParser.js +23 -0
- package/src/v5/valueParsers/booleanParserExt.js +20 -0
- package/src/v5/valueParsers/currency.js +31 -0
- package/src/v5/valueParsers/join.js +14 -0
- package/src/v5/valueParsers/number.js +16 -0
- package/src/v5/valueParsers/trim.js +8 -0
- package/src/xmlparser/OrderedObjParser.js +2 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Find the closing tag paired with an already opened stop node and return the
 * raw text found between the two.
 *
 * Opening tags with the same name are counted so that the matching (outermost)
 * closing tag is the one that ends the scan. Processing instructions, comments
 * and CDATA sections are jumped over.
 *
 * NOTE(review): `findSubStrIndex` is not defined in the visible part of this
 * file, and `readTagExp` is called here as `(xmlDoc, i, '>')` although the
 * `readTagExp` declared in this file takes a single `parser` argument. This
 * function looks like it was carried over from the string based parser -
 * confirm before relying on it.
 *
 * @param {string} xmlDoc
 * @param {string} tagName
 * @param {number} i : start index
 * @returns {{tagContent: string, i: number}|undefined} text before the matching
 *   closing tag and the index returned for its '>'; undefined when the end of
 *   `xmlDoc` is reached without a match
 */
function readStopNode(xmlDoc, tagName, i){
  const startIndex = i;
  // Starting at 1 since we already have an open tag
  let openTagCount = 1;

  for (; i < xmlDoc.length; i++) {
    if (xmlDoc[i] !== "<") continue;

    if (xmlDoc[i+1] === "/") {//close tag
      const closeIndex = findSubStrIndex(xmlDoc, ">", i, `${tagName} is not closed`);
      const closeTagName = xmlDoc.substring(i+2,closeIndex).trim();
      if(closeTagName === tagName){
        openTagCount--;
        if (openTagCount === 0) {
          return {
            tagContent: xmlDoc.substring(startIndex, i),
            i : closeIndex
          }
        }
      }
      i=closeIndex;
    } else if(xmlDoc[i+1] === '?') {//processing instruction
      i = findSubStrIndex(xmlDoc, "?>", i+1, "StopNode is not closed.");
    } else if(xmlDoc.startsWith('!--', i + 1)) {//comment
      i = findSubStrIndex(xmlDoc, "-->", i+3, "StopNode is not closed.");
    } else if(xmlDoc.startsWith('![', i + 1)) {//CDATA
      i = findSubStrIndex(xmlDoc, "]]>", i, "StopNode is not closed.") - 2;
    } else {//opening tag
      const tagData = readTagExp(xmlDoc, i, '>')

      if (tagData) {
        // a nested, non self-closing tag with the same name needs its own closing tag
        if (tagData.tagName === tagName && tagData.tagExp[tagData.tagExp.length-1] !== "/") {
          openTagCount++;
        }
        i=tagData.closeIndex;
      }
    }
  }//end for loop
}
|
|
52
|
+
|
|
53
|
+
/**
 * Read closing tag name.
 *
 * Characters are consumed from `source` until '>' is met; the '>' itself is
 * consumed as well and trailing whitespace of the collected name is dropped.
 *
 * @param {Source} source object exposing `canRead()` and `readCh()`
 * @returns {string} tag name
 * @throws {Error} when the source ends before '>' is found
 */
function readClosingTagName(source){
  let text = ""; //temporary data
  while(source.canRead()){
    const ch = source.readCh();
    // a source may hand back nothing once it is exhausted; never glue that to the name
    if (ch === null || ch === undefined) break;

    if (ch === ">") return text.trimEnd();
    text += ch;
  }
  throw new Error(`Unexpected end of source. Reading '${text}'`);
}
|
|
70
|
+
|
|
71
|
+
/**
 * Read XML tag and build attributes map.
 * This function can be used to read a normal tag.
 * This function can't be used to read comment, CDATA, DOCTYPE.
 * Eg <tag attr = ' some"' attr= ">" bool>
 *
 * The source is scanned, relative to its current read position, up to the
 * first '>' that is not inside a quoted attribute value. The text before that
 * '>' is handed to `buildTagExpObj` and the read position is moved past '>'.
 *
 * @param {object} parser parser state; `parser.source` is the input source
 * @returns {object} whatever `buildTagExpObj` builds for the tag expression
 * @throws {Error} when a quote is left open or the source ends before '>'
 */
function readTagExp(parser) {
  const src = parser.source;
  let openQuote = ""; // quote character of the value being scanned, "" when outside quotes
  let reachedEnd = false;
  let i = 0;

  while (src.canRead(i)) {
    const ch = src.readChAt(i);

    if (openQuote !== "") {
      // only the same quote character closes a quoted value
      if (ch === openQuote) openQuote = "";
    } else if (ch === "'" || ch === '"') {
      openQuote = ch;
    } else if (ch === '>') {
      // If not inside quotes, stop reading at '>'
      reachedEnd = true;
      break;
    }
    i++;
  }

  if (openQuote !== "") {
    throw new Error("Invalid attribute expression. Quote is not properly closed");
  }
  if (!reachedEnd) throw new Error("Unexpected closing of source. Waiting for '>'");

  const exp = src.readStr(i);
  src.updateBufferBoundary(i + 1);
  return buildTagExpObj(exp, parser)
}
|
|
109
|
+
|
|
110
|
+
/**
 * Read the expression of a processing instruction.
 *
 * The source is scanned, relative to its current read position, up to the
 * first '?>' that is not inside a quoted value. The text before '?' is handed
 * to `buildTagExpObj`.
 *
 * NOTE(review): the read position is moved by `i + 1`, i.e. just past '?'.
 * The '>' of '?>' looks like it stays unread - confirm the caller consumes it.
 *
 * @param {object} parser parser state; `parser.source` and `parser.options` are read
 * @returns {object} whatever `buildTagExpObj` builds for the expression
 * @throws {Error} when a quote is left open or the source ends before '?>'
 */
function readPiExp(parser) {
  const src = parser.source;
  let openQuote = ""; // quote character of the value being scanned, "" when outside quotes
  let reachedEnd = false;
  let i = 0;

  for (; src.canRead(i); i++) {
    const ch = src.readChAt(i);
    const following = src.readChAt(i + 1);

    if (ch === "'" || ch === '"') {
      if (openQuote === "") openQuote = ch;
      else if (openQuote === ch) openQuote = "";
      continue;
    }

    if (openQuote === "" && ch === '?' && following === '>') {
      reachedEnd = true;
      break; // Exit the loop when '?>' is found
    }
  }

  if (openQuote !== "") {
    throw new Error("Invalid attribute expression. Quote is not properly closed in PI tag expression");
  }
  if (!reachedEnd) throw new Error("Unexpected closing of source. Waiting for '?>'");

  if(!parser.options.attributes.ignore){
    //TODO: use regex to verify attributes if not set to ignore
  }

  const exp = src.readStr(i);
  src.updateBufferBoundary(i + 1);
  return buildTagExpObj(exp, parser)
}
|
|
145
|
+
|
|
146
|
+
/**
 * Split a tag expression (the text between '<' and '>') into the tag name and
 * the attributes expression.
 *
 * A trailing '/' marks a self-closing tag; it is removed before splitting so
 * that it ends up neither in the tag name (`br/`) nor in the attributes.
 * The name ends at the first whitespace character (space, tab or line break).
 * Attributes are parsed, and reported through `parseAttributesExp`, unless
 * `parser.options.attributes.ignore` is set.
 *
 * @param {string} exp tag expression without the surrounding '<' and '>'
 * @param {object} parser parser state; `parser.options.attributes.ignore` is read
 * @returns {{tagName: string, selfClosing: boolean}}
 */
function buildTagExpObj(exp, parser){
  const tagExp = {
    tagName: "",
    selfClosing: false
  };
  let body = exp;
  let attrsExp = "";

  if(body[body.length - 1] === "/") {
    tagExp.selfClosing = true;
    body = body.slice(0, -1); // the slash is not part of the name or the attributes
  }

  //separate tag name
  const separatorIndex = body.search(/\s/);
  if(separatorIndex === -1){
    //only tag
    tagExp.tagName = body;
  }else{
    tagExp.tagName = body.substring(0, separatorIndex);
    attrsExp = body.substring(separatorIndex + 1);
  }

  tagExp.tagName = tagExp.tagName.trimEnd();

  if(!parser.options.attributes.ignore && attrsExp.length > 0){
    parseAttributesExp(attrsExp,parser)
  }

  return tagExp;
}
|
|
176
|
+
|
|
177
|
+
// group 1: attribute name, group 2: the whole `= "value"` part (absent for a
// boolean attribute), group 3: quote character, group 4: attribute value
const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');

/**
 * Parse an attributes expression and report every attribute to
 * `parser.outputBuilder.addAttribute`.
 *
 * Names go through `parser.processAttrName`, values through
 * `parser.replaceEntities`. An attribute without a value part is reported
 * with `true`; an attribute with an empty value (`a=""`) keeps the empty string.
 *
 * @param {string} attrStr attributes part of a tag expression
 * @param {object} parser parser state
 */
function parseAttributesExp(attrStr, parser) {
  const matches = getAllMatches(attrStr, attrsRegx);
  for (const match of matches) {
    const attrName = parser.processAttrName(match[1]);
    // decide on the presence of the value part, not on the truthiness of the value
    const rawValue = match[2] === undefined ? true : match[4];
    const attrVal = parser.replaceEntities(rawValue);

    parser.outputBuilder.addAttribute(attrName, attrVal);
  }
}

/**
 * Collect all matches of a global regular expression.
 *
 * @param {string} string text to search
 * @param {RegExp} regex regular expression carrying the `g` flag
 * @returns {Array<Array>} one array per match holding the full match followed
 *   by its groups, with a `startIndex` property set to the match position
 */
const getAllMatches = function(string, regex) {
  // the regex object is shared between calls; never start from a stale position
  regex.lastIndex = 0;
  const matches = [];
  let match = regex.exec(string);
  while (match) {
    const allmatches = Array.from(match);
    allmatches.startIndex = match.index;
    matches.push(allmatches);
    match = regex.exec(string);
  }
  return matches;
};
|
|
206
|
+
|
|
207
|
+
module.exports = {
|
|
208
|
+
readStopNode: readStopNode,
|
|
209
|
+
readClosingTagName: readClosingTagName,
|
|
210
|
+
readTagExp: readTagExp,
|
|
211
|
+
readPiExp: readPiExp,
|
|
212
|
+
}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
const {readPiExp} = require("./XmlPartReader");
|
|
2
|
+
|
|
3
|
+
/**
 * Read a CDATA section and report its text to `parser.outputBuilder.addCdata`.
 *
 * @param {object} parser parser state; `parser.source` is positioned right after "<!["
 * @throws {Error} when the next six characters are not "CDATA["
 */
function readCdata(parser){
  //<![ are already read till this point
  const str = parser.source.readStr(6); //CDATA[
  parser.source.updateBufferBoundary(6);

  if(str !== "CDATA[") throw new Error(`Invalid CDATA expression at ${parser.source.line}:${parser.source.cols}`);

  // everything up to the closing "]]>" is the section's text
  const text = parser.source.readUpto("]]>");
  parser.outputBuilder.addCdata(text);
}
|
|
13
|
+
/**
 * Read a processing instruction and report it to the output builder: the XML
 * declaration through `addDeclaration`, anything else through `addPi`.
 *
 * @param {object} parser parser state; `parser.source` is positioned right after "<?"
 * @throws {Error} when no tag expression could be read
 */
function readPiTag(parser){
  //<? are already read till this point
  const tagExp = readPiExp(parser, "?>");
  if(!tagExp) throw new Error("Invalid Pi Tag expression.");

  // "<?" is consumed before this call, so the declaration's name is read as
  // "xml"; "?xml" stays accepted in case the name is delivered with its '?'
  if (tagExp.tagName === "xml" || tagExp.tagName === "?xml") {
    parser.outputBuilder.addDeclaration();
  } else {
    parser.outputBuilder.addPi("?"+tagExp.tagName);
  }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Read a comment and report its text to `parser.outputBuilder.addComment`.
 *
 * @param {object} parser parser state; `parser.source` is positioned right after "<!-"
 * @throws {Error} when the next character is not the second '-' of "<!--"
 */
function readComment(parser){
  //<!- are already read till this point
  const ch = parser.source.readCh();
  if(ch !== "-") throw new Error(`Invalid comment expression at ${parser.source.line}:${parser.source.cols}`);

  // everything up to the closing "-->" is the comment's text
  const text = parser.source.readUpto("-->");
  parser.outputBuilder.addComment(text);
}
|
|
33
|
+
|
|
34
|
+
// Validation patterns for the declarations allowed inside a DOCTYPE body.
// The key is made of the two characters following "<!"; each pattern is
// applied to the remaining text of the declaration, up to its '>'.
const DOCTYPE_tags = {
  "EL":/^EMENT\s+([^\s>]+)\s+(ANY|EMPTY|\(.+\)\s*$)/m,
  "AT":/^TLIST\s+[^\s]+\s+[^\s]+\s+[^\s]+\s+[^\s]+\s*$/m,
  "NO":/^TATION.+$/m
}

/**
 * Read a DOCTYPE declaration up to and including its closing '>'.
 *
 * Declarations found in the optional `[ ... ]` body are validated
 * (ELEMENT, ATTLIST, NOTATION), registered (ENTITY) or read as comments.
 *
 * @param {object} parser parser state; `parser.source` is positioned right after "<!D"
 * @throws {Error} on an invalid declaration or when the source ends before
 *   the DOCTYPE is closed
 */
function readDocType(parser){
  //<!D are already read till this point
  const source = parser.source;
  const str = source.readStr(6); //OCTYPE
  source.updateBufferBoundary(6);

  if(str !== "OCTYPE") throw new Error(`Invalid DOCTYPE expression at ${parser.source.line}:${parser.source.cols}`);

  let insideBody = false;

  while(source.canRead()){
    //TODO: use readChAt like used in partReader
    const ch = source.readCh();
    if(ch === undefined) break; // source exhausted

    if(insideBody){
      if (ch === '<') {
        readDocTypeBodyTag(parser);
      }else if(ch === ']'){
        // body is over; whitespace may still precede the closing '>'
        insideBody = false;
      }
    }else if( ch === '>'){//end of doctype
      return;
    }else if( ch === '['){
      insideBody = true;
    }
  }//End While loop

  throw new Error("Unexpected end of source. Reading 'DOCTYPE'");
}

/**
 * Read one declaration of a DOCTYPE body; the leading '<' is already read.
 *
 * @param {object} parser parser state
 * @throws {Error} when the declaration is not a comment, ENTITY, ELEMENT,
 *   ATTLIST or NOTATION, or when it does not match its validation pattern
 */
function readDocTypeBodyTag(parser){
  const source = parser.source;
  if(source.readCh() !== "!") throw new Error("Invalid DOCTYPE");

  //Determine the tag type
  const str = source.readStr(2);
  if(str === "--") {//comment
    // readComment expects "<!-" to be consumed and reads the second '-' itself
    source.updateBufferBoundary(1);
    readComment(parser);
    return;
  }

  source.updateBufferBoundary(2);
  if(str === "EN"){ //ENTITY
    const rest = source.readStr(4);
    source.updateBufferBoundary(4);
    if(rest !== "TITY") throw new Error("Invalid DOCTYPE ENTITY expression");

    registerEntity(parser);
    return;
  }

  //ELEMENT, ATTLIST, NOTATION
  const dTagExp = source.readUpto(">");
  const regx = DOCTYPE_tags[str];
  if(!regx || !dTagExp.match(regx)) throw new Error("Invalid DOCTYPE");
}
|
|
84
|
+
|
|
85
|
+
/**
 * Read the remaining part of an ENTITY declaration (up to and including its
 * '>') and register the entity through `parser.entityParser.addExternalEntity`.
 *
 * Only declarations of the form `name "value"` are registered. When other
 * text stands between the name and a quoted literal (as in `name SYSTEM "uri"`
 * or `% name "value"`), the literal is not a plain replacement text for
 * `name`, so nothing is registered.
 *
 * @param {object} parser parser state; `parser.source` is positioned right after "<!ENTITY"
 * @throws {Error} when the source ends before the declaration is closed
 */
function registerEntity(parser){
  //read Entity
  const source = parser.source;
  let attrBoundary = ""; // quote character of the literal being read, "" outside a literal
  let name = "", val = "";
  let text = "";         // characters collected for the current token
  let plainValue = true;

  while(source.canRead()){
    const ch = source.readCh();
    if(ch === undefined) break; // source exhausted

    if(attrBoundary){
      if (ch === attrBoundary){
        val = text;
        text = "";
        attrBoundary = ""; // literal is closed; '>' can be recognised again
      }else{
        text += ch;
      }
    }else if(ch === " " || ch === "\t" || ch === "\n" || ch === "\r"){
      if(!name){
        name = text.trimStart();
        text = "";
      }
    }else if (ch === '"' || ch === "'") {//start of attrBoundary
      // leftover text here is a keyword or a second name, not part of the value
      if(text.length > 0) plainValue = false;
      attrBoundary = ch;
      text = "";
    }else if(ch === ">"){
      if(plainValue) parser.entityParser.addExternalEntity(name,val);
      return;
    }else{
      text+=ch;
    }
  }

  throw new Error("Unexpected end of source. Reading 'ENTITY'");
}
|
|
112
|
+
|
|
113
|
+
module.exports = {
|
|
114
|
+
readCdata: readCdata,
|
|
115
|
+
readComment:readComment,
|
|
116
|
+
readDocType:readDocType,
|
|
117
|
+
readPiTag:readPiTag
|
|
118
|
+
}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
// Byte values of the characters the source has to recognise
const Constants = {
  space: 32,
  tab: 9,
  lineFeed: 10,
  carriageReturn: 13,
  greaterThan: 62
}

/**
 * Input source reading from a Buffer.
 *
 * `startIndex` is the read position; offsets given to `readChAt` and
 * `canRead` are relative to it.
 *
 * NOTE(review): `readCh`, `readChAt` and `readFromBuffer(1)` turn one byte
 * into one character, which presumably breaks multi-byte UTF-8 sequences -
 * confirm the expected encoding.
 */
class BufferSource{
  /**
   * @param {Buffer} bytesArr input bytes
   */
  constructor(bytesArr){
    this.line = 1;
    this.cols = 0;
    this.buffer = bytesArr;
    this.startIndex = 0;
  }

  /**
   * Read the byte at the read position as a character and move forward.
   * @returns {string|undefined} undefined once the end of the buffer is passed
   */
  readCh() {
    const byte = this.buffer[this.startIndex++];
    return byte === undefined ? undefined : String.fromCharCode(byte);
  }

  /**
   * Read, without moving, the byte `index` positions after the read position.
   * @param {number} index offset relative to the read position
   * @returns {string|undefined} undefined when the offset is outside the buffer
   */
  readChAt(index) {
    const byte = this.buffer[this.startIndex+index];
    return byte === undefined ? undefined : String.fromCharCode(byte);
  }

  /**
   * Read `n` bytes as a string without moving the read position.
   * @param {number} n number of bytes
   * @param {number} [from] absolute start index; defaults to the read position
   * @returns {string}
   */
  readStr(n,from){
    if(typeof from === "undefined") from = this.startIndex;
    return this.buffer.subarray(from, from + n).toString();
  }

  /**
   * Read everything up to `stopStr` and move the read position past it.
   * @param {string} stopStr
   * @returns {string} the text before `stopStr`
   * @throws {Error} when `stopStr` is not found
   */
  readUpto(stopStr) {
    const stopBuffer = Buffer.from(stopStr);
    const at = this.buffer.indexOf(stopBuffer, this.startIndex);
    if (at === -1) throw new Error(`Unexpected end of source. Reading '${stopStr}'`);

    const result = this.buffer.subarray(this.startIndex, at).toString();
    this.startIndex = at + stopBuffer.length;
    return result;
  }

  /**
   * Read everything up to a closing tag and move the read position past its '>'.
   * The tag matches only when `stopStr` is followed by optional whitespace and
   * '>', so "</a" does not stop at "</ab>".
   * @param {string} stopStr start of the closing tag, eg "</tagname"
   * @returns {string} the text before the closing tag
   * @throws {Error} when no such closing tag is found
   */
  readUptoCloseTag(stopStr) { //stopStr: "</tagname"
    const stopBuffer = Buffer.from(stopStr);
    let searchFrom = this.startIndex;

    while (searchFrom < this.buffer.length) {
      const at = this.buffer.indexOf(stopBuffer, searchFrom);
      if (at === -1 || at >= this.buffer.length) break;

      let end = at + stopBuffer.length;
      while (isWhiteSpaceByte(this.buffer[end])) end++;

      if (this.buffer[end] === Constants.greaterThan) {//tag boundary found
        const result = this.buffer.subarray(this.startIndex, at).toString();
        this.startIndex = end + 1;
        return result;
      }
      searchFrom = at + 1;
    }

    throw new Error(`Unexpected end of source. Reading '${stopStr}'`);
  }

  /**
   * Read `n` bytes from the read position and update the line/column counters.
   * @param {number} n number of bytes
   * @param {boolean} [shouldUpdate] move the read position past the bytes read
   * @returns {string}
   */
  readFromBuffer(n, shouldUpdate) {
    let ch;
    if (n === 1) {
      ch = this.buffer[this.startIndex];
      if (ch === Constants.lineFeed) {
        this.line++;
        this.cols = 1;
      } else {
        this.cols++;
      }
      ch = String.fromCharCode(ch);
    } else {
      this.cols += n;
      ch = this.buffer.subarray(this.startIndex, this.startIndex + n).toString();
    }
    if (shouldUpdate) this.updateBufferBoundary(n);
    return ch;
  }

  /**
   * Move the read position forward.
   * @param {number} [n=1] number of characters read
   */
  updateBufferBoundary(n = 1) { //n: number of characters read
    this.startIndex += n;
  }

  /**
   * Tell whether a byte is available `n` positions after the read position.
   * @param {number} [n=0] offset relative to the read position
   * @returns {boolean}
   */
  canRead(n){
    return this.startIndex + (n || 0) < this.buffer.length;
  }

}

// true for the bytes of space, tab, line feed and carriage return
function isWhiteSpaceByte(byte){
  return byte === Constants.space || byte === Constants.tab
    || byte === Constants.lineFeed || byte === Constants.carriageReturn;
}
|
|
117
|
+
|
|
118
|
+
module.exports = BufferSource;
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
const whiteSpaces = [" ", "\n", "\t"];
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
/**
 * Input source reading from a string.
 *
 * `startIndex` is the read position; offsets given to `readChAt` and
 * `canRead` are relative to it.
 */
class StringSource{
  /**
   * @param {string} str input text
   */
  constructor(str){
    this.line = 1;
    this.cols = 0;
    this.buffer = str;
    //a boundary pointer to indicate where from the buffer data should be read
    // data before this pointer can be deleted to free the memory
    this.startIndex = 0;
  }

  /**
   * Read the character at the read position and move forward.
   * @returns {string|undefined} undefined once the end of the text is passed
   */
  readCh() {
    return this.buffer[this.startIndex++];
  }

  /**
   * Read, without moving, the character `index` positions after the read position.
   * @param {number} index offset relative to the read position
   * @returns {string|undefined} undefined when the offset is outside the text
   */
  readChAt(index) {
    return this.buffer[this.startIndex+index];
  }

  /**
   * Read `n` characters without moving the read position.
   * @param {number} n number of characters
   * @param {number} [from] absolute start index; defaults to the read position
   * @returns {string}
   */
  readStr(n,from){
    if(typeof from === "undefined") from = this.startIndex;
    return this.buffer.substring(from, from + n);
  }

  /**
   * Read everything up to `stopStr` and move the read position past it.
   * @param {string} stopStr
   * @returns {string} the text before `stopStr`
   * @throws {Error} when `stopStr` is not found
   */
  readUpto(stopStr) {
    const at = this.buffer.indexOf(stopStr, this.startIndex);
    if (at === -1) throw new Error(`Unexpected end of source. Reading '${stopStr}'`);

    const result = this.buffer.substring(this.startIndex, at);
    this.startIndex = at + stopStr.length;
    return result;
  }

  /**
   * Read everything up to a closing tag and move the read position past its '>'.
   * The tag matches only when `stopStr` is followed by optional whitespace and
   * '>', so "</a" does not stop at "</ab>".
   * @param {string} stopStr start of the closing tag, eg "</tagname"
   * @returns {string} the text before the closing tag
   * @throws {Error} when no such closing tag is found
   */
  readUptoCloseTag(stopStr) { //stopStr: "</tagname"
    const inputLength = this.buffer.length;
    let searchFrom = this.startIndex;

    while (searchFrom < inputLength) {
      const at = this.buffer.indexOf(stopStr, searchFrom);
      if (at === -1 || at >= inputLength) break;

      let end = at + stopStr.length;
      let ch = this.buffer[end];
      while (ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r') {
        end++;
        ch = this.buffer[end];
      }

      if (ch === '>') {//tag boundary found
        const result = this.buffer.substring(this.startIndex, at);
        this.startIndex = end + 1;
        return result;
      }
      searchFrom = at + 1;
    }

    throw new Error(`Unexpected end of source. Reading '${stopStr}'`);
  }

  /**
   * Read `n` characters from the read position.
   * TODO: line and column counters are not maintained here yet.
   * @param {number} n number of characters
   * @param {boolean} [updateIndex] move the read position past the characters read
   * @returns {string|undefined}
   */
  readFromBuffer(n, updateIndex){
    let ch;
    if(n===1){
      ch = this.buffer[this.startIndex];
    }else{
      ch = this.buffer.substring(this.startIndex, this.startIndex + n);
    }
    if(updateIndex) this.updateBufferBoundary(n);
    return ch;
  }

  //TODO: rename to updateBufferReadIndex

  /**
   * Move the read position forward.
   * @param {number} [n=1] number of characters read
   */
  updateBufferBoundary(n = 1) { //n: number of characters read
    this.startIndex += n;
  }

  /**
   * Tell whether a character is available `n` positions after the read position.
   * @param {number} [n=0] offset relative to the read position
   * @returns {boolean}
   */
  canRead(n){
    return this.startIndex + (n || 0) < this.buffer.length;
  }

}
|
|
122
|
+
|
|
123
|
+
module.exports = StringSource;
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
// '&' is replaced last so that text produced by other replacements is never
// taken for the start of a new entity
const ampEntity = { regex: /&(amp|#38|#x26);/g, val : "&"};
// lt, gt, quot and apos are part of every parser's `lastEntities`; amp is `ampEntity`
const htmlEntities = {
  "space": { regex: /&(nbsp|#160);/g, val: " " },
  "cent" : { regex: /&(cent|#162);/g, val: "¢" },
  "pound" : { regex: /&(pound|#163);/g, val: "£" },
  "yen" : { regex: /&(yen|#165);/g, val: "¥" },
  "euro" : { regex: /&(euro|#8364);/g, val: "€" },
  "copyright" : { regex: /&(copy|#169);/g, val: "©" },
  "reg" : { regex: /&(reg|#174);/g, val: "®" },
  "inr" : { regex: /&(inr|#8377);/g, val: "₹" },
  "num_dec": { regex: /&#([0-9]{1,7});/g, val : (match, str) => charFromCodePoint(match, Number.parseInt(str, 10)) },
  "num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (match, str) => charFromCodePoint(match, Number.parseInt(str, 16)) },
};

// Turn a numeric character reference into its character. References beyond
// the Unicode range are left as they were written.
function charFromCodePoint(reference, codePoint){
  if(codePoint > 0x10FFFF) return reference;
  return String.fromCodePoint(codePoint);
}

// Escape the characters having a meaning in a regular expression (eg '.')
function escapeRegExp(text){
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

// Build the expression matching `&name;`
function entityRegex(name){
  return new RegExp("&"+escapeRegExp(name)+";","g");
}

// Replace every occurrence of an entity. A string value is inserted as it is:
// sequences like "$&" in it are not treated as replacement patterns.
function applyEntity(text, entity){
  const replacement = typeof entity.val === "function" ? entity.val : () => entity.val;
  return text.replace(entity.regex, replacement);
}

function reportWarning(message){
  console.warn(message);
}

/**
 * Replaces entities found in text: DOCTYPE entities, external entities
 * (including lt, gt, quot and apos) and, when asked, HTML entities.
 */
class EntitiesParser{
  /**
   * @param {boolean} replaceHtmlEntities replace the entities of `htmlEntities` as well
   */
  constructor(replaceHtmlEntities) {
    this.replaceHtmlEntities = replaceHtmlEntities;
    this.docTypeEntities = {};
    this.lastEntities = {
      "apos" : { regex: /&(apos|#39|#x27);/g, val : "'"},
      "gt" : { regex: /&(gt|#62|#x3E);/g, val : ">"},
      "lt" : { regex: /&(lt|#60|#x3C);/g, val : "<"},
      "quot" : { regex: /&(quot|#34|#x22);/g, val : "\""},
    };
  }

  /**
   * Register several external entities.
   * @param {Object<string, string>} externalEntities entity name to value
   */
  addExternalEntities(externalEntities){
    for (const ent of Object.keys(externalEntities)) {
      this.addExternalEntity(ent,externalEntities[ent])
    }
  }

  /**
   * Register one external entity. An entity whose value contains '&' is
   * skipped with a warning.
   * @param {string} key entity name
   * @param {string} val replacement text
   * @throws {Error} when the name contains a character rejected by `validateEntityName`
   */
  addExternalEntity(key,val){
    validateEntityName(key);
    if(val.indexOf("&") !== -1) {
      reportWarning(`Entity ${key} is not added as '&' is found in value;`)
      return;
    }
    this.lastEntities[key] = {
      regex: entityRegex(key),
      val : val
    }
  }

  /**
   * Register the entities declared in a DOCTYPE.
   * @param {Object<string, string>} entities entity name to value
   */
  addDocTypeEntities(entities){
    for (const ent of Object.keys(entities)) {
      this.docTypeEntities[ent] = {
        regex: entityRegex(ent),
        val : entities[ent]
      }
    }
  }

  /**
   * @param {*} val
   * @returns {*} result of `replaceEntitiesValue(val)`
   */
  parse(val){
    return this.replaceEntitiesValue(val)
  }

  /**
   * 1. Replace DOCTYPE entities
   * 2. Replace external entities
   * 3. Replace HTML entities if asked
   * 4. Replace '&' entities
   * @param {string} val
   * @returns {*} text with entities replaced; anything but a non-empty string
   *   is returned unchanged
   */
  replaceEntitiesValue(val){
    if(typeof val !== "string" || val.length === 0) return val;

    for(const entityName in this.docTypeEntities){
      val = applyEntity(val, this.docTypeEntities[entityName]);
    }
    for(const entityName in this.lastEntities){
      val = applyEntity(val, this.lastEntities[entityName]);
    }
    if(this.replaceHtmlEntities){
      for(const entityName in htmlEntities){
        val = applyEntity(val, htmlEntities[entityName]);
      }
    }
    return applyEntity(val, ampEntity);
  }
};

//an entity name should not contains special characters that may be used in regex
//Eg !?\\\/[]$%{}^&*()<>
const specialChar = "!?\\\/[]$%{}^&*()<>|+";

/**
 * @param {string} name entity name
 * @returns {string} the name, when it is free of the characters of `specialChar`
 * @throws {Error} naming the first special character found
 */
function validateEntityName(name){
  for (const ch of specialChar) {
    if(name.indexOf(ch) !== -1) throw new Error(`Invalid character ${ch} in entity name`);
  }
  return name;
}
|
|
106
|
+
|
|
107
|
+
module.exports = EntitiesParser;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Value parser turning the text of a boolean into `true` or `false`.
 */
class boolParser{
  /**
   * @param {string[]} [trueList] texts standing for true; defaults to ["true"]
   * @param {string[]} [falseList] texts standing for false; defaults to ["false"]
   */
  constructor(trueList, falseList){
    this.trueList = trueList ? trueList : ["true"];
    this.falseList = falseList ? falseList : ["false"];
  }

  /**
   * @param {*} val
   * @returns {*} true or false when `val` is a string found, whatever its
   *   letter case, in one of the lists; `val` itself otherwise
   */
  parse(val){
    if (typeof val === 'string') {
      //TODO: performance: don't convert
      const temp = val.toLowerCase();
      // the input is lower-cased, so list entries are compared lower-cased as well
      const listed = (list) => list.some((item) => typeof item === 'string' && item.toLowerCase() === temp);
      if(listed(this.trueList)) return true;
      else if(listed(this.falseList)) return false;
    }
    return val;
  }
}
|
|
23
|
+
module.exports = boolParser;
|