befly 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,362 @@
1
+ import { isName } from './util.js';
2
+
3
+ //TODO: handle comments
4
/**
 * Scans a `<!DOCTYPE ...>` declaration and collects the internal entities it declares.
 *
 * Only the characters at offsets 3-8 from `i` are compared against `OCTYPE`; the
 * leading `<!D` is not re-checked here.
 *
 * @param {string} xmlData - XML text being parsed.
 * @param {number} i - Index at which the `<!DOCTYPE` declaration is expected to start.
 * @returns {{entities: Object, i: number}} `entities` maps each declared entity name to
 *   `{ regx, val }`, where `regx` is a global RegExp matching the literal `&name;`
 *   reference and `val` is the replacement text; `i` is the index of the `>` that
 *   closes the DOCTYPE.
 * @throws {Error} `Invalid Tag instead of DOCTYPE` when the keyword does not match,
 *   `Invalid DOCTYPE` when an unrecognised `<` construct is found, and
 *   `Unclosed DOCTYPE` when the input ends before the declaration is closed.
 */
export default function readDocType(xmlData, i) {
    const entities = {};

    const isDocType =
        xmlData[i + 3] === 'O' &&
        xmlData[i + 4] === 'C' &&
        xmlData[i + 5] === 'T' &&
        xmlData[i + 6] === 'Y' &&
        xmlData[i + 7] === 'P' &&
        xmlData[i + 8] === 'E';
    if (!isDocType) {
        throw new Error(`Invalid Tag instead of DOCTYPE`);
    }

    i = i + 9;
    // Starts at 1 for the `<` of the DOCTYPE itself; every nested `<...>` adds one.
    let angleBracketsCount = 1;
    let hasBody = false;
    let comment = false;

    for (; i < xmlData.length; i++) {
        if (xmlData[i] === '<' && !comment) {
            // Determine the kind of declaration. Everything except comments is only
            // recognised once the internal subset (`[`) has been opened.
            if (hasBody && hasSeq(xmlData, '!ENTITY', i)) {
                i += 7;
                let entityName, val;
                [entityName, val, i] = readEntityExp(xmlData, i + 1);
                // Values that reference other entities are skipped (not supported).
                if (val.indexOf('&') === -1) {
                    // Escape regex metacharacters so that a name such as `a.b`
                    // matches only the literal reference `&a.b;`.
                    const escapedName = entityName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
                    entities[entityName] = {
                        regx: RegExp(`&${escapedName};`, 'g'),
                        val
                    };
                }
            } else if (hasBody && hasSeq(xmlData, '!ELEMENT', i)) {
                i += 8; // Not supported: parsed only to find where it ends
                const { index } = readElementExp(xmlData, i + 1);
                i = index;
            } else if (hasBody && hasSeq(xmlData, '!ATTLIST', i)) {
                // Not supported: the keyword is skipped and the remaining characters
                // are consumed by the main loop until the closing `>`.
                i += 8;
            } else if (hasBody && hasSeq(xmlData, '!NOTATION', i)) {
                i += 9; // Not supported: parsed only to find where it ends
                const { index } = readNotationExp(xmlData, i + 1);
                i = index;
            } else if (hasSeq(xmlData, '!--', i)) {
                comment = true;
            } else {
                throw new Error(`Invalid DOCTYPE`);
            }

            angleBracketsCount++;
        } else if (xmlData[i] === '>') {
            if (comment) {
                // Inside a comment only `-->` closes it; a lone `>` is ignored.
                if (xmlData[i - 1] === '-' && xmlData[i - 2] === '-') {
                    comment = false;
                    angleBracketsCount--;
                }
            } else {
                angleBracketsCount--;
            }
            if (angleBracketsCount === 0) {
                break;
            }
        } else if (xmlData[i] === '[') {
            hasBody = true;
        }
    }

    if (angleBracketsCount !== 0) {
        throw new Error(`Unclosed DOCTYPE`);
    }
    return { entities, i };
}
69
+
70
/**
 * Returns the first position at or after `index` whose character is not whitespace
 * (as matched by `\s`), or `data.length` when only whitespace remains.
 *
 * @param {string} data - Text being scanned.
 * @param {number} index - Position to start scanning from.
 * @returns {number} Position of the next non-whitespace character.
 */
const skipWhitespace = (data, index) => {
    let cursor = index;
    for (; cursor < data.length; cursor += 1) {
        if (!/\s/.test(data[cursor])) {
            break;
        }
    }
    return cursor;
};
76
+
77
/**
 * Reads the name and quoted value of an internal entity declaration, e.g.
 * `<!ENTITY entityname "replacement text">`.
 *
 * External entities (`SYSTEM`) and declarations with a `%` after the name are
 * rejected.
 *
 * @param {string} xmlData - XML text being parsed.
 * @param {number} i - Position just after the `<!ENTITY` keyword.
 * @returns {[string, string, number]} Entity name, entity value, and the index of the
 *   closing quote of the value.
 * @throws {Error} When the name is invalid, the entity is external or a parameter
 *   entity, or the value is not a properly terminated quoted string.
 */
function readEntityExp(xmlData, i) {
    let cursor = skipWhitespace(xmlData, i);

    // The name runs until whitespace or an opening quote.
    const nameStart = cursor;
    const endsName = (ch) => /\s/.test(ch) || ch === '"' || ch === "'";
    while (cursor < xmlData.length && !endsName(xmlData[cursor])) {
        cursor += 1;
    }
    const entityName = xmlData.substring(nameStart, cursor);
    validateEntityName(entityName);

    cursor = skipWhitespace(xmlData, cursor);

    // Unsupported constructs are reported before attempting to read a value.
    const keyword = xmlData.substring(cursor, cursor + 6).toUpperCase();
    if (keyword === 'SYSTEM') {
        throw new Error('External entities are not supported');
    }
    if (xmlData[cursor] === '%') {
        throw new Error('Parameter entities are not supported');
    }

    const [afterValue, entityValue] = readIdentifierVal(xmlData, cursor, 'entity');
    // Step back onto the closing quote; the caller's loop advances past it.
    return [entityName, entityValue, afterValue - 1];
}
114
+
115
/**
 * Reads a notation declaration body: a name followed by either
 * `SYSTEM "systemId"` or `PUBLIC "publicId" ["systemId"]`.
 *
 * @param {string} xmlData - XML text being parsed.
 * @param {number} i - Position just after the `<!NOTATION` keyword.
 * @returns {{notationName: string, publicIdentifier: (string|null),
 *   systemIdentifier: (string|null), index: number}} Parsed parts plus `index`, which
 *   is one less than the position reached after reading the identifiers.
 * @throws {Error} When the name is invalid, the keyword is neither `SYSTEM` nor
 *   `PUBLIC`, an identifier is not a terminated quoted string, or a `SYSTEM`
 *   notation has an empty system identifier.
 */
function readNotationExp(xmlData, i) {
    let pos = skipWhitespace(xmlData, i);

    // The notation name runs up to the next whitespace character.
    const nameStart = pos;
    while (pos < xmlData.length && !/\s/.test(xmlData[pos])) {
        pos += 1;
    }
    const notationName = xmlData.substring(nameStart, pos);
    validateEntityName(notationName);

    pos = skipWhitespace(xmlData, pos);

    // Both accepted keywords are six characters long and compared case-insensitively.
    const identifierType = xmlData.substring(pos, pos + 6).toUpperCase();
    const isPublic = identifierType === 'PUBLIC';
    if (!isPublic && identifierType !== 'SYSTEM') {
        throw new Error(`Expected SYSTEM or PUBLIC, found "${identifierType}"`);
    }
    pos = skipWhitespace(xmlData, pos + identifierType.length);

    let publicIdentifier = null;
    let systemIdentifier = null;

    if (isPublic) {
        [pos, publicIdentifier] = readIdentifierVal(xmlData, pos, 'publicIdentifier');
        pos = skipWhitespace(xmlData, pos);

        // A system identifier after the public one is optional.
        const next = xmlData[pos];
        if (next === '"' || next === "'") {
            [pos, systemIdentifier] = readIdentifierVal(xmlData, pos, 'systemIdentifier');
        }
    } else {
        // For SYSTEM the identifier is mandatory and must be non-empty.
        [pos, systemIdentifier] = readIdentifierVal(xmlData, pos, 'systemIdentifier');
        if (!systemIdentifier) {
            throw new Error('Missing mandatory system identifier for SYSTEM notation');
        }
    }

    return { notationName, publicIdentifier, systemIdentifier, index: pos - 1 };
}
165
+
166
/**
 * Reads a single- or double-quoted string starting at `i`.
 *
 * @param {string} xmlData - XML text being parsed.
 * @param {number} i - Position of the opening quote.
 * @param {string} type - Label used in the "Unterminated ... value" error message.
 * @returns {[number, string]} Position just after the closing quote, and the text
 *   between the quotes.
 * @throws {Error} When `xmlData[i]` is not a quote, or no matching closing quote
 *   follows.
 */
function readIdentifierVal(xmlData, i, type) {
    const quote = xmlData[i];
    if (quote !== '"' && quote !== "'") {
        throw new Error(`Expected quoted string, found "${quote}"`);
    }

    // The value ends at the next occurrence of the same quote character.
    const valueStart = i + 1;
    const closing = xmlData.indexOf(quote, valueStart);
    if (closing === -1) {
        throw new Error(`Unterminated ${type} value`);
    }

    return [closing + 1, xmlData.substring(valueStart, closing)];
}
185
+
186
/**
 * Reads an element declaration body, e.g.
 *   `<!ELEMENT br EMPTY>`, `<!ELEMENT div ANY>`, `<!ELEMENT book (title, author+)>`.
 *
 * @param {string} xmlData - XML text being parsed.
 * @param {number} i - Position just after the `<!ELEMENT` keyword.
 * @returns {{elementName: string, contentModel: string, index: number}} The element
 *   name, the trimmed text between `(` and the first `)` (empty for `EMPTY`/`ANY`),
 *   and the index of the last character consumed (`Y` of the keyword, or the `)`).
 * @throws {Error} When the name is invalid, the content model is not closed by `)`,
 *   or the declaration starts with anything other than `EMPTY`, `ANY` or `(`.
 */
function readElementExp(xmlData, i) {
    let pos = skipWhitespace(xmlData, i);

    // The element name runs up to the next whitespace character.
    const nameStart = pos;
    while (pos < xmlData.length && !/\s/.test(xmlData[pos])) {
        pos += 1;
    }
    const elementName = xmlData.substring(nameStart, pos);

    if (!validateEntityName(elementName)) {
        throw new Error(`Invalid element name: "${elementName}"`);
    }

    pos = skipWhitespace(xmlData, pos);

    let contentModel = '';
    const lead = xmlData[pos];
    if (lead === 'E' && hasSeq(xmlData, 'MPTY', pos)) {
        pos += 4; // land on the final `Y` of EMPTY
    } else if (lead === 'A' && hasSeq(xmlData, 'NY', pos)) {
        pos += 2; // land on the final `Y` of ANY
    } else if (lead === '(') {
        // The content model is everything up to the first `)`.
        const modelStart = pos + 1;
        const closeParen = xmlData.indexOf(')', modelStart);
        if (closeParen === -1) {
            throw new Error('Unterminated content model');
        }
        contentModel = xmlData.substring(modelStart, closeParen);
        pos = closeParen;
    } else {
        throw new Error(`Invalid Element Expression, found "${lead}"`);
    }

    return {
        elementName,
        contentModel: contentModel.trim(),
        index: pos
    };
}
235
+
236
/**
 * Reads a single attribute definition from an attribute-list declaration:
 * element name, attribute name, attribute type and default declaration.
 *
 * Supported types are the simple keywords (`CDATA`, `ID`, `IDREF`, `IDREFS`,
 * `ENTITY`, `ENTITIES`, `NMTOKEN`, `NMTOKENS`) and `NOTATION (a|b|...)`.
 * Supported defaults are `#REQUIRED`, `#IMPLIED` and a quoted literal.
 *
 * @param {string} xmlData - XML text being parsed.
 * @param {number} i - Position just after the `<!ATTLIST` keyword.
 * @returns {{elementName: string, attributeName: string, attributeType: string,
 *   defaultValue: string, index: number}} Parsed parts plus `index`, the position
 *   just after the default declaration.
 * @throws {Error} When a name is invalid, the attribute type is unknown, a notation
 *   list is malformed, or the default is neither a known keyword nor a terminated
 *   quoted string.
 */
function readAttlistExp(xmlData, i) {
    // Case-insensitive keyword test at the current position. The slice length is
    // taken from the keyword itself so the two can never disagree.
    const atKeyword = (keyword) => xmlData.substring(i, i + keyword.length).toUpperCase() === keyword;

    // Reads characters up to the next whitespace, advancing `i`.
    const readToken = () => {
        const start = i;
        while (i < xmlData.length && !/\s/.test(xmlData[i])) {
            i++;
        }
        return xmlData.substring(start, i);
    };

    i = skipWhitespace(xmlData, i);
    const elementName = readToken();
    validateEntityName(elementName);

    i = skipWhitespace(xmlData, i);
    const attributeName = readToken();
    if (!validateEntityName(attributeName)) {
        throw new Error(`Invalid attribute name: "${attributeName}"`);
    }

    i = skipWhitespace(xmlData, i);

    let attributeType = '';
    if (atKeyword('NOTATION')) {
        attributeType = 'NOTATION';
        i += 'NOTATION'.length;
        i = skipWhitespace(xmlData, i);

        if (xmlData[i] !== '(') {
            throw new Error(`Expected '(', found "${xmlData[i]}"`);
        }
        i++; // Move past '('

        // Collect the `|`-separated notation names up to the closing ')'.
        const allowedNotations = [];
        while (i < xmlData.length && xmlData[i] !== ')') {
            let notation = '';
            while (i < xmlData.length && xmlData[i] !== '|' && xmlData[i] !== ')') {
                notation += xmlData[i];
                i++;
            }

            notation = notation.trim();
            if (!validateEntityName(notation)) {
                throw new Error(`Invalid notation name: "${notation}"`);
            }
            allowedNotations.push(notation);

            if (xmlData[i] === '|') {
                i++; // Move past '|'
                i = skipWhitespace(xmlData, i);
            }
        }

        if (xmlData[i] !== ')') {
            throw new Error('Unterminated list of notations');
        }
        i++; // Move past ')'

        attributeType += ' (' + allowedNotations.join('|') + ')';
    } else {
        // Simple types: a single keyword terminated by whitespace.
        attributeType = readToken();

        const validTypes = ['CDATA', 'ID', 'IDREF', 'IDREFS', 'ENTITY', 'ENTITIES', 'NMTOKEN', 'NMTOKENS'];
        if (!validTypes.includes(attributeType.toUpperCase())) {
            throw new Error(`Invalid attribute type: "${attributeType}"`);
        }
    }

    i = skipWhitespace(xmlData, i);

    // Default declaration: a keyword or a quoted literal.
    let defaultValue = '';
    if (atKeyword('#REQUIRED')) {
        defaultValue = '#REQUIRED';
        i += '#REQUIRED'.length;
    } else if (atKeyword('#IMPLIED')) {
        defaultValue = '#IMPLIED';
        i += '#IMPLIED'.length;
    } else {
        [i, defaultValue] = readIdentifierVal(xmlData, i, 'ATTLIST');
    }

    return {
        elementName,
        attributeName,
        attributeType,
        defaultValue,
        index: i
    };
}
351
+
352
/**
 * Tests whether `seq` appears in `data` starting at the position immediately
 * after `i` (i.e. at `i + 1`).
 *
 * @param {string} data - Text to look in.
 * @param {string} seq - Sequence to match.
 * @param {number} i - Index of the character that precedes the expected sequence.
 * @returns {boolean} `true` when every character of `seq` matches.
 */
function hasSeq(data, seq, i) {
    const start = i + 1;
    let offset = 0;
    while (offset < seq.length) {
        if (data[start + offset] !== seq[offset]) {
            return false;
        }
        offset += 1;
    }
    return true;
}
358
+
359
/**
 * Returns `name` unchanged when `isName` accepts it; otherwise throws.
 *
 * @param {string} name - Candidate name.
 * @returns {string} The same `name`.
 * @throws {Error} `Invalid entity name <name>` when `isName(name)` is falsy.
 */
function validateEntityName(name) {
    if (!isName(name)) {
        throw new Error(`Invalid entity name ${name}`);
    }
    return name;
}
@@ -0,0 +1,45 @@
1
/**
 * Default parser options. `buildOptions` copies these and then applies the
 * caller's overrides on top.
 */
export const defaultOptions = {
    preserveOrder: false,
    attributeNamePrefix: '@_',
    attributesGroupName: false,
    textNodeName: '#text',
    ignoreAttributes: true,
    // When true, the namespace prefix is removed from tag and attribute names.
    removeNSPrefix: false,
    // When true, a tag may carry attributes that have no value.
    allowBooleanAttributes: false,
    //ignoreRootElement : false,
    parseTagValue: true,
    parseAttributeValue: false,
    // Trim string values of tags and attributes.
    trimValues: true,
    cdataPropName: false,
    numberParseOptions: {
        hex: true,
        leadingZeros: true,
        eNotation: true
    },
    // Default processors return the value untouched.
    tagValueProcessor: (tagName, val) => val,
    attributeValueProcessor: (attrName, val) => val,
    // Nested tags of stop nodes will not be parsed, even for errors.
    stopNodes: [],
    alwaysCreateTextNode: false,
    isArray: () => false,
    commentPropName: false,
    unpairedTags: [],
    processEntities: true,
    htmlEntities: false,
    ignoreDeclaration: false,
    ignorePiTags: false,
    transformTagName: false,
    transformAttributeName: false,
    // Default hook returns the tag name as given.
    updateTag: (tagName, jPath, attrs) => tagName,
    // skipEmptyListItem: false
    captureMetaData: false
};
42
+
43
/**
 * Builds the effective option set: a fresh object holding `defaultOptions`
 * with the caller's `options` shallowly applied on top.
 *
 * @param {Object} [options] - Caller-supplied overrides.
 * @returns {Object} New merged options object; neither input is modified.
 */
export const buildOptions = function (options) {
    const merged = {};
    Object.assign(merged, defaultOptions);
    Object.assign(merged, options);
    return merged;
};