@cj-tech-master/excelts 1.6.3-canary.20251224193141.8bff9dd → 1.6.3-canary.20251226035947.ef0b4f2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/README_zh.md +5 -4
- package/dist/browser/excelts.esm.js +2447 -3094
- package/dist/browser/excelts.esm.js.map +1 -1
- package/dist/browser/excelts.esm.min.js +23 -25
- package/dist/browser/excelts.iife.js +2448 -3095
- package/dist/browser/excelts.iife.js.map +1 -1
- package/dist/browser/excelts.iife.min.js +23 -25
- package/dist/cjs/csv/csv-core.js +2 -2
- package/dist/cjs/csv/csv-stream.js +1 -1
- package/dist/cjs/csv/csv.base.js +10 -27
- package/dist/cjs/csv/csv.js +4 -12
- package/dist/cjs/doc/column.js +21 -0
- package/dist/cjs/doc/worksheet.js +4 -0
- package/dist/cjs/stream/xlsx/workbook-writer.js +4 -4
- package/dist/cjs/stream/xlsx/worksheet-writer.js +8 -1
- package/dist/cjs/utils/datetime.js +648 -0
- package/dist/cjs/utils/parse-sax.js +1190 -12
- package/dist/cjs/utils/unzip/zip-parser.js +11 -0
- package/dist/cjs/utils/zip/compress.base.js +3 -0
- package/dist/cjs/utils/zip/compress.browser.js +74 -30
- package/dist/cjs/utils/zip/deflate-fallback.js +575 -0
- package/dist/cjs/utils/zip/streaming-zip.js +264 -0
- package/dist/cjs/xlsx/xform/sheet/col-breaks-xform.js +38 -0
- package/dist/cjs/xlsx/xform/sheet/page-breaks-xform.js +13 -1
- package/dist/cjs/xlsx/xform/sheet/row-breaks-xform.js +11 -13
- package/dist/cjs/xlsx/xform/sheet/worksheet-xform.js +7 -2
- package/dist/cjs/xlsx/xlsx.browser.js +10 -53
- package/dist/cjs/xlsx/xlsx.js +27 -59
- package/dist/esm/csv/csv-core.js +2 -2
- package/dist/esm/csv/csv-stream.js +1 -1
- package/dist/esm/csv/csv.base.js +10 -24
- package/dist/esm/csv/csv.js +4 -12
- package/dist/esm/doc/column.js +21 -0
- package/dist/esm/doc/worksheet.js +4 -0
- package/dist/esm/stream/xlsx/workbook-writer.js +1 -1
- package/dist/esm/stream/xlsx/worksheet-writer.js +8 -1
- package/dist/esm/utils/datetime.js +639 -0
- package/dist/esm/utils/parse-sax.js +1188 -12
- package/dist/esm/utils/unzip/zip-parser.js +11 -0
- package/dist/esm/utils/zip/compress.base.js +3 -0
- package/dist/esm/utils/zip/compress.browser.js +76 -31
- package/dist/esm/utils/zip/deflate-fallback.js +570 -0
- package/dist/esm/utils/zip/streaming-zip.js +259 -0
- package/dist/esm/xlsx/xform/sheet/col-breaks-xform.js +35 -0
- package/dist/esm/xlsx/xform/sheet/page-breaks-xform.js +13 -1
- package/dist/esm/xlsx/xform/sheet/row-breaks-xform.js +11 -13
- package/dist/esm/xlsx/xform/sheet/worksheet-xform.js +7 -2
- package/dist/esm/xlsx/xlsx.browser.js +10 -53
- package/dist/esm/xlsx/xlsx.js +27 -59
- package/dist/types/csv/csv-core.d.ts +6 -6
- package/dist/types/csv/csv.base.d.ts +4 -3
- package/dist/types/doc/column.d.ts +6 -0
- package/dist/types/doc/worksheet.d.ts +3 -1
- package/dist/types/stream/xlsx/workbook-writer.d.ts +1 -1
- package/dist/types/stream/xlsx/worksheet-writer.d.ts +3 -1
- package/dist/types/types.d.ts +6 -0
- package/dist/types/utils/datetime.d.ts +85 -0
- package/dist/types/utils/parse-sax.d.ts +108 -1
- package/dist/types/utils/unzip/zip-parser.d.ts +5 -0
- package/dist/types/utils/zip/compress.base.d.ts +3 -0
- package/dist/types/utils/zip/compress.browser.d.ts +27 -18
- package/dist/types/utils/zip/deflate-fallback.d.ts +39 -0
- package/dist/types/utils/zip/streaming-zip.d.ts +96 -0
- package/dist/types/xlsx/xform/sheet/col-breaks-xform.d.ts +16 -0
- package/dist/types/xlsx/xform/sheet/page-breaks-xform.d.ts +4 -0
- package/dist/types/xlsx/xform/sheet/row-breaks-xform.d.ts +4 -0
- package/package.json +7 -28
|
@@ -1,31 +1,1209 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* High-performance SAX XML parser
|
|
4
|
+
*
|
|
5
|
+
* Minimal implementation optimized for Excel XML parsing.
|
|
6
|
+
* Supports: opentag, text, closetag, error events.
|
|
7
|
+
* Zero external dependencies.
|
|
8
|
+
*
|
|
9
|
+
* Based on XML 1.0 specification with optimizations for common Excel XML patterns.
|
|
10
|
+
*/
|
|
2
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.SaxesParser = void 0;
|
|
3
13
|
exports.parseSax = parseSax;
|
|
4
|
-
const saxes_1 = require("saxes");
|
|
5
14
|
const utils_1 = require("./utils");
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// Character codes (for fast comparison)
|
|
17
|
+
// ============================================================================
|
|
18
|
+
// Character codes the tokenizer compares against, in ascending order.
// All values are written in hex so they line up with Unicode charts.
const TAB = 0x09; // \t
const NL = 0x0a; // \n
const CR = 0x0d; // \r
const SPACE = 0x20;
const BANG = 0x21; // !
const DQUOTE = 0x22; // "
const HASH = 0x23; // #
const AMP = 0x26; // &
const SQUOTE = 0x27; // '
const MINUS = 0x2d; // -
const FORWARD_SLASH = 0x2f; // /
const SEMICOLON = 0x3b; // ;
const LESS = 0x3c; // <
const EQUAL = 0x3d; // =
const GREATER = 0x3e; // >
const QUESTION = 0x3f; // ?
const OPEN_BRACKET = 0x5b; // [
const CLOSE_BRACKET = 0x5d; // ]
|
|
36
|
+
// ============================================================================
|
|
37
|
+
// Pre-computed lookup tables for performance
|
|
38
|
+
// ============================================================================
|
|
39
|
+
// ASCII character lookup (0-127) for String.fromCharCode
|
|
40
|
+
// One pre-built single-character string per ASCII code (0-127).
const ASCII_CHARS = Array.from({ length: 128 }, (_, code) => String.fromCharCode(code));
/**
 * Turn a code point into its string form.
 *
 * Codes below 128 are served from the ASCII_CHARS table; everything else is
 * delegated to String.fromCodePoint.
 *
 * @param {number} c - Code point to convert.
 * @returns {string} The corresponding character.
 */
function charFromCode(c) {
    if (c < 128) {
        return ASCII_CHARS[c];
    }
    return String.fromCodePoint(c);
}
|
|
48
|
+
// Bitmap for ASCII name start chars (a-zA-Z_:)
|
|
49
|
+
// Lookup table indexed by ASCII code: 1 marks a character that may start an
// XML name (A-Z, a-z, "_" and ":"), 0 marks everything else.
const NAME_START_CHAR_ASCII = new Uint8Array(128);
NAME_START_CHAR_ASCII.fill(1, 0x41, 0x5a + 1); // A-Z
NAME_START_CHAR_ASCII.fill(1, 0x61, 0x7a + 1); // a-z
NAME_START_CHAR_ASCII[0x3a] = 1; // :
NAME_START_CHAR_ASCII[0x5f] = 1; // _
|
|
58
|
+
// Bitmap for ASCII name chars (a-zA-Z0-9_:-.)
|
|
59
|
+
// Lookup table indexed by ASCII code: 1 marks a character allowed inside an
// XML name (letters, digits, "_", ":", "-" and "."), 0 marks everything else.
const NAME_CHAR_ASCII = new Uint8Array(128);
NAME_CHAR_ASCII.fill(1, 0x30, 0x39 + 1); // 0-9
NAME_CHAR_ASCII.fill(1, 0x41, 0x5a + 1); // A-Z
NAME_CHAR_ASCII.fill(1, 0x61, 0x7a + 1); // a-z
NAME_CHAR_ASCII[0x2d] = 1; // -
NAME_CHAR_ASCII[0x2e] = 1; // .
NAME_CHAR_ASCII[0x3a] = 1; // :
NAME_CHAR_ASCII[0x5f] = 1; // _
|
|
73
|
+
// ============================================================================
|
|
74
|
+
// Character classification (inlined for performance)
|
|
75
|
+
// ============================================================================
|
|
76
|
+
// isS: space characters (XML whitespace)
|
|
77
|
+
/**
 * Test whether a character code is XML whitespace (space, LF, CR or tab).
 *
 * @param {number} c - Character code to classify.
 * @returns {boolean} True for the four whitespace codes, false otherwise.
 */
function isS(c) {
    switch (c) {
        case SPACE:
        case NL:
        case CR:
        case TAB:
            return true;
        default:
            return false;
    }
}
|
|
80
|
+
// isQuote: quote characters
|
|
81
|
+
/**
 * Test whether a character code is a single or double quote.
 *
 * @param {number} c - Character code to classify.
 * @returns {boolean} True for `"` and `'`, false otherwise.
 */
function isQuote(c) {
    if (c === DQUOTE) {
        return true;
    }
    return c === SQUOTE;
}
|
|
84
|
+
// isNameStartChar: valid first character of XML name
|
|
85
|
+
// Optimized for common ASCII range first
|
|
86
|
+
/**
 * Test whether a code point may begin an XML name.
 *
 * ASCII codes are answered from the NAME_START_CHAR_ASCII table; larger code
 * points are checked against the remaining ranges one at a time.
 *
 * @param {number} c - Code point to classify.
 * @returns {boolean} True when the code point is a valid name start.
 */
function isNameStartChar(c) {
    if (c < 128) {
        return NAME_START_CHAR_ASCII[c] === 1;
    }
    // Non-ASCII ranges, lowest first.
    if (c >= 0xc0 && c <= 0xd6) { return true; }
    if (c >= 0xd8 && c <= 0xf6) { return true; }
    if (c >= 0xf8 && c <= 0x2ff) { return true; }
    if (c >= 0x370 && c <= 0x37d) { return true; }
    if (c >= 0x37f && c <= 0x1fff) { return true; }
    if (c === 0x200c || c === 0x200d) { return true; }
    if (c >= 0x2070 && c <= 0x218f) { return true; }
    if (c >= 0x2c00 && c <= 0x2fef) { return true; }
    if (c >= 0x3001 && c <= 0xd7ff) { return true; }
    if (c >= 0xf900 && c <= 0xfdcf) { return true; }
    if (c >= 0xfdf0 && c <= 0xfffd) { return true; }
    return c >= 0x10000 && c <= 0xeffff;
}
|
|
106
|
+
// isNameChar: valid character in XML name (includes digits, hyphen, period)
|
|
107
|
+
/**
 * Test whether a code point may appear inside an XML name.
 *
 * ASCII codes are answered from the NAME_CHAR_ASCII table; larger code points
 * are checked against the remaining ranges one at a time.
 *
 * @param {number} c - Code point to classify.
 * @returns {boolean} True when the code point is a valid name character.
 */
function isNameChar(c) {
    if (c < 128) {
        return NAME_CHAR_ASCII[c] === 1;
    }
    // Non-ASCII ranges, lowest first.
    if (c === 0xb7) { return true; }
    if (c >= 0xc0 && c <= 0xd6) { return true; }
    if (c >= 0xd8 && c <= 0xf6) { return true; }
    if (c >= 0xf8 && c <= 0x2ff) { return true; }
    if (c >= 0x300 && c <= 0x36f) { return true; }
    if (c >= 0x370 && c <= 0x37d) { return true; }
    if (c >= 0x37f && c <= 0x1fff) { return true; }
    if (c === 0x200c || c === 0x200d) { return true; }
    if (c >= 0x203f && c <= 0x2040) { return true; }
    if (c >= 0x2070 && c <= 0x218f) { return true; }
    if (c >= 0x2c00 && c <= 0x2fef) { return true; }
    if (c >= 0x3001 && c <= 0xd7ff) { return true; }
    if (c >= 0xf900 && c <= 0xfdcf) { return true; }
    if (c >= 0xfdf0 && c <= 0xfffd) { return true; }
    return c >= 0x10000 && c <= 0xeffff;
}
|
|
130
|
+
// isChar10: valid XML 1.0 character
|
|
131
|
+
/**
 * Test whether a code point is a legal XML 1.0 character.
 *
 * @param {number} c - Code point to classify.
 * @returns {boolean} True for tab, LF, CR and the ranges 0x20-0xD7FF,
 *   0xE000-0xFFFD and 0x10000-0x10FFFF.
 */
function isChar10(c) {
    if (c === TAB || c === NL || c === CR) {
        return true;
    }
    if (c >= SPACE && c <= 0xd7ff) {
        return true;
    }
    if (c >= 0xe000 && c <= 0xfffd) {
        return true;
    }
    return c >= 0x10000 && c <= 0x10ffff;
}
|
|
139
|
+
// ============================================================================
|
|
140
|
+
// Built-in XML entities
|
|
141
|
+
// ============================================================================
|
|
142
|
+
/**
 * Replacement text for the five predefined XML entities, keyed by entity
 * name without the surrounding "&" and ";".
 */
const XML_ENTITIES = {
    amp: "&",
    gt: ">",
    lt: "<",
    quot: "\"",
    apos: "'",
};
|
|
149
|
+
// ============================================================================
|
|
150
|
+
// Parser States
|
|
151
|
+
// ============================================================================
|
|
152
|
+
// Character data between markup.
const S_TEXT = 0;

// Start of markup.
const S_OPEN_WAKA = 1; // <
const S_OPEN_WAKA_BANG = 2; // <!

// Opening tag and its attributes.
const S_OPEN_TAG = 3; // <tagname
const S_OPEN_TAG_SLASH = 4; // <tagname /
const S_ATTRIB = 5; // <tagname attr
const S_ATTRIB_NAME = 6; // <tagname attr
const S_ATTRIB_NAME_SAW_WHITE = 7;
const S_ATTRIB_VALUE = 8; // <tagname attr=
const S_ATTRIB_VALUE_QUOTED = 9; // <tagname attr="
const S_ATTRIB_VALUE_CLOSED = 10;

// Closing tag.
const S_CLOSE_TAG = 11; // </tagname
const S_CLOSE_TAG_SAW_WHITE = 12;

// Comments.
const S_COMMENT = 13; // <!--
const S_COMMENT_ENDING = 14; // <!-- text -
const S_COMMENT_ENDED = 15; // <!-- text --

// CDATA sections.
const S_CDATA = 16; // <![CDATA[
const S_CDATA_ENDING = 17; // <![CDATA[ text ]
const S_CDATA_ENDING_2 = 18; // <![CDATA[ text ]]

// Processing instructions.
const S_PI = 19; // <?
const S_PI_ENDING = 20; // <? text ?

// Document type declaration.
const S_DOCTYPE = 21; // <!DOCTYPE
const S_DOCTYPE_QUOTE = 22;
const S_DOCTYPE_DTD = 23;
const S_DOCTYPE_DTD_QUOTED = 24;

// Entity reference.
const S_ENTITY = 25; // &entity;
|
|
178
|
+
// ============================================================================
|
|
179
|
+
// SaxesParser Class - Minimal implementation for Excel XML
|
|
180
|
+
// ============================================================================
|
|
181
|
+
class SaxesParser {
|
|
182
|
+
constructor(opt) {
|
|
183
|
+
// Parser state
|
|
184
|
+
this.state = S_TEXT;
|
|
185
|
+
this.chunk = "";
|
|
186
|
+
this.i = 0;
|
|
187
|
+
this.prevI = 0;
|
|
188
|
+
this.text = "";
|
|
189
|
+
this.name = "";
|
|
190
|
+
this.q = null;
|
|
191
|
+
this.tags = [];
|
|
192
|
+
this.tag = null;
|
|
193
|
+
this.attribList = [];
|
|
194
|
+
this.entity = "";
|
|
195
|
+
this.entityReturnState = S_TEXT;
|
|
196
|
+
this.openWakaBang = "";
|
|
197
|
+
this.sawRoot = false;
|
|
198
|
+
this.closedRoot = false;
|
|
199
|
+
this._closed = false;
|
|
200
|
+
this.reportedTextBeforeRoot = false;
|
|
201
|
+
this.reportedTextAfterRoot = false;
|
|
202
|
+
// Position tracking
|
|
203
|
+
this.line = 1;
|
|
204
|
+
this.column = 0;
|
|
205
|
+
this.positionAtNewLine = 0;
|
|
206
|
+
this.chunkPosition = 0;
|
|
207
|
+
// Entity storage
|
|
208
|
+
this.ENTITIES = { ...XML_ENTITIES };
|
|
209
|
+
this.trackPosition = opt?.position !== false;
|
|
210
|
+
this.fileName = opt?.fileName;
|
|
211
|
+
this.fragment = opt?.fragment ?? false;
|
|
212
|
+
this._init();
|
|
213
|
+
}
|
|
214
|
+
get closed() {
|
|
215
|
+
return this._closed;
|
|
216
|
+
}
|
|
217
|
+
get position() {
|
|
218
|
+
return this.chunkPosition + this.i;
|
|
219
|
+
}
|
|
220
|
+
_init() {
|
|
221
|
+
this.state = this.fragment ? S_TEXT : S_TEXT;
|
|
222
|
+
this.text = "";
|
|
223
|
+
this.name = "";
|
|
224
|
+
this.q = null;
|
|
225
|
+
this.tags = [];
|
|
226
|
+
this.tag = null;
|
|
227
|
+
this.attribList = [];
|
|
228
|
+
this.entity = "";
|
|
229
|
+
this.openWakaBang = "";
|
|
230
|
+
this.sawRoot = this.fragment;
|
|
231
|
+
this.closedRoot = this.fragment;
|
|
232
|
+
this.reportedTextBeforeRoot = this.fragment;
|
|
233
|
+
this.reportedTextAfterRoot = this.fragment;
|
|
234
|
+
this.carriedFromPrevious = undefined;
|
|
235
|
+
this._closed = false;
|
|
236
|
+
this.line = 1;
|
|
237
|
+
this.column = 0;
|
|
238
|
+
this.positionAtNewLine = 0;
|
|
239
|
+
this.chunkPosition = 0;
|
|
240
|
+
this.chunk = "";
|
|
241
|
+
this.i = 0;
|
|
242
|
+
this.prevI = 0;
|
|
243
|
+
}
|
|
244
|
+
on(name, handler) {
|
|
245
|
+
switch (name) {
|
|
246
|
+
case "text":
|
|
247
|
+
this.textHandler = handler;
|
|
248
|
+
break;
|
|
249
|
+
case "opentag":
|
|
250
|
+
this.openTagHandler = handler;
|
|
251
|
+
break;
|
|
252
|
+
case "closetag":
|
|
253
|
+
this.closeTagHandler = handler;
|
|
254
|
+
break;
|
|
255
|
+
case "error":
|
|
256
|
+
this.errorHandler = handler;
|
|
257
|
+
break;
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
off(name) {
|
|
261
|
+
switch (name) {
|
|
262
|
+
case "text":
|
|
263
|
+
this.textHandler = undefined;
|
|
264
|
+
break;
|
|
265
|
+
case "opentag":
|
|
266
|
+
this.openTagHandler = undefined;
|
|
267
|
+
break;
|
|
268
|
+
case "closetag":
|
|
269
|
+
this.closeTagHandler = undefined;
|
|
270
|
+
break;
|
|
271
|
+
case "error":
|
|
272
|
+
this.errorHandler = undefined;
|
|
273
|
+
break;
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
// Error handling
|
|
277
|
+
makeError(message) {
|
|
278
|
+
let msg = this.fileName ?? "";
|
|
279
|
+
if (this.trackPosition) {
|
|
280
|
+
if (msg.length > 0) {
|
|
281
|
+
msg += ":";
|
|
282
|
+
}
|
|
283
|
+
msg += `${this.line}:${this.column}`;
|
|
284
|
+
}
|
|
285
|
+
if (msg.length > 0) {
|
|
286
|
+
msg += ": ";
|
|
287
|
+
}
|
|
288
|
+
return new Error(msg + message);
|
|
289
|
+
}
|
|
290
|
+
fail(message) {
|
|
291
|
+
const err = this.makeError(message);
|
|
292
|
+
if (this.errorHandler) {
|
|
293
|
+
this.errorHandler(err);
|
|
294
|
+
}
|
|
295
|
+
else {
|
|
296
|
+
throw err;
|
|
297
|
+
}
|
|
298
|
+
return this;
|
|
299
|
+
}
|
|
300
|
+
// Main write method
|
|
301
|
+
write(chunk) {
|
|
302
|
+
if (this._closed) {
|
|
303
|
+
return this.fail("cannot write after close");
|
|
304
|
+
}
|
|
305
|
+
let end = false;
|
|
306
|
+
if (chunk === null) {
|
|
307
|
+
end = true;
|
|
308
|
+
chunk = "";
|
|
309
|
+
}
|
|
310
|
+
if (this.carriedFromPrevious !== undefined) {
|
|
311
|
+
chunk = this.carriedFromPrevious + chunk;
|
|
312
|
+
this.carriedFromPrevious = undefined;
|
|
313
|
+
}
|
|
314
|
+
let limit = chunk.length;
|
|
315
|
+
if (!end && limit > 0) {
|
|
316
|
+
const lastCode = chunk.charCodeAt(limit - 1);
|
|
317
|
+
// Carry CR or surrogate to next chunk
|
|
318
|
+
if (lastCode === CR || (lastCode >= 0xd800 && lastCode <= 0xdbff)) {
|
|
319
|
+
this.carriedFromPrevious = chunk[limit - 1];
|
|
320
|
+
limit--;
|
|
321
|
+
chunk = chunk.slice(0, limit);
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
this.chunk = chunk;
|
|
325
|
+
this.i = 0;
|
|
326
|
+
while (this.i < limit) {
|
|
327
|
+
this.processState();
|
|
328
|
+
}
|
|
329
|
+
this.chunkPosition += limit;
|
|
330
|
+
return end ? this.end() : this;
|
|
331
|
+
}
|
|
332
|
+
close() {
|
|
333
|
+
return this.write(null);
|
|
334
|
+
}
|
|
335
|
+
// Get next character code, handling newlines
|
|
336
|
+
// Optimized: split into fast path (no position) and slow path
|
|
337
|
+
getCode() {
|
|
338
|
+
const { chunk, i } = this;
|
|
339
|
+
this.prevI = i;
|
|
340
|
+
this.i = i + 1;
|
|
341
|
+
if (i >= chunk.length) {
|
|
342
|
+
return -1; // EOC
|
|
343
|
+
}
|
|
344
|
+
const code = chunk.charCodeAt(i);
|
|
345
|
+
// Fast path: common ASCII chars (no surrogates, no CR/LF)
|
|
346
|
+
// 0x0a = LF, 0x0d = CR - both need special handling
|
|
347
|
+
if (code < 0x0a || (code > 0x0d && code < 0xd800)) {
|
|
348
|
+
if (this.trackPosition) {
|
|
349
|
+
this.column++;
|
|
350
|
+
}
|
|
351
|
+
return code;
|
|
352
|
+
}
|
|
353
|
+
// Handle surrogates
|
|
354
|
+
if (code >= 0xd800 && code <= 0xdbff) {
|
|
355
|
+
const next = chunk.charCodeAt(i + 1);
|
|
356
|
+
if (next >= 0xdc00 && next <= 0xdfff) {
|
|
357
|
+
this.i = i + 2;
|
|
358
|
+
if (this.trackPosition) {
|
|
359
|
+
this.column++;
|
|
360
|
+
}
|
|
361
|
+
return 0x10000 + ((code - 0xd800) * 0x400 + (next - 0xdc00));
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
// Handle CR
|
|
365
|
+
if (code === CR) {
|
|
366
|
+
if (chunk.charCodeAt(i + 1) === NL) {
|
|
367
|
+
this.i = i + 2;
|
|
368
|
+
}
|
|
369
|
+
if (this.trackPosition) {
|
|
370
|
+
this.line++;
|
|
371
|
+
this.column = 0;
|
|
372
|
+
this.positionAtNewLine = this.position;
|
|
373
|
+
}
|
|
374
|
+
return NL;
|
|
375
|
+
}
|
|
376
|
+
// Handle LF (code === 0x0a) or other codes between 0x0a-0x0d
|
|
377
|
+
if (code === NL && this.trackPosition) {
|
|
378
|
+
this.line++;
|
|
379
|
+
this.column = 0;
|
|
380
|
+
this.positionAtNewLine = this.position;
|
|
381
|
+
}
|
|
382
|
+
else if (this.trackPosition) {
|
|
383
|
+
this.column++;
|
|
384
|
+
}
|
|
385
|
+
return code;
|
|
386
|
+
}
|
|
387
|
+
unget() {
|
|
388
|
+
this.i = this.prevI;
|
|
389
|
+
if (this.trackPosition) {
|
|
390
|
+
this.column--;
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
// State machine dispatcher
|
|
394
|
+
processState() {
|
|
395
|
+
switch (this.state) {
|
|
396
|
+
case S_TEXT:
|
|
397
|
+
this.sText();
|
|
398
|
+
break;
|
|
399
|
+
case S_OPEN_WAKA:
|
|
400
|
+
this.sOpenWaka();
|
|
401
|
+
break;
|
|
402
|
+
case S_OPEN_WAKA_BANG:
|
|
403
|
+
this.sOpenWakaBang();
|
|
404
|
+
break;
|
|
405
|
+
case S_OPEN_TAG:
|
|
406
|
+
this.sOpenTag();
|
|
407
|
+
break;
|
|
408
|
+
case S_OPEN_TAG_SLASH:
|
|
409
|
+
this.sOpenTagSlash();
|
|
410
|
+
break;
|
|
411
|
+
case S_ATTRIB:
|
|
412
|
+
this.sAttrib();
|
|
413
|
+
break;
|
|
414
|
+
case S_ATTRIB_NAME:
|
|
415
|
+
this.sAttribName();
|
|
416
|
+
break;
|
|
417
|
+
case S_ATTRIB_NAME_SAW_WHITE:
|
|
418
|
+
this.sAttribNameSawWhite();
|
|
419
|
+
break;
|
|
420
|
+
case S_ATTRIB_VALUE:
|
|
421
|
+
this.sAttribValue();
|
|
422
|
+
break;
|
|
423
|
+
case S_ATTRIB_VALUE_QUOTED:
|
|
424
|
+
this.sAttribValueQuoted();
|
|
425
|
+
break;
|
|
426
|
+
case S_ATTRIB_VALUE_CLOSED:
|
|
427
|
+
this.sAttribValueClosed();
|
|
428
|
+
break;
|
|
429
|
+
case S_CLOSE_TAG:
|
|
430
|
+
this.sCloseTag();
|
|
431
|
+
break;
|
|
432
|
+
case S_CLOSE_TAG_SAW_WHITE:
|
|
433
|
+
this.sCloseTagSawWhite();
|
|
434
|
+
break;
|
|
435
|
+
case S_COMMENT:
|
|
436
|
+
this.sComment();
|
|
437
|
+
break;
|
|
438
|
+
case S_COMMENT_ENDING:
|
|
439
|
+
this.sCommentEnding();
|
|
440
|
+
break;
|
|
441
|
+
case S_COMMENT_ENDED:
|
|
442
|
+
this.sCommentEnded();
|
|
443
|
+
break;
|
|
444
|
+
case S_CDATA:
|
|
445
|
+
this.sCData();
|
|
446
|
+
break;
|
|
447
|
+
case S_CDATA_ENDING:
|
|
448
|
+
this.sCDataEnding();
|
|
449
|
+
break;
|
|
450
|
+
case S_CDATA_ENDING_2:
|
|
451
|
+
this.sCDataEnding2();
|
|
452
|
+
break;
|
|
453
|
+
case S_PI:
|
|
454
|
+
this.sPI();
|
|
455
|
+
break;
|
|
456
|
+
case S_PI_ENDING:
|
|
457
|
+
this.sPIEnding();
|
|
458
|
+
break;
|
|
459
|
+
case S_DOCTYPE:
|
|
460
|
+
this.sDoctype();
|
|
461
|
+
break;
|
|
462
|
+
case S_DOCTYPE_QUOTE:
|
|
463
|
+
this.sDoctypeQuote();
|
|
464
|
+
break;
|
|
465
|
+
case S_DOCTYPE_DTD:
|
|
466
|
+
this.sDoctypeDTD();
|
|
467
|
+
break;
|
|
468
|
+
case S_DOCTYPE_DTD_QUOTED:
|
|
469
|
+
this.sDoctypeDTDQuoted();
|
|
470
|
+
break;
|
|
471
|
+
case S_ENTITY:
|
|
472
|
+
this.sEntity();
|
|
473
|
+
break;
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
// ============================================================================
|
|
477
|
+
// State handlers
|
|
478
|
+
// ============================================================================
|
|
479
|
+
sText() {
|
|
480
|
+
// Check if we're inside or outside the root element
|
|
481
|
+
if (this.tags.length !== 0) {
|
|
482
|
+
this.handleTextInRoot();
|
|
483
|
+
}
|
|
484
|
+
else {
|
|
485
|
+
this.handleTextOutsideRoot();
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
handleTextInRoot() {
|
|
489
|
+
const { chunk } = this;
|
|
490
|
+
let { i: start } = this;
|
|
491
|
+
const handler = this.textHandler;
|
|
492
|
+
while (true) {
|
|
493
|
+
const c = this.getCode();
|
|
494
|
+
if (c === -1) {
|
|
495
|
+
// End of chunk
|
|
496
|
+
if (handler && start < this.i) {
|
|
497
|
+
this.text += chunk.slice(start, this.i);
|
|
498
|
+
}
|
|
499
|
+
return;
|
|
500
|
+
}
|
|
501
|
+
if (c === LESS) {
|
|
502
|
+
// Start of tag
|
|
503
|
+
if (handler) {
|
|
504
|
+
const slice = chunk.slice(start, this.prevI);
|
|
505
|
+
if (this.text.length > 0) {
|
|
506
|
+
handler(this.text + slice);
|
|
507
|
+
this.text = "";
|
|
508
|
+
}
|
|
509
|
+
else if (slice.length > 0) {
|
|
510
|
+
handler(slice);
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
this.state = S_OPEN_WAKA;
|
|
514
|
+
return;
|
|
515
|
+
}
|
|
516
|
+
if (c === AMP) {
|
|
517
|
+
// Entity reference
|
|
518
|
+
if (handler) {
|
|
519
|
+
this.text += chunk.slice(start, this.prevI);
|
|
520
|
+
}
|
|
521
|
+
this.state = S_ENTITY;
|
|
522
|
+
this.entityReturnState = S_TEXT;
|
|
523
|
+
this.entity = "";
|
|
524
|
+
return;
|
|
525
|
+
}
|
|
526
|
+
if (c === NL) {
|
|
527
|
+
// Handle newline in text
|
|
528
|
+
if (handler) {
|
|
529
|
+
this.text += chunk.slice(start, this.prevI) + "\n";
|
|
530
|
+
}
|
|
531
|
+
start = this.i;
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
handleTextOutsideRoot() {
|
|
536
|
+
const { chunk } = this;
|
|
537
|
+
let { i: start } = this;
|
|
538
|
+
const handler = this.textHandler;
|
|
539
|
+
let nonSpace = false;
|
|
540
|
+
while (true) {
|
|
541
|
+
const c = this.getCode();
|
|
542
|
+
if (c === -1) {
|
|
543
|
+
// End of chunk
|
|
544
|
+
if (handler && start < this.i) {
|
|
545
|
+
this.text += chunk.slice(start, this.i);
|
|
546
|
+
}
|
|
547
|
+
break;
|
|
548
|
+
}
|
|
549
|
+
if (c === LESS) {
|
|
550
|
+
// Start of tag
|
|
551
|
+
if (handler) {
|
|
552
|
+
const slice = chunk.slice(start, this.prevI);
|
|
553
|
+
if (this.text.length > 0) {
|
|
554
|
+
handler(this.text + slice);
|
|
555
|
+
this.text = "";
|
|
556
|
+
}
|
|
557
|
+
else if (slice.length > 0) {
|
|
558
|
+
handler(slice);
|
|
559
|
+
}
|
|
560
|
+
}
|
|
561
|
+
this.state = S_OPEN_WAKA;
|
|
562
|
+
break;
|
|
563
|
+
}
|
|
564
|
+
if (c === AMP) {
|
|
565
|
+
// Entity reference
|
|
566
|
+
if (handler) {
|
|
567
|
+
this.text += chunk.slice(start, this.prevI);
|
|
568
|
+
}
|
|
569
|
+
this.state = S_ENTITY;
|
|
570
|
+
this.entityReturnState = S_TEXT;
|
|
571
|
+
this.entity = "";
|
|
572
|
+
nonSpace = true;
|
|
573
|
+
break;
|
|
574
|
+
}
|
|
575
|
+
if (c === NL) {
|
|
576
|
+
// Handle newline in text
|
|
577
|
+
if (handler) {
|
|
578
|
+
this.text += chunk.slice(start, this.prevI) + "\n";
|
|
579
|
+
}
|
|
580
|
+
start = this.i;
|
|
581
|
+
}
|
|
582
|
+
else if (!isS(c)) {
|
|
583
|
+
nonSpace = true;
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
// Report error for non-whitespace text outside root
|
|
587
|
+
if (nonSpace) {
|
|
588
|
+
if (!this.sawRoot && !this.reportedTextBeforeRoot) {
|
|
589
|
+
this.fail("text data outside of root node.");
|
|
590
|
+
this.reportedTextBeforeRoot = true;
|
|
591
|
+
}
|
|
592
|
+
if (this.closedRoot && !this.reportedTextAfterRoot) {
|
|
593
|
+
this.fail("text data outside of root node.");
|
|
594
|
+
this.reportedTextAfterRoot = true;
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
sOpenWaka() {
|
|
599
|
+
const c = this.getCode();
|
|
600
|
+
if (isNameStartChar(c)) {
|
|
601
|
+
this.state = S_OPEN_TAG;
|
|
602
|
+
this.name = charFromCode(c);
|
|
603
|
+
return;
|
|
604
|
+
}
|
|
605
|
+
switch (c) {
|
|
606
|
+
case FORWARD_SLASH:
|
|
607
|
+
this.state = S_CLOSE_TAG;
|
|
608
|
+
this.name = "";
|
|
609
|
+
break;
|
|
610
|
+
case BANG:
|
|
611
|
+
this.state = S_OPEN_WAKA_BANG;
|
|
612
|
+
this.openWakaBang = "";
|
|
613
|
+
break;
|
|
614
|
+
case QUESTION:
|
|
615
|
+
this.state = S_PI;
|
|
616
|
+
this.text = "";
|
|
617
|
+
break;
|
|
618
|
+
default:
|
|
619
|
+
this.fail("unexpected character in tag");
|
|
620
|
+
this.state = S_TEXT;
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
sOpenWakaBang() {
|
|
624
|
+
const c = this.getCode();
|
|
625
|
+
this.openWakaBang += charFromCode(c);
|
|
626
|
+
switch (this.openWakaBang) {
|
|
627
|
+
case "[CDATA[":
|
|
628
|
+
this.state = S_CDATA;
|
|
629
|
+
this.text = "";
|
|
630
|
+
this.openWakaBang = "";
|
|
631
|
+
break;
|
|
632
|
+
case "--":
|
|
633
|
+
this.state = S_COMMENT;
|
|
634
|
+
this.text = "";
|
|
635
|
+
this.openWakaBang = "";
|
|
636
|
+
break;
|
|
637
|
+
case "DOCTYPE":
|
|
638
|
+
this.state = S_DOCTYPE;
|
|
639
|
+
this.text = "";
|
|
640
|
+
this.openWakaBang = "";
|
|
641
|
+
break;
|
|
642
|
+
default:
|
|
643
|
+
if (this.openWakaBang.length >= 7) {
|
|
644
|
+
this.fail("incorrect syntax");
|
|
645
|
+
this.state = S_TEXT;
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
sOpenTag() {
|
|
650
|
+
const c = this.getCode();
|
|
651
|
+
if (c === -1) {
|
|
652
|
+
return;
|
|
653
|
+
}
|
|
654
|
+
if (isNameChar(c)) {
|
|
655
|
+
this.name += charFromCode(c);
|
|
656
|
+
return;
|
|
657
|
+
}
|
|
658
|
+
// Tag name complete
|
|
659
|
+
this.tag = {
|
|
660
|
+
name: this.name,
|
|
661
|
+
attributes: Object.create(null),
|
|
662
|
+
isSelfClosing: false
|
|
663
|
+
};
|
|
664
|
+
this.attribList = [];
|
|
665
|
+
this.sawRoot = true;
|
|
666
|
+
if (c === GREATER) {
|
|
667
|
+
this.openTag();
|
|
668
|
+
}
|
|
669
|
+
else if (c === FORWARD_SLASH) {
|
|
670
|
+
this.state = S_OPEN_TAG_SLASH;
|
|
671
|
+
}
|
|
672
|
+
else if (isS(c)) {
|
|
673
|
+
this.state = S_ATTRIB;
|
|
674
|
+
}
|
|
675
|
+
else {
|
|
676
|
+
this.fail("unexpected character in tag");
|
|
677
|
+
this.state = S_ATTRIB;
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
sOpenTagSlash() {
|
|
681
|
+
const c = this.getCode();
|
|
682
|
+
if (c === GREATER) {
|
|
683
|
+
this.openSelfClosingTag();
|
|
684
|
+
}
|
|
685
|
+
else {
|
|
686
|
+
this.fail("expected >");
|
|
687
|
+
this.state = S_ATTRIB;
|
|
688
|
+
}
|
|
689
|
+
}
|
|
690
|
+
sAttrib() {
|
|
691
|
+
const c = this.skipSpaces();
|
|
692
|
+
if (c === -1) {
|
|
693
|
+
return;
|
|
694
|
+
}
|
|
695
|
+
if (isNameStartChar(c)) {
|
|
696
|
+
this.name = charFromCode(c);
|
|
697
|
+
this.state = S_ATTRIB_NAME;
|
|
698
|
+
}
|
|
699
|
+
else if (c === GREATER) {
|
|
700
|
+
this.openTag();
|
|
701
|
+
}
|
|
702
|
+
else if (c === FORWARD_SLASH) {
|
|
703
|
+
this.state = S_OPEN_TAG_SLASH;
|
|
704
|
+
}
|
|
705
|
+
else {
|
|
706
|
+
this.fail("unexpected character in attribute");
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
sAttribName() {
|
|
710
|
+
const c = this.getCode();
|
|
711
|
+
if (c === -1) {
|
|
712
|
+
return;
|
|
713
|
+
}
|
|
714
|
+
if (isNameChar(c)) {
|
|
715
|
+
this.name += charFromCode(c);
|
|
716
|
+
return;
|
|
717
|
+
}
|
|
718
|
+
if (c === EQUAL) {
|
|
719
|
+
this.state = S_ATTRIB_VALUE;
|
|
720
|
+
}
|
|
721
|
+
else if (isS(c)) {
|
|
722
|
+
this.state = S_ATTRIB_NAME_SAW_WHITE;
|
|
723
|
+
}
|
|
724
|
+
else if (c === GREATER) {
|
|
725
|
+
this.fail("attribute without value");
|
|
726
|
+
this.attribList.push({ name: this.name, value: this.name });
|
|
727
|
+
this.name = "";
|
|
728
|
+
this.openTag();
|
|
729
|
+
}
|
|
730
|
+
else {
|
|
731
|
+
this.fail("unexpected character in attribute name");
|
|
732
|
+
}
|
|
733
|
+
}
|
|
734
|
+
sAttribNameSawWhite() {
|
|
735
|
+
const c = this.skipSpaces();
|
|
736
|
+
if (c === -1) {
|
|
737
|
+
return;
|
|
738
|
+
}
|
|
739
|
+
if (c === EQUAL) {
|
|
740
|
+
this.state = S_ATTRIB_VALUE;
|
|
741
|
+
}
|
|
742
|
+
else {
|
|
743
|
+
this.fail("attribute without value");
|
|
744
|
+
this.name = "";
|
|
745
|
+
this.text = "";
|
|
746
|
+
if (c === GREATER) {
|
|
747
|
+
this.openTag();
|
|
748
|
+
}
|
|
749
|
+
else if (isNameStartChar(c)) {
|
|
750
|
+
this.name = charFromCode(c);
|
|
751
|
+
this.state = S_ATTRIB_NAME;
|
|
752
|
+
}
|
|
753
|
+
else {
|
|
754
|
+
this.fail("unexpected character");
|
|
755
|
+
this.state = S_ATTRIB;
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
}
|
|
759
|
+
sAttribValue() {
|
|
760
|
+
const c = this.skipSpaces();
|
|
761
|
+
if (c === -1) {
|
|
762
|
+
return;
|
|
763
|
+
}
|
|
764
|
+
if (isQuote(c)) {
|
|
765
|
+
this.q = c;
|
|
766
|
+
this.text = "";
|
|
767
|
+
this.state = S_ATTRIB_VALUE_QUOTED;
|
|
768
|
+
}
|
|
769
|
+
else {
|
|
770
|
+
this.fail("unquoted attribute value");
|
|
771
|
+
this.state = S_TEXT;
|
|
772
|
+
}
|
|
773
|
+
}
|
|
774
|
+
sAttribValueQuoted() {
|
|
775
|
+
const { q, chunk } = this;
|
|
776
|
+
let { i: start } = this;
|
|
777
|
+
while (true) {
|
|
778
|
+
const c = this.getCode();
|
|
779
|
+
if (c === -1) {
|
|
780
|
+
this.text += chunk.slice(start, this.i);
|
|
781
|
+
return;
|
|
782
|
+
}
|
|
783
|
+
if (c === q) {
|
|
784
|
+
// End of attribute value
|
|
785
|
+
this.attribList.push({
|
|
786
|
+
name: this.name,
|
|
787
|
+
value: this.text + chunk.slice(start, this.prevI)
|
|
788
|
+
});
|
|
789
|
+
this.name = "";
|
|
790
|
+
this.text = "";
|
|
791
|
+
this.q = null;
|
|
792
|
+
this.state = S_ATTRIB_VALUE_CLOSED;
|
|
793
|
+
return;
|
|
794
|
+
}
|
|
795
|
+
if (c === AMP) {
|
|
796
|
+
this.text += chunk.slice(start, this.prevI);
|
|
797
|
+
this.state = S_ENTITY;
|
|
798
|
+
this.entityReturnState = S_ATTRIB_VALUE_QUOTED;
|
|
799
|
+
this.entity = "";
|
|
800
|
+
return;
|
|
801
|
+
}
|
|
802
|
+
if (c === NL || c === TAB) {
|
|
803
|
+
// Normalize whitespace in attributes
|
|
804
|
+
this.text += chunk.slice(start, this.prevI) + " ";
|
|
805
|
+
start = this.i;
|
|
806
|
+
}
|
|
807
|
+
if (c === LESS) {
|
|
808
|
+
this.text += chunk.slice(start, this.prevI);
|
|
809
|
+
this.fail("< not allowed in attribute value");
|
|
810
|
+
return;
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
}
|
|
814
|
+
sAttribValueClosed() {
|
|
815
|
+
const c = this.getCode();
|
|
816
|
+
if (c === -1) {
|
|
817
|
+
return;
|
|
818
|
+
}
|
|
819
|
+
if (isS(c)) {
|
|
820
|
+
this.state = S_ATTRIB;
|
|
821
|
+
}
|
|
822
|
+
else if (c === GREATER) {
|
|
823
|
+
this.openTag();
|
|
824
|
+
}
|
|
825
|
+
else if (c === FORWARD_SLASH) {
|
|
826
|
+
this.state = S_OPEN_TAG_SLASH;
|
|
827
|
+
}
|
|
828
|
+
else if (isNameStartChar(c)) {
|
|
829
|
+
this.fail("no whitespace between attributes");
|
|
830
|
+
this.name = charFromCode(c);
|
|
831
|
+
this.state = S_ATTRIB_NAME;
|
|
832
|
+
}
|
|
833
|
+
else {
|
|
834
|
+
this.fail("unexpected character after attribute");
|
|
835
|
+
}
|
|
836
|
+
}
|
|
837
|
+
sCloseTag() {
|
|
838
|
+
const c = this.getCode();
|
|
839
|
+
if (c === -1) {
|
|
840
|
+
return;
|
|
841
|
+
}
|
|
842
|
+
if (isNameChar(c)) {
|
|
843
|
+
this.name += charFromCode(c);
|
|
844
|
+
}
|
|
845
|
+
else if (c === GREATER) {
|
|
846
|
+
this.closeTag();
|
|
847
|
+
}
|
|
848
|
+
else if (isS(c)) {
|
|
849
|
+
this.state = S_CLOSE_TAG_SAW_WHITE;
|
|
850
|
+
}
|
|
851
|
+
else {
|
|
852
|
+
this.fail("unexpected character in close tag");
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
sCloseTagSawWhite() {
|
|
856
|
+
const c = this.skipSpaces();
|
|
857
|
+
if (c === -1) {
|
|
858
|
+
return;
|
|
859
|
+
}
|
|
860
|
+
if (c === GREATER) {
|
|
861
|
+
this.closeTag();
|
|
862
|
+
}
|
|
863
|
+
else {
|
|
864
|
+
this.fail("unexpected character in close tag");
|
|
865
|
+
}
|
|
866
|
+
}
|
|
867
|
+
sComment() {
|
|
868
|
+
const c = this.getCode();
|
|
869
|
+
if (c === -1) {
|
|
870
|
+
return;
|
|
871
|
+
}
|
|
872
|
+
if (c === MINUS) {
|
|
873
|
+
this.state = S_COMMENT_ENDING;
|
|
874
|
+
}
|
|
875
|
+
else {
|
|
876
|
+
this.text += charFromCode(c);
|
|
877
|
+
}
|
|
878
|
+
}
|
|
879
|
+
sCommentEnding() {
|
|
880
|
+
const c = this.getCode();
|
|
881
|
+
if (c === MINUS) {
|
|
882
|
+
this.state = S_COMMENT_ENDED;
|
|
883
|
+
}
|
|
884
|
+
else {
|
|
885
|
+
this.text += "-" + charFromCode(c);
|
|
886
|
+
this.state = S_COMMENT;
|
|
887
|
+
}
|
|
888
|
+
}
|
|
889
|
+
sCommentEnded() {
|
|
890
|
+
const c = this.getCode();
|
|
891
|
+
if (c === GREATER) {
|
|
892
|
+
// Comment done, emit nothing (we don't have a comment handler)
|
|
893
|
+
this.text = "";
|
|
894
|
+
this.state = S_TEXT;
|
|
895
|
+
}
|
|
896
|
+
else if (c === MINUS) {
|
|
897
|
+
this.text += "-";
|
|
898
|
+
}
|
|
899
|
+
else {
|
|
900
|
+
this.fail("malformed comment");
|
|
901
|
+
this.text += "--" + charFromCode(c);
|
|
902
|
+
this.state = S_COMMENT;
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
sCData() {
|
|
906
|
+
const c = this.getCode();
|
|
907
|
+
if (c === -1) {
|
|
908
|
+
return;
|
|
909
|
+
}
|
|
910
|
+
if (c === CLOSE_BRACKET) {
|
|
911
|
+
this.state = S_CDATA_ENDING;
|
|
912
|
+
}
|
|
913
|
+
else {
|
|
914
|
+
this.text += charFromCode(c);
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
sCDataEnding() {
|
|
918
|
+
const c = this.getCode();
|
|
919
|
+
if (c === CLOSE_BRACKET) {
|
|
920
|
+
this.state = S_CDATA_ENDING_2;
|
|
921
|
+
}
|
|
922
|
+
else {
|
|
923
|
+
this.text += "]" + charFromCode(c);
|
|
924
|
+
this.state = S_CDATA;
|
|
925
|
+
}
|
|
926
|
+
}
|
|
927
|
+
sCDataEnding2() {
|
|
928
|
+
const c = this.getCode();
|
|
929
|
+
if (c === GREATER) {
|
|
930
|
+
// CDATA done - emit as text
|
|
931
|
+
if (this.textHandler && this.text.length > 0) {
|
|
932
|
+
this.textHandler(this.text);
|
|
933
|
+
}
|
|
934
|
+
this.text = "";
|
|
935
|
+
this.state = S_TEXT;
|
|
936
|
+
}
|
|
937
|
+
else if (c === CLOSE_BRACKET) {
|
|
938
|
+
this.text += "]";
|
|
939
|
+
}
|
|
940
|
+
else {
|
|
941
|
+
this.text += "]]" + charFromCode(c);
|
|
942
|
+
this.state = S_CDATA;
|
|
943
|
+
}
|
|
944
|
+
}
|
|
945
|
+
sPI() {
|
|
946
|
+
const c = this.getCode();
|
|
947
|
+
if (c === -1) {
|
|
948
|
+
return;
|
|
949
|
+
}
|
|
950
|
+
if (c === QUESTION) {
|
|
951
|
+
this.state = S_PI_ENDING;
|
|
952
|
+
}
|
|
953
|
+
else {
|
|
954
|
+
this.text += charFromCode(c);
|
|
955
|
+
}
|
|
956
|
+
}
|
|
957
|
+
sPIEnding() {
|
|
958
|
+
const c = this.getCode();
|
|
959
|
+
if (c === GREATER) {
|
|
960
|
+
// PI done, we don't emit PI events
|
|
961
|
+
this.text = "";
|
|
962
|
+
this.state = S_TEXT;
|
|
963
|
+
}
|
|
964
|
+
else if (c === QUESTION) {
|
|
965
|
+
this.text += "?";
|
|
966
|
+
}
|
|
967
|
+
else {
|
|
968
|
+
this.text += "?" + charFromCode(c);
|
|
969
|
+
this.state = S_PI;
|
|
970
|
+
}
|
|
971
|
+
}
|
|
972
|
+
sDoctype() {
|
|
973
|
+
const c = this.getCode();
|
|
974
|
+
if (c === -1) {
|
|
975
|
+
return;
|
|
976
|
+
}
|
|
977
|
+
if (c === GREATER) {
|
|
978
|
+
// DOCTYPE done
|
|
979
|
+
this.text = "";
|
|
980
|
+
this.state = S_TEXT;
|
|
981
|
+
}
|
|
982
|
+
else if (isQuote(c)) {
|
|
983
|
+
this.q = c;
|
|
984
|
+
this.state = S_DOCTYPE_QUOTE;
|
|
985
|
+
}
|
|
986
|
+
else if (c === OPEN_BRACKET) {
|
|
987
|
+
this.state = S_DOCTYPE_DTD;
|
|
988
|
+
}
|
|
989
|
+
else {
|
|
990
|
+
this.text += charFromCode(c);
|
|
991
|
+
}
|
|
992
|
+
}
|
|
993
|
+
sDoctypeQuote() {
|
|
994
|
+
const c = this.getCode();
|
|
995
|
+
if (c === -1) {
|
|
996
|
+
return;
|
|
997
|
+
}
|
|
998
|
+
if (c === this.q) {
|
|
999
|
+
this.q = null;
|
|
1000
|
+
this.state = S_DOCTYPE;
|
|
1001
|
+
}
|
|
1002
|
+
else {
|
|
1003
|
+
this.text += charFromCode(c);
|
|
1004
|
+
}
|
|
1005
|
+
}
|
|
1006
|
+
sDoctypeDTD() {
|
|
1007
|
+
const c = this.getCode();
|
|
1008
|
+
if (c === -1) {
|
|
1009
|
+
return;
|
|
1010
|
+
}
|
|
1011
|
+
if (c === CLOSE_BRACKET) {
|
|
1012
|
+
this.state = S_DOCTYPE;
|
|
1013
|
+
}
|
|
1014
|
+
else if (isQuote(c)) {
|
|
1015
|
+
this.q = c;
|
|
1016
|
+
this.state = S_DOCTYPE_DTD_QUOTED;
|
|
1017
|
+
}
|
|
1018
|
+
}
|
|
1019
|
+
sDoctypeDTDQuoted() {
|
|
1020
|
+
const c = this.getCode();
|
|
1021
|
+
if (c === -1) {
|
|
1022
|
+
return;
|
|
1023
|
+
}
|
|
1024
|
+
if (c === this.q) {
|
|
1025
|
+
this.q = null;
|
|
1026
|
+
this.state = S_DOCTYPE_DTD;
|
|
1027
|
+
}
|
|
1028
|
+
}
|
|
1029
|
+
sEntity() {
|
|
1030
|
+
const c = this.getCode();
|
|
1031
|
+
if (c === -1) {
|
|
1032
|
+
return;
|
|
1033
|
+
}
|
|
1034
|
+
if (c === SEMICOLON) {
|
|
1035
|
+
// Entity complete
|
|
1036
|
+
const entity = this.entity;
|
|
1037
|
+
let resolved;
|
|
1038
|
+
if (entity === "") {
|
|
1039
|
+
this.fail("empty entity");
|
|
1040
|
+
resolved = "&;";
|
|
1041
|
+
}
|
|
1042
|
+
else {
|
|
1043
|
+
resolved = this.parseEntity(entity);
|
|
1044
|
+
}
|
|
1045
|
+
this.text += resolved;
|
|
1046
|
+
this.state = this.entityReturnState;
|
|
1047
|
+
this.entity = "";
|
|
1048
|
+
}
|
|
1049
|
+
else if (isNameChar(c) || c === HASH) {
|
|
1050
|
+
this.entity += charFromCode(c);
|
|
1051
|
+
}
|
|
1052
|
+
else {
|
|
1053
|
+
this.fail("invalid entity character");
|
|
1054
|
+
this.text += "&" + this.entity + charFromCode(c);
|
|
1055
|
+
this.state = this.entityReturnState;
|
|
1056
|
+
this.entity = "";
|
|
1057
|
+
}
|
|
1058
|
+
}
|
|
1059
|
+
// Entity resolution
|
|
1060
|
+
parseEntity(entity) {
|
|
1061
|
+
if (entity[0] !== "#") {
|
|
1062
|
+
// Named entity
|
|
1063
|
+
const resolved = this.ENTITIES[entity];
|
|
1064
|
+
if (resolved !== undefined) {
|
|
1065
|
+
return resolved;
|
|
1066
|
+
}
|
|
1067
|
+
this.fail("undefined entity: " + entity);
|
|
1068
|
+
return "&" + entity + ";";
|
|
1069
|
+
}
|
|
1070
|
+
// Numeric entity
|
|
1071
|
+
let num;
|
|
1072
|
+
if (entity[1] === "x" || entity[1] === "X") {
|
|
1073
|
+
// Hexadecimal
|
|
1074
|
+
num = parseInt(entity.slice(2), 16);
|
|
1075
|
+
}
|
|
1076
|
+
else {
|
|
1077
|
+
// Decimal
|
|
1078
|
+
num = parseInt(entity.slice(1), 10);
|
|
1079
|
+
}
|
|
1080
|
+
if (isNaN(num) || !isChar10(num)) {
|
|
1081
|
+
this.fail("invalid character entity");
|
|
1082
|
+
return "&" + entity + ";";
|
|
1083
|
+
}
|
|
1084
|
+
return String.fromCodePoint(num);
|
|
1085
|
+
}
|
|
1086
|
+
// Helper to skip whitespace
|
|
1087
|
+
skipSpaces() {
|
|
1088
|
+
while (true) {
|
|
1089
|
+
const c = this.getCode();
|
|
1090
|
+
if (c === -1 || !isS(c)) {
|
|
1091
|
+
return c;
|
|
1092
|
+
}
|
|
1093
|
+
}
|
|
1094
|
+
}
|
|
1095
|
+
// Tag handling
|
|
1096
|
+
openTag() {
|
|
1097
|
+
const tag = this.tag;
|
|
1098
|
+
tag.isSelfClosing = false;
|
|
1099
|
+
// Copy attributes from list to object
|
|
1100
|
+
for (const { name, value } of this.attribList) {
|
|
1101
|
+
tag.attributes[name] = value;
|
|
1102
|
+
}
|
|
1103
|
+
this.attribList = [];
|
|
1104
|
+
this.openTagHandler?.(tag);
|
|
1105
|
+
this.tags.push(tag);
|
|
1106
|
+
this.name = "";
|
|
1107
|
+
this.state = S_TEXT;
|
|
1108
|
+
}
|
|
1109
|
+
openSelfClosingTag() {
|
|
1110
|
+
const tag = this.tag;
|
|
1111
|
+
tag.isSelfClosing = true;
|
|
1112
|
+
// Copy attributes from list to object
|
|
1113
|
+
for (const { name, value } of this.attribList) {
|
|
1114
|
+
tag.attributes[name] = value;
|
|
1115
|
+
}
|
|
1116
|
+
this.attribList = [];
|
|
1117
|
+
this.openTagHandler?.(tag);
|
|
1118
|
+
this.closeTagHandler?.(tag);
|
|
1119
|
+
if (this.tags.length === 0) {
|
|
1120
|
+
this.closedRoot = true;
|
|
1121
|
+
}
|
|
1122
|
+
this.name = "";
|
|
1123
|
+
this.state = S_TEXT;
|
|
1124
|
+
}
|
|
1125
|
+
closeTag() {
|
|
1126
|
+
const { tags, name } = this;
|
|
1127
|
+
this.state = S_TEXT;
|
|
1128
|
+
this.name = "";
|
|
1129
|
+
if (name === "") {
|
|
1130
|
+
this.fail("empty close tag");
|
|
1131
|
+
this.text += "</>";
|
|
1132
|
+
return;
|
|
1133
|
+
}
|
|
1134
|
+
// Find matching open tag
|
|
1135
|
+
let found = false;
|
|
1136
|
+
for (let i = tags.length - 1; i >= 0; i--) {
|
|
1137
|
+
const tag = tags[i];
|
|
1138
|
+
if (tag.name === name) {
|
|
1139
|
+
// Pop all tags including the matching one
|
|
1140
|
+
while (tags.length > i) {
|
|
1141
|
+
const t = tags.pop();
|
|
1142
|
+
this.closeTagHandler?.(t);
|
|
1143
|
+
if (tags.length > i) {
|
|
1144
|
+
this.fail("unclosed tag: " + t.name);
|
|
1145
|
+
}
|
|
1146
|
+
}
|
|
1147
|
+
found = true;
|
|
1148
|
+
break;
|
|
1149
|
+
}
|
|
1150
|
+
}
|
|
1151
|
+
if (!found) {
|
|
1152
|
+
this.fail("unmatched close tag: " + name);
|
|
1153
|
+
this.text += "</" + name + ">";
|
|
1154
|
+
}
|
|
1155
|
+
if (tags.length === 0) {
|
|
1156
|
+
this.closedRoot = true;
|
|
1157
|
+
}
|
|
1158
|
+
}
|
|
1159
|
+
// End parsing
|
|
1160
|
+
end() {
|
|
1161
|
+
if (!this.sawRoot) {
|
|
1162
|
+
this.fail("document must contain a root element");
|
|
1163
|
+
}
|
|
1164
|
+
while (this.tags.length > 0) {
|
|
1165
|
+
const tag = this.tags.pop();
|
|
1166
|
+
this.fail("unclosed tag: " + tag.name);
|
|
1167
|
+
}
|
|
1168
|
+
if (this.text.length > 0 && this.textHandler) {
|
|
1169
|
+
this.textHandler(this.text);
|
|
1170
|
+
this.text = "";
|
|
1171
|
+
}
|
|
1172
|
+
this._closed = true;
|
|
1173
|
+
this._init();
|
|
1174
|
+
return this;
|
|
1175
|
+
}
|
|
1176
|
+
}
|
|
1177
|
+
// Expose the SaxesParser class on this module's CommonJS exports object.
exports.SaxesParser = SaxesParser;
|
|
1178
|
+
// ============================================================================
|
|
1179
|
+
// parseSax generator function
|
|
1180
|
+
// ============================================================================
|
|
6
1181
|
/**
 * Parse XML supplied as an (async) iterable of chunks and yield the SAX
 * events produced by each chunk.
 *
 * Each chunk is converted with `bufferToString`, written to a `SaxesParser`,
 * and the events collected during that write are yielded as one array (which
 * may be empty). After the input is exhausted the parser is closed and any
 * remaining events are yielded in a final, non-empty array.
 *
 * @param {AsyncIterable|Iterable} iterable - Source of XML chunks.
 * @yields {Array<{eventType: "opentag"|"text"|"closetag", value: *}>}
 * @throws The first error reported by the parser, raised after the write
 *   (or close) during which it occurred.
 */
async function* parseSax(iterable) {
    const parser = new SaxesParser({
        xmlns: false,
        position: true // Keep position for error messages
    });
    let error;
    parser.on("error", (err) => {
        // Keep the FIRST error: later errors raised during the same write are
        // usually consequences of it and would hide the root cause.
        if (error === undefined) {
            error = err;
        }
    });
    let events = [];
    // `events` is re-bound after every yield, so the handlers must read the
    // variable at call time instead of capturing one array.
    const collect = (eventType) => (value) => {
        events.push({ eventType, value });
    };
    parser.on("opentag", collect("opentag"));
    parser.on("text", collect("text"));
    parser.on("closetag", collect("closetag"));
    for await (const chunk of iterable) {
        // NOTE(review): chunks are decoded one by one; confirm bufferToString
        // copes with multi-byte characters split across chunk boundaries.
        parser.write((0, utils_1.bufferToString)(chunk));
        if (error) {
            throw error;
        }
        yield events;
        events = [];
    }
    parser.close();
    if (error) {
        throw error;
    }
    if (events.length > 0) {
        yield events;
    }
}
|