@langchain/core 0.1.24 → 0.1.25-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1557 @@
1
+ "use strict";
2
+ // @ts-nocheck
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.sax = void 0;
5
+ // Inlined to deal with portability issues
6
+ const initializeSax = function () {
7
// Namespace object that collects everything this module exposes.
const sax = {};
// Factory form of the parser constructor for callers that do not use `new`.
sax.parser = function (strict, opt) {
    const instance = new SAXParser(strict, opt);
    return instance;
};
// Constructors and the stream factory are exposed under their own names.
sax.SAXParser = SAXParser;
sax.SAXStream = SAXStream;
sax.createStream = createStream;
14
// Buffer-overrun policy:
//  - Checking only starts once the parse position passes MAX_BUFFER_LENGTH.
//  - Each check schedules the next one MAX_BUFFER_LENGTH - (longest buffer)
//    characters ahead, the earliest point at which an overrun could happen,
//    so checks are as rare as possible without ever missing the bound.
//  - Buffers are examined at most once per write(). A single very large
//    write() can therefore buffer up to one full copy of its argument; that
//    is considered the caller's responsibility.
// Set this to Infinity to disable the limit.
sax.MAX_BUFFER_LENGTH = 64 * 1024;
24
// Names of the parser properties that accumulate string data. They are reset
// together by clearBuffers() and measured together by checkBufferLength().
const buffers = [
    "comment", "sgmlDecl", "textNode", "tagName",
    "doctype", "procInstName", "procInstBody", "entity",
    "attribName", "attribValue", "cdata", "script",
];
38
// Every event name the parser can emit. A handler for event `x` is stored on
// the parser as the property `onx`.
sax.EVENTS = [
    "text", "processinginstruction", "sgmldeclaration", "doctype", "comment",
    "opentagstart", "attribute", "opentag", "closetag",
    "opencdata", "cdata", "closecdata",
    "error", "end", "ready", "script",
    "opennamespace", "closenamespace",
];
58
/**
 * Parser constructor; also works when called without `new`.
 *
 * Resets every piece of parser state and finally emits "onready". end() calls
 * it again on an existing instance to re-initialise that instance.
 *
 * @param {boolean} strict - Truthy for strict mode (also disables script handling).
 * @param {object} [opt] - Options object. It is stored by reference and its
 *   `lowercase` field is normalised from `lowercasetags` in place.
 */
function SAXParser(strict, opt) {
    if (!(this instanceof SAXParser)) {
        return new SAXParser(strict, opt);
    }
    var self = this;
    var options = opt || {};
    clearBuffers(self);
    self.c = "";
    self.q = "";
    self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH;
    self.opt = options;
    options.lowercase = options.lowercase || options.lowercasetags;
    // Name of the String method used to fold names in non-strict mode.
    self.looseCase = options.lowercase ? "toLowerCase" : "toUpperCase";
    self.tags = [];
    self.sawRoot = false;
    self.closedRoot = false;
    self.closed = false;
    self.error = null;
    self.tag = null;
    self.strict = !!strict;
    self.noscript = !!(strict || options.noscript);
    self.state = S.BEGIN;
    self.strictEntities = options.strictEntities;
    // Per-parser entity table that inherits from the shared one.
    var entityBase = self.strictEntities ? sax.XML_ENTITIES : sax.ENTITIES;
    self.ENTITIES = Object.create(entityBase);
    self.attribList = [];
    // Namespaces form a prototype chain: the current tag's `ns` object
    // inherits from its parent's, and the root inherits from rootNS.
    if (options.xmlns) {
        self.ns = Object.create(rootNS);
    }
    // Position tracking is on unless explicitly disabled; mostly used for
    // error reporting.
    self.trackPosition = options.position !== false;
    if (self.trackPosition) {
        self.column = 0;
        self.line = 0;
        self.position = 0;
    }
    emit(self, "onready");
}
93
// Minimal fallbacks for very old engines; both are no-ops wherever the
// built-ins already exist.
if (!Object.create) {
    // Single-argument form only: returns an object whose prototype is `o`.
    Object.create = function (o) {
        function F() { }
        F.prototype = o;
        return new F();
    };
}
if (!Object.keys) {
    // Collects the own enumerable property names of `o`.
    Object.keys = function (o) {
        var a = [];
        for (var i in o) {
            if (o.hasOwnProperty(i)) {
                a.push(i);
            }
        }
        return a;
    };
}
110
/**
 * Enforces sax.MAX_BUFFER_LENGTH across all parser buffers and schedules the
 * next check.
 *
 * Oversized text, cdata and script buffers are flushed to their handlers
 * (they can legitimately grow large); any other oversized buffer is reported
 * through error().
 *
 * @param {object} parser - Parser whose buffers are inspected.
 */
function checkBufferLength(parser) {
    var limit = Math.max(sax.MAX_BUFFER_LENGTH, 10);
    var longest = 0;
    buffers.forEach(function (name) {
        // Measured before any flush, so the pre-flush size feeds `longest`.
        var size = parser[name].length;
        if (size > limit) {
            if (name === "textNode") {
                closeText(parser);
            }
            else if (name === "cdata") {
                emitNode(parser, "oncdata", parser.cdata);
                parser.cdata = "";
            }
            else if (name === "script") {
                emitNode(parser, "onscript", parser.script);
                parser.script = "";
            }
            else {
                error(parser, "Max buffer length exceeded: " + name);
            }
        }
        longest = Math.max(longest, size);
    });
    // Earliest position at which any buffer could exceed the limit again.
    parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH - longest + parser.position;
}
142
/**
 * Resets every buffer listed in `buffers` to the empty string.
 *
 * @param {object} parser - Parser (or any object) to reset.
 */
function clearBuffers(parser) {
    buffers.forEach(function (name) {
        parser[name] = "";
    });
}
147
/**
 * Emits whatever text, cdata and script content is currently buffered.
 *
 * Text is always passed through closeText(); cdata and script are emitted
 * only when non-empty, and are cleared afterwards.
 *
 * @param {object} parser - Parser to flush.
 */
function flushBuffers(parser) {
    closeText(parser);
    [["cdata", "oncdata"], ["script", "onscript"]].forEach(function (pair) {
        var bufferName = pair[0];
        if (parser[bufferName] !== "") {
            emitNode(parser, pair[1], parser[bufferName]);
            parser[bufferName] = "";
        }
    });
}
158
// Public instance API of the parser. The prototype object is replaced
// wholesale, so instances do not carry a `constructor` property of their own.
SAXParser.prototype = {
    // Finishes the document; delegates to the end() helper.
    end: function () {
        end(this);
    },
    // Feeds a chunk to the parser; see write().
    write: write,
    // Clears a recorded error so that write() can be called again.
    resume: function () {
        this.error = null;
        return this;
    },
    // Same as write(null), which ends the document.
    close: function () {
        return this.write(null);
    },
    // Emits any buffered text, cdata and script content immediately.
    flush: function () {
        flushBuffers(this);
    },
};
174
// Base constructor used by SAXStream. The lookup goes through `typeof` so
// that a runtime without a global ReadableStream reaches the no-op fallback
// below; a bare reference to an undeclared global would throw a
// ReferenceError before the fallback could run.
var Stream = typeof ReadableStream !== "undefined" ? ReadableStream : undefined;
if (!Stream)
    Stream = function () { };
177
// Parser events that SAXStream forwards generically. "error" and "end" are
// left out because the SAXStream constructor wires those two explicitly.
var streamWraps = sax.EVENTS.filter(function (name) {
    return !(name === "error" || name === "end");
});
180
/**
 * Convenience factory for SAXStream.
 *
 * @param {boolean} strict - Forwarded to the SAXStream constructor.
 * @param {object} [opt] - Forwarded to the SAXStream constructor.
 * @returns {SAXStream} A new stream instance.
 */
function createStream(strict, opt) {
    var stream = new SAXStream(strict, opt);
    return stream;
}
183
/**
 * Stream-style wrapper around a SAXParser; also works without `new`.
 *
 * Creates the underlying parser, forwards its "end" and "error" events through
 * this.emit(), and defines an `on<event>` accessor for every name in
 * streamWraps.
 *
 * NOTE(review): `Stream` is invoked as a plain function. If it is a native
 * class (such as ReadableStream) that call throws, and the instance also
 * relies on emit/on/removeAllListeners being provided elsewhere — confirm.
 *
 * @param {boolean} strict - Forwarded to SAXParser.
 * @param {object} [opt] - Forwarded to SAXParser.
 */
function SAXStream(strict, opt) {
    if (!(this instanceof SAXStream)) {
        return new SAXStream(strict, opt);
    }
    var stream = this;
    Stream.apply(stream);
    stream._parser = new SAXParser(strict, opt);
    stream.writable = true;
    stream.readable = true;
    stream._parser.onend = function () {
        stream.emit("end");
    };
    stream._parser.onerror = function (er) {
        stream.emit("error", er);
        // Reaching this line means emit() did not throw, i.e. the error was
        // handled, so clear it to allow further writes.
        stream._parser.error = null;
    };
    stream._decoder = null;
    streamWraps.forEach(function (ev) {
        var handlerKey = "on" + ev;
        Object.defineProperty(stream, handlerKey, {
            get: function () {
                return stream._parser[handlerKey];
            },
            set: function (h) {
                if (h) {
                    stream.on(ev, h);
                    return;
                }
                // A falsy handler removes all listeners for the event.
                stream.removeAllListeners(ev);
                stream._parser[handlerKey] = h;
                return h;
            },
            enumerable: true,
            configurable: false,
        });
    });
}
220
// SAXStream instances inherit from Stream.prototype. `constructor` is
// restored as a non-enumerable, non-writable property.
SAXStream.prototype = Object.create(Stream.prototype);
Object.defineProperty(SAXStream.prototype, "constructor", {
    value: SAXStream,
});
225
/**
 * Feeds a chunk to the underlying parser and re-emits it as a "data" event.
 *
 * @param {*} data - Chunk to parse; converted with toString() for the parser,
 *   while the "data" event receives the original value.
 * @returns {boolean} Always true.
 */
SAXStream.prototype.write = function (data) {
    var text = data.toString();
    this._parser.write(text);
    this.emit("data", data);
    return true;
};
230
/**
 * Ends the stream, first writing an optional final chunk.
 *
 * @param {*} [chunk] - Final chunk; written only when it has a truthy length.
 * @returns {boolean} Always true.
 */
SAXStream.prototype.end = function (chunk) {
    var hasFinalChunk = chunk && chunk.length;
    if (hasFinalChunk) {
        this.write(chunk);
    }
    this._parser.end();
    return true;
};
237
/**
 * Registers a listener. For wrapped parser events it also installs, once, a
 * parser handler that re-emits the event on this stream.
 *
 * @param {string} ev - Event name.
 * @param {Function} handler - Listener, handed to Stream.prototype.on.
 * @returns {*} Whatever Stream.prototype.on returns.
 */
SAXStream.prototype.on = function (ev, handler) {
    var stream = this;
    var handlerKey = "on" + ev;
    if (!stream._parser[handlerKey] && streamWraps.indexOf(ev) !== -1) {
        stream._parser[handlerKey] = function () {
            // Forward every argument, with the event name prepended.
            var forwarded = Array.prototype.slice.call(arguments);
            forwarded.unshift(ev);
            stream.emit.apply(stream, forwarded);
        };
    }
    return Stream.prototype.on.call(stream, ev, handler);
};
250
// Character handling below should really be done with character classes:
// XML allows a very wide range of letters and digits.

// Literal markers recognised after "<!".
var CDATA = "[CDATA[";
var DOCTYPE = "DOCTYPE";
// Namespace URIs for the reserved "xml" and "xmlns" prefixes.
var XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
var XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
// Root of the namespace prototype chain; binds only the two reserved prefixes.
var rootNS = {
    xml: XML_NAMESPACE,
    xmlns: XMLNS_NAMESPACE,
};
257
// Single-character classes based on the XML NameStartChar / NameChar
// productions (http://www.w3.org/TR/REC-xml/#NT-NameStartChar).
// The parser examines one UTF-16 code unit at a time, so astral-plane
// characters (10000-EFFFF) cannot be supported without a breaking change to
// the parser itself.
// nameStart:   characters allowed to begin a name.
// nameBody:    nameStart plus digits, ".", "-" and the combining ranges.
// entityStart: nameStart plus "#".
// entityBody:  nameBody plus "#".
var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/;
var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/;
267
/**
 * @param {string} c - A single character (may be the empty string).
 * @returns {boolean} True only for space, line feed, carriage return or tab.
 */
function isWhitespace(c) {
    switch (c) {
        case " ":
        case "\n":
        case "\r":
        case "\t":
            return true;
        default:
            return false;
    }
}
270
/**
 * @param {string} c - A single character.
 * @returns {boolean} True for a double or single quote character.
 */
function isQuote(c) {
    switch (c) {
        case '"':
        case "'":
            return true;
        default:
            return false;
    }
}
273
/**
 * @param {string} c - A single character.
 * @returns {boolean} True when `c` is ">" or whitespace.
 */
function isAttribEnd(c) {
    if (c === ">") {
        return true;
    }
    return isWhitespace(c);
}
276
/**
 * @param {RegExp} regex - Pattern to apply.
 * @param {string} c - Character to test.
 * @returns {boolean} Result of regex.test(c).
 */
function isMatch(regex, c) {
    var matched = regex.test(c);
    return matched;
}
279
/**
 * Negation of isMatch().
 *
 * @param {RegExp} regex - Pattern to apply.
 * @param {string} c - Character to test.
 * @returns {boolean} True when `c` does not match `regex`.
 */
function notMatch(regex, c) {
    return isMatch(regex, c) === false;
}
282
// `S` is a counter while the table is built; each state name receives the
// next integer in listing order. It is re-pointed at the table further down.
var S = 0;
sax.STATE = {};
[
    "BEGIN",
    "BEGIN_WHITESPACE",
    "TEXT",
    "TEXT_ENTITY",
    "OPEN_WAKA",
    "SGML_DECL",
    "SGML_DECL_QUOTED",
    "DOCTYPE",
    "DOCTYPE_QUOTED",
    "DOCTYPE_DTD",
    "DOCTYPE_DTD_QUOTED",
    "COMMENT_STARTING",
    "COMMENT",
    "COMMENT_ENDING",
    "COMMENT_ENDED",
    "CDATA",
    "CDATA_ENDING",
    "CDATA_ENDING_2",
    "PROC_INST",
    "PROC_INST_BODY",
    "PROC_INST_ENDING",
    "OPEN_TAG",
    "OPEN_TAG_SLASH",
    "ATTRIB",
    "ATTRIB_NAME",
    "ATTRIB_NAME_SAW_WHITE",
    "ATTRIB_VALUE",
    "ATTRIB_VALUE_QUOTED",
    "ATTRIB_VALUE_CLOSED",
    "ATTRIB_VALUE_UNQUOTED",
    "ATTRIB_VALUE_ENTITY_Q",
    "ATTRIB_VALUE_ENTITY_U",
    "CLOSE_TAG",
    "CLOSE_TAG_SAW_WHITE",
    "SCRIPT",
    "SCRIPT_ENDING", // <script> ... <
].forEach(function (stateName) {
    sax.STATE[stateName] = S++;
});
321
// The five entities predefined by XML; this is the base table for parsers
// created with the `strictEntities` option.
sax.XML_ENTITIES = { amp: "&", gt: ">", lt: "<", quot: '"', apos: "'" };
328
// Default named-entity table: the five XML entities followed by a set of
// further named entities. Numeric values are character codes; they are
// converted to strings right after this table is defined.
sax.ENTITIES = {
    amp: "&", gt: ">", lt: "<", quot: '"', apos: "'",
    // Latin capital letters
    AElig: 198, Aacute: 193, Acirc: 194, Agrave: 192, Aring: 197, Atilde: 195,
    Auml: 196, Ccedil: 199, ETH: 208, Eacute: 201, Ecirc: 202, Egrave: 200,
    Euml: 203, Iacute: 205, Icirc: 206, Igrave: 204, Iuml: 207, Ntilde: 209,
    Oacute: 211, Ocirc: 212, Ograve: 210, Oslash: 216, Otilde: 213, Ouml: 214,
    THORN: 222, Uacute: 218, Ucirc: 219, Ugrave: 217, Uuml: 220, Yacute: 221,
    // Latin small letters
    aacute: 225, acirc: 226, aelig: 230, agrave: 224, aring: 229, atilde: 227,
    auml: 228, ccedil: 231, eacute: 233, ecirc: 234, egrave: 232, eth: 240,
    euml: 235, iacute: 237, icirc: 238, igrave: 236, iuml: 239, ntilde: 241,
    oacute: 243, ocirc: 244, ograve: 242, oslash: 248, otilde: 245, ouml: 246,
    szlig: 223, thorn: 254, uacute: 250, ucirc: 251, ugrave: 249, uuml: 252,
    yacute: 253, yuml: 255,
    // Latin-1 symbols
    copy: 169, reg: 174, nbsp: 160, iexcl: 161, cent: 162, pound: 163,
    curren: 164, yen: 165, brvbar: 166, sect: 167, uml: 168, ordf: 170,
    laquo: 171, not: 172, shy: 173, macr: 175, deg: 176, plusmn: 177,
    sup1: 185, sup2: 178, sup3: 179, acute: 180, micro: 181, para: 182,
    middot: 183, cedil: 184, ordm: 186, raquo: 187, frac14: 188, frac12: 189,
    frac34: 190, iquest: 191, times: 215, divide: 247,
    // Extended Latin and modifiers
    OElig: 338, oelig: 339, Scaron: 352, scaron: 353, Yuml: 376, fnof: 402,
    circ: 710, tilde: 732,
    // Greek capital letters
    Alpha: 913, Beta: 914, Gamma: 915, Delta: 916, Epsilon: 917, Zeta: 918,
    Eta: 919, Theta: 920, Iota: 921, Kappa: 922, Lambda: 923, Mu: 924,
    Nu: 925, Xi: 926, Omicron: 927, Pi: 928, Rho: 929, Sigma: 931,
    Tau: 932, Upsilon: 933, Phi: 934, Chi: 935, Psi: 936, Omega: 937,
    // Greek small letters
    alpha: 945, beta: 946, gamma: 947, delta: 948, epsilon: 949, zeta: 950,
    eta: 951, theta: 952, iota: 953, kappa: 954, lambda: 955, mu: 956,
    nu: 957, xi: 958, omicron: 959, pi: 960, rho: 961, sigmaf: 962,
    sigma: 963, tau: 964, upsilon: 965, phi: 966, chi: 967, psi: 968,
    omega: 969, thetasym: 977, upsih: 978, piv: 982,
    // Spacing and punctuation
    ensp: 8194, emsp: 8195, thinsp: 8201, zwnj: 8204, zwj: 8205, lrm: 8206,
    rlm: 8207, ndash: 8211, mdash: 8212, lsquo: 8216, rsquo: 8217, sbquo: 8218,
    ldquo: 8220, rdquo: 8221, bdquo: 8222, dagger: 8224, Dagger: 8225,
    bull: 8226, hellip: 8230, permil: 8240, prime: 8242, Prime: 8243,
    lsaquo: 8249, rsaquo: 8250, oline: 8254, frasl: 8260, euro: 8364,
    image: 8465, weierp: 8472, real: 8476, trade: 8482, alefsym: 8501,
    // Arrows
    larr: 8592, uarr: 8593, rarr: 8594, darr: 8595, harr: 8596, crarr: 8629,
    lArr: 8656, uArr: 8657, rArr: 8658, dArr: 8659, hArr: 8660,
    // Mathematical and miscellaneous symbols
    forall: 8704, part: 8706, exist: 8707, empty: 8709, nabla: 8711,
    isin: 8712, notin: 8713, ni: 8715, prod: 8719, sum: 8721, minus: 8722,
    lowast: 8727, radic: 8730, prop: 8733, infin: 8734, ang: 8736, and: 8743,
    or: 8744, cap: 8745, cup: 8746, int: 8747, there4: 8756, sim: 8764,
    cong: 8773, asymp: 8776, ne: 8800, equiv: 8801, le: 8804, ge: 8805,
    sub: 8834, sup: 8835, nsub: 8836, sube: 8838, supe: 8839, oplus: 8853,
    otimes: 8855, perp: 8869, sdot: 8901, lceil: 8968, rceil: 8969,
    lfloor: 8970, rfloor: 8971, lang: 9001, rang: 9002, loz: 9674,
    spades: 9824, clubs: 9827, hearts: 9829, diams: 9830,
};
583
// Replace every numeric character code in the entity table with the
// one-character string it stands for; string entries are kept unchanged.
Object.keys(sax.ENTITIES).forEach(function (name) {
    var raw = sax.ENTITIES[name];
    sax.ENTITIES[name] = typeof raw === "number" ? String.fromCharCode(raw) : raw;
});
588
// Add the reverse lookup (numeric id -> state name) next to the forward one.
for (var stateName in sax.STATE) {
    sax.STATE[sax.STATE[stateName]] = stateName;
}
// From here on `S` is shorthand for the state table, not the build counter.
S = sax.STATE;
593
/**
 * Invokes the handler stored on the parser under `event`, if there is one.
 *
 * @param {object} parser - Object holding the handlers.
 * @param {string} event - Handler property name, e.g. "ontext".
 * @param {*} [data] - Single argument passed to the handler.
 */
function emit(parser, event, data) {
    if (parser[event]) {
        parser[event](data);
    }
}
596
/**
 * Emits a node event, flushing any pending text first so that the text event
 * precedes it.
 *
 * @param {object} parser - Parser emitting the event.
 * @param {string} nodeType - Handler property name, e.g. "onopentag".
 * @param {*} [data] - Payload for the handler.
 */
function emitNode(parser, nodeType, data) {
    var pendingText = parser.textNode;
    if (pendingText) {
        closeText(parser);
    }
    emit(parser, nodeType, data);
}
601
/**
 * Emits the buffered text node (after applying the trim/normalize options)
 * and clears the buffer. Nothing is emitted when the processed text is empty.
 *
 * @param {object} parser - Parser whose text buffer is flushed.
 */
function closeText(parser) {
    var text = textopts(parser.opt, parser.textNode);
    // Store the processed text before emitting, so a handler that inspects
    // parser.textNode sees the same value it receives.
    parser.textNode = text;
    if (text) {
        emit(parser, "ontext", text);
    }
    parser.textNode = "";
}
607
/**
 * Applies the `trim` and `normalize` options to a piece of text.
 *
 * @param {object} opt - Options; `trim` strips surrounding whitespace and
 *   `normalize` collapses each whitespace run to a single space.
 * @param {string} text - Text to process.
 * @returns {string} The processed text.
 */
function textopts(opt, text) {
    var trimmed = opt.trim ? text.trim() : text;
    return opt.normalize ? trimmed.replace(/\s+/g, " ") : trimmed;
}
614
/**
 * Records an error on the parser and emits it as "onerror".
 *
 * Pending text is flushed first. When position tracking is enabled, the
 * current line, column and character are appended to the message.
 *
 * @param {object} parser - Parser that hit the error.
 * @param {string} er - Error message.
 * @returns {object} The parser.
 */
function error(parser, er) {
    closeText(parser);
    var message = er;
    if (parser.trackPosition) {
        message = message +
            "\nLine: " + parser.line +
            "\nColumn: " + parser.column +
            "\nChar: " + parser.c;
    }
    var failure = new Error(message);
    parser.error = failure;
    emit(parser, "onerror", failure);
    return parser;
}
630
/**
 * Finishes the current document: reports an unclosed root (strict mode) or an
 * unexpected end, flushes text, emits "onend" and re-initialises the parser.
 *
 * @param {object} parser - Parser to finish.
 * @returns {object} The (re-initialised) parser.
 */
function end(parser) {
    if (parser.sawRoot && !parser.closedRoot) {
        strictFail(parser, "Unclosed root tag");
    }
    // The document may only end before the first tag or in plain text.
    var endableStates = [S.BEGIN, S.BEGIN_WHITESPACE, S.TEXT];
    if (endableStates.indexOf(parser.state) === -1) {
        error(parser, "Unexpected end");
    }
    closeText(parser);
    parser.c = "";
    parser.closed = true;
    emit(parser, "onend");
    // Re-run the constructor on this instance to reset all state.
    SAXParser.call(parser, parser.strict, parser.opt);
    return parser;
}
645
/**
 * Reports a problem that is an error only in strict mode.
 *
 * @param {SAXParser} parser - Parser that hit the problem.
 * @param {string} message - Error message.
 * @throws {Error} When `parser` is not a SAXParser instance.
 */
function strictFail(parser, message) {
    var isParser = typeof parser === "object" && parser instanceof SAXParser;
    if (!isParser) {
        throw new Error("bad call to strictFail");
    }
    if (!parser.strict) {
        return;
    }
    error(parser, message);
}
653
/**
 * Starts a new tag from the buffered tag name and emits "onopentagstart".
 *
 * In non-strict mode the name is first case-folded with parser.looseCase.
 *
 * @param {object} parser - Parser whose `tagName` buffer holds the name.
 */
function newTag(parser) {
    if (!parser.strict) {
        parser.tagName = parser.tagName[parser.looseCase]();
    }
    var tag = { name: parser.tagName, attributes: {} };
    parser.tag = tag;
    if (parser.opt.xmlns) {
        // Start from the enclosing scope's namespace object (the parser's own
        // for the root tag). attrib() replaces it if this tag declares an
        // xmlns="foo" or xmlns:foo="bar" binding.
        var parent = parser.tags[parser.tags.length - 1] || parser;
        tag.ns = parent.ns;
    }
    parser.attribList.length = 0;
    emitNode(parser, "onopentagstart", tag);
}
665
/**
 * Splits a qualified name into its prefix and local part.
 *
 * @param {string} name - Name such as "svg:rect" or "rect".
 * @param {boolean} [attribute] - Truthy when `name` is an attribute name; a
 *   bare "xmlns" attribute is then reported as prefix "xmlns" with an empty
 *   local part.
 * @returns {{prefix: string, local: string}} The two parts. Only the first
 *   two colon-separated segments are used.
 */
function qname(name, attribute) {
    // <x "xmlns"="http://foo">
    if (attribute && name === "xmlns") {
        return { prefix: "xmlns", local: "" };
    }
    if (name.indexOf(":") < 0) {
        return { prefix: "", local: name };
    }
    var parts = name.split(":");
    return { prefix: parts[0], local: parts[1] };
}
677
/**
 * Commits the attribute currently held in parser.attribName / attribValue.
 *
 * A repeated attribute name on the same tag is ignored (first value wins).
 * In xmlns mode, namespace bindings are applied to the tag's `ns` object and
 * the attribute is queued on parser.attribList for openTag() to emit; otherwise
 * the attribute is stored on the tag and "onattribute" is emitted at once.
 * Both attribute buffers are cleared before returning.
 *
 * @param {object} parser - Parser holding the attribute buffers and current tag.
 */
function attrib(parser) {
    if (!parser.strict) {
        parser.attribName = parser.attribName[parser.looseCase]();
    }
    var attribName = parser.attribName;
    // attribList holds [name, value] pairs, so compare against the name slot.
    var alreadyQueued = parser.attribList.some(function (pair) {
        return pair[0] === attribName;
    });
    // Go through Object.prototype: an attribute literally named
    // "hasOwnProperty" may already shadow the method on the attributes object.
    var alreadyStored = Object.prototype.hasOwnProperty.call(parser.tag.attributes, attribName);
    if (alreadyQueued || alreadyStored) {
        parser.attribName = parser.attribValue = "";
        return;
    }
    if (parser.opt.xmlns) {
        var qn = qname(parser.attribName, true);
        var prefix = qn.prefix;
        var local = qn.local;
        if (prefix === "xmlns") {
            // namespace binding attribute. push the binding into scope
            if (local === "xml" && parser.attribValue !== XML_NAMESPACE) {
                strictFail(parser, "xml: prefix must be bound to " +
                    XML_NAMESPACE +
                    "\n" +
                    "Actual: " +
                    parser.attribValue);
            }
            else if (local === "xmlns" &&
                parser.attribValue !== XMLNS_NAMESPACE) {
                strictFail(parser, "xmlns: prefix must be bound to " +
                    XMLNS_NAMESPACE +
                    "\n" +
                    "Actual: " +
                    parser.attribValue);
            }
            else {
                var tag = parser.tag;
                var parent = parser.tags[parser.tags.length - 1] || parser;
                // First binding on this tag: give it its own scope object
                // that inherits from the parent's.
                if (tag.ns === parent.ns) {
                    tag.ns = Object.create(parent.ns);
                }
                tag.ns[local] = parser.attribValue;
            }
        }
        // defer onattribute events until all attributes have been seen
        // so any new bindings can take effect. preserve attribute order
        // so deferred events can be emitted in document order
        parser.attribList.push([parser.attribName, parser.attribValue]);
    }
    else {
        // in non-xmlns mode, we can emit the event right away
        parser.tag.attributes[parser.attribName] = parser.attribValue;
        emitNode(parser, "onattribute", {
            name: parser.attribName,
            value: parser.attribValue,
        });
    }
    parser.attribName = parser.attribValue = "";
}
731
/**
 * Completes the current open tag: resolves namespaces (xmlns mode), emits the
 * deferred attribute events, pushes the tag on the stack and emits "onopentag".
 *
 * @param {object} parser - Parser whose `tag` is being opened.
 * @param {boolean} [selfClosing] - Truthy for `<tag/>`; the parser state and
 *   current tag are then left untouched.
 */
function openTag(parser, selfClosing) {
    if (parser.opt.xmlns) {
        var tag = parser.tag;
        // Attach namespace information for the element name itself.
        var tagQName = qname(parser.tagName);
        tag.prefix = tagQName.prefix;
        tag.local = tagQName.local;
        tag.uri = tag.ns[tagQName.prefix] || "";
        if (tag.prefix && !tag.uri) {
            strictFail(parser, "Unbound namespace prefix: " + JSON.stringify(parser.tagName));
            tag.uri = tagQName.prefix;
        }
        // Announce the bindings this tag introduced; it has its own `ns`
        // object only if it declared at least one.
        var parent = parser.tags[parser.tags.length - 1] || parser;
        if (tag.ns && parent.ns !== tag.ns) {
            Object.keys(tag.ns).forEach(function (boundPrefix) {
                emitNode(parser, "onopennamespace", {
                    prefix: boundPrefix,
                    uri: tag.ns[boundPrefix],
                });
            });
        }
        // Emit the attribute events deferred by attrib(), in document order.
        // The default namespace is not applied to attributes:
        // http://www.w3.org/TR/REC-xml-names/#defaulting
        for (var index = 0, count = parser.attribList.length; index < count; index++) {
            var pair = parser.attribList[index];
            var attrName = pair[0];
            var attrQName = qname(attrName, true);
            var attrPrefix = attrQName.prefix;
            var attrUri = attrPrefix === "" ? "" : tag.ns[attrPrefix] || "";
            var attribute = {
                name: attrName,
                value: pair[1],
                prefix: attrPrefix,
                local: attrQName.local,
                uri: attrUri,
            };
            // A prefixed attribute whose prefix is unbound fails here.
            if (attrPrefix && attrPrefix !== "xmlns" && !attrUri) {
                strictFail(parser, "Unbound namespace prefix: " + JSON.stringify(attrPrefix));
                attribute.uri = attrPrefix;
            }
            parser.tag.attributes[attrName] = attribute;
            emitNode(parser, "onattribute", attribute);
        }
        parser.attribList.length = 0;
    }
    parser.tag.isSelfClosing = !!selfClosing;
    // Register the tag and announce it.
    parser.sawRoot = true;
    parser.tags.push(parser.tag);
    emitNode(parser, "onopentag", parser.tag);
    if (!selfClosing) {
        // <script> content gets its own state unless script handling is off.
        var isScript = !parser.noscript && parser.tagName.toLowerCase() === "script";
        parser.state = isScript ? S.SCRIPT : S.TEXT;
        parser.tag = null;
        parser.tagName = "";
    }
    parser.attribName = parser.attribValue = "";
    parser.attribList.length = 0;
}
801
/**
 * Handles a closing tag whose name is buffered in parser.tagName.
 *
 * Pops every open tag down to and including the nearest one with a matching
 * name, emitting "onclosetag" (and "onclosenamespace" in xmlns mode) for each.
 * An empty or unmatched closing tag is appended to the text buffer as literal
 * text. While script content is buffered, any closing tag other than "script"
 * is appended to the script buffer.
 *
 * @param {object} parser - Parser handling the closing tag.
 */
function closeTag(parser) {
    if (!parser.tagName) {
        strictFail(parser, "Weird empty close tag.");
        parser.textNode += "</>";
        parser.state = S.TEXT;
        return;
    }
    if (parser.script) {
        if (parser.tagName !== "script") {
            parser.script += "</" + parser.tagName + ">";
            parser.tagName = "";
            parser.state = S.SCRIPT;
            return;
        }
        emitNode(parser, "onscript", parser.script);
        parser.script = "";
    }
    // first make sure that the closing tag actually exists.
    // <a><b></c></b></a> will close everything, otherwise.
    var t = parser.tags.length;
    var tagName = parser.tagName;
    if (!parser.strict) {
        tagName = tagName[parser.looseCase]();
    }
    var closeTo = tagName;
    while (t--) {
        var close = parser.tags[t];
        if (close.name !== closeTo) {
            // Reported (strict mode only) for each open tag that is skipped.
            strictFail(parser, "Unexpected close tag");
        }
        else {
            break;
        }
    }
    // didn't find it. we already failed for strict, so just abort.
    if (t < 0) {
        strictFail(parser, "Unmatched closing tag: " + parser.tagName);
        parser.textNode += "</" + parser.tagName + ">";
        parser.state = S.TEXT;
        return;
    }
    parser.tagName = tagName;
    // Pop from the top of the stack down to index t, inclusive.
    var s = parser.tags.length;
    while (s-- > t) {
        var tag = (parser.tag = parser.tags.pop());
        parser.tagName = parser.tag.name;
        emitNode(parser, "onclosetag", parser.tagName);
        var parent = parser.tags[parser.tags.length - 1] || parser;
        if (parser.opt.xmlns && tag.ns !== parent.ns) {
            // remove namespace bindings introduced by tag
            Object.keys(tag.ns).forEach(function (p) {
                var n = tag.ns[p];
                emitNode(parser, "onclosenamespace", { prefix: p, uri: n });
            });
        }
    }
    if (t === 0)
        parser.closedRoot = true;
    parser.tagName = parser.attribValue = parser.attribName = "";
    parser.attribList.length = 0;
    parser.state = S.TEXT;
}
868
/**
 * Resolves the entity name buffered in parser.entity to its replacement text.
 *
 * Named entities are looked up in parser.ENTITIES, first as written and then
 * lower-cased. Numeric references ("#123", "#x7B") are decoded as code
 * points. Anything else is reported through strictFail() and returned
 * unchanged as "&name;".
 *
 * @param {object} parser - Parser holding `entity` and the `ENTITIES` table.
 * @returns {string} Replacement text, or the original reference if invalid.
 */
function parseEntity(parser) {
    var entity = parser.entity;
    var entityLC = entity.toLowerCase();
    var num;
    var numStr = "";
    // The table inherits (eventually) from Object.prototype, so a reference
    // such as "&constructor;" or "&__proto__;" would otherwise resolve to an
    // inherited function or object. Only primitive replacements are accepted.
    function lookupNamed(key) {
        var value = parser.ENTITIES[key];
        return typeof value === "string" || typeof value === "number"
            ? value
            : undefined;
    }
    var named = lookupNamed(entity) || lookupNamed(entityLC);
    if (named) {
        return named;
    }
    entity = entityLC;
    if (entity.charAt(0) === "#") {
        if (entity.charAt(1) === "x") {
            entity = entity.slice(2);
            num = parseInt(entity, 16);
            numStr = num.toString(16);
        }
        else {
            entity = entity.slice(1);
            num = parseInt(entity, 10);
            numStr = num.toString(10);
        }
    }
    // Leading zeros are allowed; apart from them the digits must round-trip.
    entity = entity.replace(/^0+/, "");
    // String.fromCodePoint throws a RangeError outside 0..0x10FFFF, so such
    // references are treated as invalid instead of crashing the parse.
    if (isNaN(num) ||
        numStr.toLowerCase() !== entity ||
        num < 0 ||
        num > 0x10ffff) {
        strictFail(parser, "Invalid character entity");
        return "&" + parser.entity + ";";
    }
    return String.fromCodePoint(num);
}
899
/**
 * Handles one character seen before the first tag of the document.
 *
 * "<" starts a tag, whitespace is skipped, and anything else is treated as
 * the start of a text node (an error in strict mode).
 *
 * @param {object} parser - Parser being driven.
 * @param {string} c - Current character.
 */
function beginWhiteSpace(parser, c) {
    if (c === "<") {
        parser.state = S.OPEN_WAKA;
        parser.startTagPosition = parser.position;
        return;
    }
    if (isWhitespace(c)) {
        return;
    }
    // Text before the first tag: unusual, but it is processed as a text node.
    strictFail(parser, "Non-whitespace before first tag.");
    parser.textNode = c;
    parser.state = S.TEXT;
}
912
/**
 * Returns the character at index `i` of `chunk`, or "" once `i` reaches the
 * end of the chunk.
 *
 * @param {string} chunk - Text being parsed.
 * @param {number} i - Index to read.
 * @returns {string} A one-character string, or "".
 */
function charAt(chunk, i) {
    return i < chunk.length ? chunk.charAt(i) : "";
}
919
+ function write(chunk) {
920
+ var parser = this;
921
+ if (this.error) {
922
+ throw this.error;
923
+ }
924
+ if (parser.closed) {
925
+ return error(parser, "Cannot write after close. Assign an onready handler.");
926
+ }
927
+ if (chunk === null) {
928
+ return end(parser);
929
+ }
930
+ if (typeof chunk === "object") {
931
+ chunk = chunk.toString();
932
+ }
933
+ var i = 0;
934
+ var c = "";
935
+ while (true) {
936
+ c = charAt(chunk, i++);
937
+ parser.c = c;
938
+ if (!c) {
939
+ break;
940
+ }
941
+ if (parser.trackPosition) {
942
+ parser.position++;
943
+ if (c === "\n") {
944
+ parser.line++;
945
+ parser.column = 0;
946
+ }
947
+ else {
948
+ parser.column++;
949
+ }
950
+ }
951
+ switch (parser.state) {
952
+ case S.BEGIN:
953
+ parser.state = S.BEGIN_WHITESPACE;
954
+ if (c === "\uFEFF") {
955
+ continue;
956
+ }
957
+ beginWhiteSpace(parser, c);
958
+ continue;
959
+ case S.BEGIN_WHITESPACE:
960
+ beginWhiteSpace(parser, c);
961
+ continue;
962
+ case S.TEXT:
963
+ if (parser.sawRoot && !parser.closedRoot) {
964
+ var starti = i - 1;
965
+ while (c && c !== "<" && c !== "&") {
966
+ c = charAt(chunk, i++);
967
+ if (c && parser.trackPosition) {
968
+ parser.position++;
969
+ if (c === "\n") {
970
+ parser.line++;
971
+ parser.column = 0;
972
+ }
973
+ else {
974
+ parser.column++;
975
+ }
976
+ }
977
+ }
978
+ parser.textNode += chunk.substring(starti, i - 1);
979
+ }
980
+ if (c === "<" &&
981
+ !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
982
+ parser.state = S.OPEN_WAKA;
983
+ parser.startTagPosition = parser.position;
984
+ }
985
+ else {
986
+ if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) {
987
+ strictFail(parser, "Text data outside of root node.");
988
+ }
989
+ if (c === "&") {
990
+ parser.state = S.TEXT_ENTITY;
991
+ }
992
+ else {
993
+ parser.textNode += c;
994
+ }
995
+ }
996
+ continue;
997
+ case S.SCRIPT:
998
+ // only non-strict
999
+ if (c === "<") {
1000
+ parser.state = S.SCRIPT_ENDING;
1001
+ }
1002
+ else {
1003
+ parser.script += c;
1004
+ }
1005
+ continue;
1006
+ case S.SCRIPT_ENDING:
1007
+ if (c === "/") {
1008
+ parser.state = S.CLOSE_TAG;
1009
+ }
1010
+ else {
1011
+ parser.script += "<" + c;
1012
+ parser.state = S.SCRIPT;
1013
+ }
1014
+ continue;
1015
+ case S.OPEN_WAKA:
1016
+ // either a /, ?, !, or text is coming next.
1017
+ if (c === "!") {
1018
+ parser.state = S.SGML_DECL;
1019
+ parser.sgmlDecl = "";
1020
+ }
1021
+ else if (isWhitespace(c)) {
1022
+ // wait for it...
1023
+ }
1024
+ else if (isMatch(nameStart, c)) {
1025
+ parser.state = S.OPEN_TAG;
1026
+ parser.tagName = c;
1027
+ }
1028
+ else if (c === "/") {
1029
+ parser.state = S.CLOSE_TAG;
1030
+ parser.tagName = "";
1031
+ }
1032
+ else if (c === "?") {
1033
+ parser.state = S.PROC_INST;
1034
+ parser.procInstName = parser.procInstBody = "";
1035
+ }
1036
+ else {
1037
+ strictFail(parser, "Unencoded <");
1038
+ // if there was some whitespace, then add that in.
1039
+ if (parser.startTagPosition + 1 < parser.position) {
1040
+ var pad = parser.position - parser.startTagPosition;
1041
+ c = new Array(pad).join(" ") + c;
1042
+ }
1043
+ parser.textNode += "<" + c;
1044
+ parser.state = S.TEXT;
1045
+ }
1046
+ continue;
1047
+ case S.SGML_DECL:
1048
+ if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
1049
+ emitNode(parser, "onopencdata");
1050
+ parser.state = S.CDATA;
1051
+ parser.sgmlDecl = "";
1052
+ parser.cdata = "";
1053
+ }
1054
+ else if (parser.sgmlDecl + c === "--") {
1055
+ parser.state = S.COMMENT;
1056
+ parser.comment = "";
1057
+ parser.sgmlDecl = "";
1058
+ }
1059
+ else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
1060
+ parser.state = S.DOCTYPE;
1061
+ if (parser.doctype || parser.sawRoot) {
1062
+ strictFail(parser, "Inappropriately located doctype declaration");
1063
+ }
1064
+ parser.doctype = "";
1065
+ parser.sgmlDecl = "";
1066
+ }
1067
+ else if (c === ">") {
1068
+ emitNode(parser, "onsgmldeclaration", parser.sgmlDecl);
1069
+ parser.sgmlDecl = "";
1070
+ parser.state = S.TEXT;
1071
+ }
1072
+ else if (isQuote(c)) {
1073
+ parser.state = S.SGML_DECL_QUOTED;
1074
+ parser.sgmlDecl += c;
1075
+ }
1076
+ else {
1077
+ parser.sgmlDecl += c;
1078
+ }
1079
+ continue;
1080
+ case S.SGML_DECL_QUOTED:
1081
+ if (c === parser.q) {
1082
+ parser.state = S.SGML_DECL;
1083
+ parser.q = "";
1084
+ }
1085
+ parser.sgmlDecl += c;
1086
+ continue;
1087
+ case S.DOCTYPE:
1088
+ if (c === ">") {
1089
+ parser.state = S.TEXT;
1090
+ emitNode(parser, "ondoctype", parser.doctype);
1091
+ parser.doctype = true; // just remember that we saw it.
1092
+ }
1093
+ else {
1094
+ parser.doctype += c;
1095
+ if (c === "[") {
1096
+ parser.state = S.DOCTYPE_DTD;
1097
+ }
1098
+ else if (isQuote(c)) {
1099
+ parser.state = S.DOCTYPE_QUOTED;
1100
+ parser.q = c;
1101
+ }
1102
+ }
1103
+ continue;
1104
+ case S.DOCTYPE_QUOTED:
1105
+ parser.doctype += c;
1106
+ if (c === parser.q) {
1107
+ parser.q = "";
1108
+ parser.state = S.DOCTYPE;
1109
+ }
1110
+ continue;
1111
+ case S.DOCTYPE_DTD:
1112
+ parser.doctype += c;
1113
+ if (c === "]") {
1114
+ parser.state = S.DOCTYPE;
1115
+ }
1116
+ else if (isQuote(c)) {
1117
+ parser.state = S.DOCTYPE_DTD_QUOTED;
1118
+ parser.q = c;
1119
+ }
1120
+ continue;
1121
+ case S.DOCTYPE_DTD_QUOTED:
1122
+ parser.doctype += c;
1123
+ if (c === parser.q) {
1124
+ parser.state = S.DOCTYPE_DTD;
1125
+ parser.q = "";
1126
+ }
1127
+ continue;
1128
+ case S.COMMENT:
1129
+ if (c === "-") {
1130
+ parser.state = S.COMMENT_ENDING;
1131
+ }
1132
+ else {
1133
+ parser.comment += c;
1134
+ }
1135
+ continue;
1136
+ case S.COMMENT_ENDING:
1137
+ if (c === "-") {
1138
+ parser.state = S.COMMENT_ENDED;
1139
+ parser.comment = textopts(parser.opt, parser.comment);
1140
+ if (parser.comment) {
1141
+ emitNode(parser, "oncomment", parser.comment);
1142
+ }
1143
+ parser.comment = "";
1144
+ }
1145
+ else {
1146
+ parser.comment += "-" + c;
1147
+ parser.state = S.COMMENT;
1148
+ }
1149
+ continue;
1150
+ case S.COMMENT_ENDED:
1151
+ if (c !== ">") {
1152
+ strictFail(parser, "Malformed comment");
1153
+ // allow <!-- blah -- bloo --> in non-strict mode,
1154
+ // which is a comment of " blah -- bloo "
1155
+ parser.comment += "--" + c;
1156
+ parser.state = S.COMMENT;
1157
+ }
1158
+ else {
1159
+ parser.state = S.TEXT;
1160
+ }
1161
+ continue;
1162
+ case S.CDATA:
1163
+ if (c === "]") {
1164
+ parser.state = S.CDATA_ENDING;
1165
+ }
1166
+ else {
1167
+ parser.cdata += c;
1168
+ }
1169
+ continue;
1170
+ case S.CDATA_ENDING:
1171
+ if (c === "]") {
1172
+ parser.state = S.CDATA_ENDING_2;
1173
+ }
1174
+ else {
1175
+ parser.cdata += "]" + c;
1176
+ parser.state = S.CDATA;
1177
+ }
1178
+ continue;
1179
+ case S.CDATA_ENDING_2:
1180
+ if (c === ">") {
1181
+ if (parser.cdata) {
1182
+ emitNode(parser, "oncdata", parser.cdata);
1183
+ }
1184
+ emitNode(parser, "onclosecdata");
1185
+ parser.cdata = "";
1186
+ parser.state = S.TEXT;
1187
+ }
1188
+ else if (c === "]") {
1189
+ parser.cdata += "]";
1190
+ }
1191
+ else {
1192
+ parser.cdata += "]]" + c;
1193
+ parser.state = S.CDATA;
1194
+ }
1195
+ continue;
1196
+ case S.PROC_INST:
1197
+ if (c === "?") {
1198
+ parser.state = S.PROC_INST_ENDING;
1199
+ }
1200
+ else if (isWhitespace(c)) {
1201
+ parser.state = S.PROC_INST_BODY;
1202
+ }
1203
+ else {
1204
+ parser.procInstName += c;
1205
+ }
1206
+ continue;
1207
+ case S.PROC_INST_BODY:
1208
+ if (!parser.procInstBody && isWhitespace(c)) {
1209
+ continue;
1210
+ }
1211
+ else if (c === "?") {
1212
+ parser.state = S.PROC_INST_ENDING;
1213
+ }
1214
+ else {
1215
+ parser.procInstBody += c;
1216
+ }
1217
+ continue;
1218
+ case S.PROC_INST_ENDING:
1219
+ if (c === ">") {
1220
+ emitNode(parser, "onprocessinginstruction", {
1221
+ name: parser.procInstName,
1222
+ body: parser.procInstBody,
1223
+ });
1224
+ parser.procInstName = parser.procInstBody = "";
1225
+ parser.state = S.TEXT;
1226
+ }
1227
+ else {
1228
+ parser.procInstBody += "?" + c;
1229
+ parser.state = S.PROC_INST_BODY;
1230
+ }
1231
+ continue;
1232
+ case S.OPEN_TAG:
1233
+ if (isMatch(nameBody, c)) {
1234
+ parser.tagName += c;
1235
+ }
1236
+ else {
1237
+ newTag(parser);
1238
+ if (c === ">") {
1239
+ openTag(parser);
1240
+ }
1241
+ else if (c === "/") {
1242
+ parser.state = S.OPEN_TAG_SLASH;
1243
+ }
1244
+ else {
1245
+ if (!isWhitespace(c)) {
1246
+ strictFail(parser, "Invalid character in tag name");
1247
+ }
1248
+ parser.state = S.ATTRIB;
1249
+ }
1250
+ }
1251
+ continue;
1252
+ case S.OPEN_TAG_SLASH:
1253
+ if (c === ">") {
1254
+ openTag(parser, true);
1255
+ closeTag(parser);
1256
+ }
1257
+ else {
1258
+ strictFail(parser, "Forward-slash in opening tag not followed by >");
1259
+ parser.state = S.ATTRIB;
1260
+ }
1261
+ continue;
1262
+ case S.ATTRIB:
1263
+ // haven't read the attribute name yet.
1264
+ if (isWhitespace(c)) {
1265
+ continue;
1266
+ }
1267
+ else if (c === ">") {
1268
+ openTag(parser);
1269
+ }
1270
+ else if (c === "/") {
1271
+ parser.state = S.OPEN_TAG_SLASH;
1272
+ }
1273
+ else if (isMatch(nameStart, c)) {
1274
+ parser.attribName = c;
1275
+ parser.attribValue = "";
1276
+ parser.state = S.ATTRIB_NAME;
1277
+ }
1278
+ else {
1279
+ strictFail(parser, "Invalid attribute name");
1280
+ }
1281
+ continue;
1282
+ case S.ATTRIB_NAME:
1283
+ if (c === "=") {
1284
+ parser.state = S.ATTRIB_VALUE;
1285
+ }
1286
+ else if (c === ">") {
1287
+ strictFail(parser, "Attribute without value");
1288
+ parser.attribValue = parser.attribName;
1289
+ attrib(parser);
1290
+ openTag(parser);
1291
+ }
1292
+ else if (isWhitespace(c)) {
1293
+ parser.state = S.ATTRIB_NAME_SAW_WHITE;
1294
+ }
1295
+ else if (isMatch(nameBody, c)) {
1296
+ parser.attribName += c;
1297
+ }
1298
+ else {
1299
+ strictFail(parser, "Invalid attribute name");
1300
+ }
1301
+ continue;
1302
+ case S.ATTRIB_NAME_SAW_WHITE:
1303
+ if (c === "=") {
1304
+ parser.state = S.ATTRIB_VALUE;
1305
+ }
1306
+ else if (isWhitespace(c)) {
1307
+ continue;
1308
+ }
1309
+ else {
1310
+ strictFail(parser, "Attribute without value");
1311
+ parser.tag.attributes[parser.attribName] = "";
1312
+ parser.attribValue = "";
1313
+ emitNode(parser, "onattribute", {
1314
+ name: parser.attribName,
1315
+ value: "",
1316
+ });
1317
+ parser.attribName = "";
1318
+ if (c === ">") {
1319
+ openTag(parser);
1320
+ }
1321
+ else if (isMatch(nameStart, c)) {
1322
+ parser.attribName = c;
1323
+ parser.state = S.ATTRIB_NAME;
1324
+ }
1325
+ else {
1326
+ strictFail(parser, "Invalid attribute name");
1327
+ parser.state = S.ATTRIB;
1328
+ }
1329
+ }
1330
+ continue;
1331
+ case S.ATTRIB_VALUE:
1332
+ if (isWhitespace(c)) {
1333
+ continue;
1334
+ }
1335
+ else if (isQuote(c)) {
1336
+ parser.q = c;
1337
+ parser.state = S.ATTRIB_VALUE_QUOTED;
1338
+ }
1339
+ else {
1340
+ strictFail(parser, "Unquoted attribute value");
1341
+ parser.state = S.ATTRIB_VALUE_UNQUOTED;
1342
+ parser.attribValue = c;
1343
+ }
1344
+ continue;
1345
+ case S.ATTRIB_VALUE_QUOTED:
1346
+ if (c !== parser.q) {
1347
+ if (c === "&") {
1348
+ parser.state = S.ATTRIB_VALUE_ENTITY_Q;
1349
+ }
1350
+ else {
1351
+ parser.attribValue += c;
1352
+ }
1353
+ continue;
1354
+ }
1355
+ attrib(parser);
1356
+ parser.q = "";
1357
+ parser.state = S.ATTRIB_VALUE_CLOSED;
1358
+ continue;
1359
+ case S.ATTRIB_VALUE_CLOSED:
1360
+ if (isWhitespace(c)) {
1361
+ parser.state = S.ATTRIB;
1362
+ }
1363
+ else if (c === ">") {
1364
+ openTag(parser);
1365
+ }
1366
+ else if (c === "/") {
1367
+ parser.state = S.OPEN_TAG_SLASH;
1368
+ }
1369
+ else if (isMatch(nameStart, c)) {
1370
+ strictFail(parser, "No whitespace between attributes");
1371
+ parser.attribName = c;
1372
+ parser.attribValue = "";
1373
+ parser.state = S.ATTRIB_NAME;
1374
+ }
1375
+ else {
1376
+ strictFail(parser, "Invalid attribute name");
1377
+ }
1378
+ continue;
1379
+ case S.ATTRIB_VALUE_UNQUOTED:
1380
+ if (!isAttribEnd(c)) {
1381
+ if (c === "&") {
1382
+ parser.state = S.ATTRIB_VALUE_ENTITY_U;
1383
+ }
1384
+ else {
1385
+ parser.attribValue += c;
1386
+ }
1387
+ continue;
1388
+ }
1389
+ attrib(parser);
1390
+ if (c === ">") {
1391
+ openTag(parser);
1392
+ }
1393
+ else {
1394
+ parser.state = S.ATTRIB;
1395
+ }
1396
+ continue;
1397
+ case S.CLOSE_TAG:
1398
+ if (!parser.tagName) {
1399
+ if (isWhitespace(c)) {
1400
+ continue;
1401
+ }
1402
+ else if (notMatch(nameStart, c)) {
1403
+ if (parser.script) {
1404
+ parser.script += "</" + c;
1405
+ parser.state = S.SCRIPT;
1406
+ }
1407
+ else {
1408
+ strictFail(parser, "Invalid tagname in closing tag.");
1409
+ }
1410
+ }
1411
+ else {
1412
+ parser.tagName = c;
1413
+ }
1414
+ }
1415
+ else if (c === ">") {
1416
+ closeTag(parser);
1417
+ }
1418
+ else if (isMatch(nameBody, c)) {
1419
+ parser.tagName += c;
1420
+ }
1421
+ else if (parser.script) {
1422
+ parser.script += "</" + parser.tagName;
1423
+ parser.tagName = "";
1424
+ parser.state = S.SCRIPT;
1425
+ }
1426
+ else {
1427
+ if (!isWhitespace(c)) {
1428
+ strictFail(parser, "Invalid tagname in closing tag");
1429
+ }
1430
+ parser.state = S.CLOSE_TAG_SAW_WHITE;
1431
+ }
1432
+ continue;
1433
+ case S.CLOSE_TAG_SAW_WHITE:
1434
+ if (isWhitespace(c)) {
1435
+ continue;
1436
+ }
1437
+ if (c === ">") {
1438
+ closeTag(parser);
1439
+ }
1440
+ else {
1441
+ strictFail(parser, "Invalid characters in closing tag");
1442
+ }
1443
+ continue;
1444
+ case S.TEXT_ENTITY:
1445
+ case S.ATTRIB_VALUE_ENTITY_Q:
1446
+ case S.ATTRIB_VALUE_ENTITY_U:
1447
+ var returnState;
1448
+ var buffer;
1449
+ switch (parser.state) {
1450
+ case S.TEXT_ENTITY:
1451
+ returnState = S.TEXT;
1452
+ buffer = "textNode";
1453
+ break;
1454
+ case S.ATTRIB_VALUE_ENTITY_Q:
1455
+ returnState = S.ATTRIB_VALUE_QUOTED;
1456
+ buffer = "attribValue";
1457
+ break;
1458
+ case S.ATTRIB_VALUE_ENTITY_U:
1459
+ returnState = S.ATTRIB_VALUE_UNQUOTED;
1460
+ buffer = "attribValue";
1461
+ break;
1462
+ }
1463
+ if (c === ";") {
1464
+ if (parser.opt.unparsedEntities) {
1465
+ var parsedEntity = parseEntity(parser);
1466
+ parser.entity = "";
1467
+ parser.state = returnState;
1468
+ parser.write(parsedEntity);
1469
+ }
1470
+ else {
1471
+ parser[buffer] += parseEntity(parser);
1472
+ parser.entity = "";
1473
+ parser.state = returnState;
1474
+ }
1475
+ }
1476
+ else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
1477
+ parser.entity += c;
1478
+ }
1479
+ else {
1480
+ strictFail(parser, "Invalid character in entity name");
1481
+ parser[buffer] += "&" + parser.entity + c;
1482
+ parser.entity = "";
1483
+ parser.state = returnState;
1484
+ }
1485
+ continue;
1486
+ default: /* istanbul ignore next */ {
1487
+ throw new Error(parser, "Unknown state: " + parser.state);
1488
+ }
1489
+ }
1490
+ } // while
1491
+ if (parser.position >= parser.bufferCheckPosition) {
1492
+ checkBufferLength(parser);
1493
+ }
1494
+ return parser;
1495
+ }
1496
/*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
/* istanbul ignore next */
if (!String.fromCodePoint) {
    // Fallback for engines that lack a native String.fromCodePoint. The IIFE
    // keeps the helper bindings below private to the polyfill.
    (function () {
        // Captured once at install time, so the polyfill keeps using these
        // exact functions even if the globals are reassigned later.
        const charCodesToString = String.fromCharCode;
        const roundDown = Math.floor;
        /**
         * Builds a string from a sequence of Unicode code points.
         *
         * Accepts any number of arguments; each one is coerced with Number().
         * Returns "" when called without arguments.
         * Throws a RangeError when an argument is not an integer in the
         * range 0..0x10FFFF.
         */
        const fromCodePoint = function () {
            // Pending code units are flushed once they exceed this count, so a
            // single fromCharCode.apply() call never receives an unbounded
            // argument list (presumably to stay under engine argument limits).
            const FLUSH_THRESHOLD = 0x4000;
            const total = arguments.length;
            const pendingUnits = [];
            let output = "";
            for (let position = 0; position < total; position += 1) {
                const value = Number(arguments[position]);
                const isValidCodePoint = isFinite(value) && // rejects NaN and +/-Infinity
                    value >= 0 &&
                    value <= 0x10ffff &&
                    roundDown(value) === value; // must be an integer
                if (!isValidCodePoint) {
                    throw new RangeError("Invalid code point: " + value);
                }
                if (value > 0xffff) {
                    // Astral code point: encode as a UTF-16 surrogate pair.
                    // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
                    const offset = value - 0x10000;
                    pendingUnits.push((offset >> 10) + 0xd800, (offset % 0x400) + 0xdc00);
                }
                else {
                    // BMP code point: a single UTF-16 code unit.
                    pendingUnits.push(value);
                }
                const isLastArgument = position === total - 1;
                if (isLastArgument || pendingUnits.length > FLUSH_THRESHOLD) {
                    output += charCodesToString.apply(null, pendingUnits);
                    pendingUnits.length = 0;
                }
            }
            return output;
        };
        /* istanbul ignore next */
        if (!Object.defineProperty) {
            // No defineProperty available: fall back to plain assignment.
            String.fromCodePoint = fromCodePoint;
        }
        else {
            // `enumerable` is omitted, so the installed property is
            // non-enumerable while staying writable and configurable.
            Object.defineProperty(String, "fromCodePoint", {
                value: fromCodePoint,
                configurable: true,
                writable: true,
            });
        }
    })();
}
1554
+ return sax;
1555
+ };
1556
// Run the inlined sax factory once, at module load. The resulting namespace
// object is kept in a local binding and published as this module's `sax`
// export. The inline annotation on the call is kept as-is; it looks like a
// bundler purity hint, so confirm against the build tooling before changing it.
const sax = /** #__PURE__ */ initializeSax();
exports.sax = sax;