xml-sax-ts 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -44,7 +44,6 @@ function decodeEntities(input, onError) {
44
44
  const semi = input.indexOf(";", amp + 1);
45
45
  if (semi === -1) {
46
46
  const err = new XmlSaxError("Unterminated entity", amp, 0, 0);
47
- onError?.(err);
48
47
  throw err;
49
48
  }
50
49
  let decoded;
@@ -58,7 +57,6 @@ function decodeEntities(input, onError) {
58
57
  if (decoded === void 0) {
59
58
  const entity = input.slice(amp + 1, semi);
60
59
  const err = new XmlSaxError(`Unknown entity: &${entity};`, amp, 0, 0);
61
- onError?.(err);
62
60
  throw err;
63
61
  }
64
62
  result += decoded;
@@ -169,12 +167,71 @@ function splitTextForEntities(text) {
169
167
  return { emit: text, carry: "" };
170
168
  }
171
169
 
170
+ // src/tokens.ts
171
// Base class shared by all parser tokens. Stores the `kind` string that
// identifies the token type and the `position` value given by the caller.
+ var XmlToken = class {
172
+ constructor(kind, position) {
173
+ this.kind = kind;
174
+ this.position = position;
175
+ }
176
+ };
177
// Token with kind "open-tag". Besides the position it keeps the `tag`
// record, the `depth` and the `path` passed to the constructor.
+ var OpenTagToken = class extends XmlToken {
178
+ constructor(tag, depth, path, position) {
179
+ super("open-tag", position);
180
+ this.tag = tag;
181
+ this.depth = depth;
182
+ this.path = path;
183
+ }
184
+ };
185
// Token with kind "close-tag". Carries the same fields as the open-tag
// token: `tag`, `depth`, `path`, plus the position.
+ var CloseTagToken = class extends XmlToken {
186
+ constructor(tag, depth, path, position) {
187
+ super("close-tag", position);
188
+ this.tag = tag;
189
+ this.depth = depth;
190
+ this.path = path;
191
+ }
192
+ };
193
// Token with kind "text"; `text` holds the string passed by the caller.
+ var TextToken = class extends XmlToken {
194
+ constructor(text, position) {
195
+ super("text", position);
196
+ this.text = text;
197
+ }
198
+ };
199
// Token with kind "cdata"; `text` holds the string passed by the caller.
+ var CdataToken = class extends XmlToken {
200
+ constructor(text, position) {
201
+ super("cdata", position);
202
+ this.text = text;
203
+ }
204
+ };
205
// Token with kind "comment"; `text` holds the string passed by the caller.
+ var CommentToken = class extends XmlToken {
206
+ constructor(text, position) {
207
+ super("comment", position);
208
+ this.text = text;
209
+ }
210
+ };
211
// Token with kind "processing-instruction"; keeps the value passed in as
// `processingInstruction` unchanged.
+ var ProcessingInstructionToken = class extends XmlToken {
212
+ constructor(processingInstruction, position) {
213
+ super("processing-instruction", position);
214
+ this.processingInstruction = processingInstruction;
215
+ }
216
+ };
217
// Token with kind "doctype"; keeps the value passed in as `doctype` unchanged.
+ var DoctypeToken = class extends XmlToken {
218
+ constructor(doctype, position) {
219
+ super("doctype", position);
220
+ this.doctype = doctype;
221
+ }
222
+ };
223
// Token with kind "end". It has no payload beyond the position.
+ var EndToken = class extends XmlToken {
224
+ constructor(position) {
225
+ super("end", position);
226
+ }
227
+ };
228
+
172
229
  // src/parser.ts
173
230
  // Default parser option values.
  // NOTE(review): this hunk flips the `coalesceText` default from false to
  // true. When the flag is off, _emitDecodedText pushes a TextToken for each
  // piece of text; when it is on, the pieces are collected in
  // pendingTextParts and emitted later as one token. Callers that relied on
  // the old default presumably have to pass coalesceText: false explicitly
  // (the option merging is outside this hunk -- confirm).
  var DEFAULT_OPTIONS = {
174
231
  xmlns: true,
175
232
  includeNamespaceAttributes: false,
176
233
  allowDoctype: true,
177
- coalesceText: false,
234
+ coalesceText: true,
178
235
  trackPosition: true
179
236
  };
180
237
  var XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace";
@@ -205,6 +262,8 @@ var XmlSaxParser = class {
205
262
  this.offset = 0;
206
263
  this.line = 1;
207
264
  this.column = 1;
265
+ this.pathStack = [];
266
+ this.tokenQueue = [];
208
267
  this.elementStack = [];
209
268
  this.nsStack = [
210
269
  Object.assign(/* @__PURE__ */ Object.create(null), {
@@ -222,28 +281,21 @@ var XmlSaxParser = class {
222
281
  this.allowDoctype = resolved.allowDoctype;
223
282
  this.coalesceText = resolved.coalesceText;
224
283
  this.trackPosition = resolved.trackPosition;
225
- this.onOpenTag = resolved.onOpenTag;
226
- this.onCloseTag = resolved.onCloseTag;
227
- this.onText = resolved.onText;
228
- this.onCdata = resolved.onCdata;
229
- this.onComment = resolved.onComment;
230
- this.onProcessingInstruction = resolved.onProcessingInstruction;
231
- this.onDoctype = resolved.onDoctype;
232
- this.onError = resolved.onError;
233
284
  }
234
285
  // Appends `chunk` to the internal buffer, runs a non-final parse pass and
  // returns the tokens produced so far.
  // The return value comes from drainTokens(), which empties the token queue,
  // so the returned array is the only place these tokens can be read from.
  // A falsy chunk skips parsing and only returns what is already queued.
  // Calling feed() on a closed parser goes through _error("Parser is closed"),
  // which throws.
  feed(chunk) {
235
286
  if (this.closed) {
236
287
  this._error("Parser is closed");
237
288
  }
238
289
  if (!chunk) {
239
- return;
290
+ return this.drainTokens();
240
291
  }
241
292
  this.buffer += chunk;
242
293
  this._parseBuffer(false);
294
+ return this.drainTokens();
243
295
  }
244
296
  close() {
245
297
  if (this.closed) {
246
- return;
298
+ return this.drainTokens();
247
299
  }
248
300
  this._parseBuffer(true);
249
301
  this._flushPendingCR();
@@ -255,6 +307,30 @@ var XmlSaxParser = class {
255
307
  this._error("Unclosed tag(s) remaining");
256
308
  }
257
309
  this.closed = true;
310
+ this._pushToken(new EndToken(this._position()));
311
+ return this.drainTokens();
312
+ }
313
// Removes every queued token from tokenQueue and returns them as an array,
// in queue order. Returns a fresh empty array when nothing is queued.
// After the call the queue is empty.
+ drainTokens() {
314
+ if (this.tokenQueue.length === 0) {
315
+ return [];
316
+ }
317
+ return this.tokenQueue.splice(0, this.tokenQueue.length);
318
+ }
319
// Generator that makes the parser iterable: it shifts tokens off the front
// of tokenQueue and yields them until the queue is empty. Iterating the
// parser therefore consumes the queue. It yields nothing if the queue has
// already been emptied.
+ *[Symbol.iterator]() {
320
+ while (this.tokenQueue.length > 0) {
321
+ const token = this.tokenQueue.shift();
322
// Skip a falsy shift() result instead of yielding it.
+ if (token) {
323
+ yield token;
324
+ }
325
+ }
326
+ }
327
+ async *iterateChunks(chunks) {
328
+ for await (const chunk of chunks) {
329
+ this.feed(chunk);
330
+ yield* this;
331
+ }
332
+ this.close();
333
+ yield* this;
258
334
  }
259
335
  _parseBuffer(final) {
260
336
  let i = 0;
@@ -317,7 +393,7 @@ var XmlSaxParser = class {
317
393
  const data = split === -1 ? "" : body.slice(split).trim();
318
394
  const pi = { target, body: data };
319
395
  this._flushTextBuffer();
320
- this.onProcessingInstruction?.(pi);
396
+ this._pushToken(new ProcessingInstructionToken(pi, this._position()));
321
397
  return end + 2 - start;
322
398
  }
323
399
  if (secondCode === 33) {
@@ -332,7 +408,7 @@ var XmlSaxParser = class {
332
408
  }
333
409
  const comment = this.buffer.slice(start + 4, end);
334
410
  this._flushTextBuffer();
335
- this.onComment?.(comment);
411
+ this._pushToken(new CommentToken(comment, this._position()));
336
412
  return end + 3 - start;
337
413
  }
338
414
  if (thirdCode === 91 && this.buffer.startsWith("<![CDATA[", start)) {
@@ -347,7 +423,7 @@ var XmlSaxParser = class {
347
423
  const normalized = this._normalizeText(cdata, false);
348
424
  if (normalized.length > 0) {
349
425
  this._flushTextBuffer();
350
- this.onCdata?.(normalized);
426
+ this._pushToken(new CdataToken(normalized, this._position()));
351
427
  }
352
428
  return end + 3 - start;
353
429
  }
@@ -365,7 +441,7 @@ var XmlSaxParser = class {
365
441
  const raw = this.buffer.slice(start + 9, end).trim();
366
442
  const doctype = { raw };
367
443
  this._flushTextBuffer();
368
- this.onDoctype?.(doctype);
444
+ this._pushToken(new DoctypeToken(doctype, this._position()));
369
445
  return end + 1 - start;
370
446
  }
371
447
  }
@@ -411,15 +487,18 @@ var XmlSaxParser = class {
411
487
  attributes: attributes2,
412
488
  isSelfClosing: selfClosing
413
489
  };
414
- this.onOpenTag?.(tag2);
490
+ const openPath2 = Object.freeze([...this.pathStack, plainName]);
491
+ const depth2 = openPath2.length;
492
+ this._pushToken(new OpenTagToken(tag2, depth2, openPath2, this._position()));
415
493
  if (selfClosing) {
416
- this.onCloseTag?.({ name: plainName });
494
+ this._pushToken(new CloseTagToken({ name: plainName }, depth2, openPath2, this._position()));
417
495
  return;
418
496
  }
419
497
  this.elementStack.push({
420
498
  rawName: parsed.name,
421
499
  closeTag: { name: plainName }
422
500
  });
501
+ this.pathStack.push(plainName);
423
502
  return;
424
503
  }
425
504
  const parentNs = this._currentNs();
@@ -462,14 +541,23 @@ var XmlSaxParser = class {
462
541
  attributes,
463
542
  isSelfClosing: selfClosing
464
543
  };
465
- this.onOpenTag?.(tag);
544
+ const openPath = Object.freeze([...this.pathStack, resolvedName.name]);
545
+ const depth = openPath.length;
546
+ this._pushToken(new OpenTagToken(tag, depth, openPath, this._position()));
466
547
  if (selfClosing) {
467
- this.onCloseTag?.({
468
- name: resolvedName.name,
469
- prefix: resolvedName.prefix,
470
- local: resolvedName.local,
471
- uri: resolvedName.uri
472
- });
548
+ this._pushToken(
549
+ new CloseTagToken(
550
+ {
551
+ name: resolvedName.name,
552
+ prefix: resolvedName.prefix,
553
+ local: resolvedName.local,
554
+ uri: resolvedName.uri
555
+ },
556
+ depth,
557
+ openPath,
558
+ this._position()
559
+ )
560
+ );
473
561
  return;
474
562
  }
475
563
  this.elementStack.push({
@@ -481,6 +569,7 @@ var XmlSaxParser = class {
481
569
  uri: resolvedName.uri
482
570
  }
483
571
  });
572
+ this.pathStack.push(resolvedName.name);
484
573
  this.nsStack.push(ns);
485
574
  }
486
575
  _parseStartTagRange(start, end) {
@@ -524,7 +613,7 @@ var XmlSaxParser = class {
524
613
  }
525
614
  const rawValue = this.buffer.slice(i, valueEnd);
526
615
  const normalized = rawValue.includes("\r") ? rawValue.replace(CRLF_RE, "\n") : rawValue;
527
- const value = !normalized.includes("&") ? normalized : decodeEntities(normalized, this.onError);
616
+ const value = !normalized.includes("&") ? normalized : decodeEntities(normalized);
528
617
  attributes.push({ name: attrName.name, value });
529
618
  i = valueEnd + 1;
530
619
  }
@@ -540,7 +629,12 @@ var XmlSaxParser = class {
540
629
  if (entry.rawName !== rawName) {
541
630
  this._error(`Mismatched closing tag: expected </${entry.rawName}>`);
542
631
  }
543
- this.onCloseTag?.(entry.closeTag);
632
+ const closePath = Object.freeze([...this.pathStack]);
633
+ const depth = closePath.length;
634
+ if (depth > 0) {
635
+ this.pathStack.pop();
636
+ }
637
+ this._pushToken(new CloseTagToken(entry.closeTag, depth, closePath, this._position()));
544
638
  }
545
639
  _emitText(text, allowPendingCR) {
546
640
  const normalized = this._normalizeText(text, allowPendingCR);
@@ -551,14 +645,14 @@ var XmlSaxParser = class {
551
645
  this._emitDecodedText(normalized);
552
646
  return;
553
647
  }
554
- const decoded = decodeEntities(normalized, this.onError);
648
+ const decoded = decodeEntities(normalized);
555
649
  if (decoded.length > 0) {
556
650
  this._emitDecodedText(decoded);
557
651
  }
558
652
  }
559
653
  _emitDecodedText(text) {
560
654
  if (!this.coalesceText) {
561
- this.onText?.(text);
655
+ this._pushToken(new TextToken(text, this._position()));
562
656
  return;
563
657
  }
564
658
  this.pendingTextParts.push(text);
@@ -570,7 +664,7 @@ var XmlSaxParser = class {
570
664
  const first = this.pendingTextParts[0];
571
665
  const text = this.pendingTextParts.length === 1 && first !== void 0 ? first : this.pendingTextParts.join("");
572
666
  this.pendingTextParts.length = 0;
573
- this.onText?.(text);
667
+ this._pushToken(new TextToken(text, this._position()));
574
668
  }
575
669
  _resolveName(rawName, ns) {
576
670
  if (!this.xmlns) {
@@ -807,55 +901,88 @@ var XmlSaxParser = class {
807
901
  const line = this.trackPosition ? this.line : 0;
808
902
  const column = this.trackPosition ? this.column : 0;
809
903
  const error = new XmlSaxError(message, this.offset, line, column);
810
- this.onError?.(error);
811
904
  throw error;
812
905
  }
906
// Returns a new object describing the parser's current location: the
// current offset, plus line and column. Line and column are reported as 0
// when trackPosition is off.
+ _position() {
907
+ return {
908
+ offset: this.offset,
909
+ line: this.trackPosition ? this.line : 0,
910
+ column: this.trackPosition ? this.column : 0
911
+ };
912
+ }
913
// Appends `token` to the end of tokenQueue. Tokens stay there until
// drainTokens() or the parser's iterator removes them.
+ _pushToken(token) {
914
+ this.tokenQueue.push(token);
915
+ }
813
916
  };
917
/**
 * Tokenizes a complete XML string in one call.
 *
 * feed() returns the tokens it produced and removes them from the parser's
 * queue, so its return value has to be kept. Returning only the result of
 * close() would drop every token produced while feeding.
 *
 * @param {string} xml - the full XML document text.
 * @param {object} [options={}] - options passed to the XmlSaxParser constructor.
 * @returns {Array} tokens returned by feed(xml) followed by those returned
 *   by close().
 * @throws whatever feed() or close() throw for invalid input.
 */
function tokenizeXml(xml, options = {}) {
  const parser = new XmlSaxParser(options);
  const fedTokens = parser.feed(xml);
  const closingTokens = parser.close();
  return [...fedTokens, ...closingTokens];
}
922
// Async generator wrapper: creates an XmlSaxParser with `options` and
// delegates to its iterateChunks(chunks), yielding whatever that yields.
+ async function* tokenizeXmlAsync(chunks, options = {}) {
923
+ const parser = new XmlSaxParser(options);
924
+ yield* parser.iterateChunks(chunks);
925
+ }
814
926
 
815
927
  // src/tree.ts
816
928
  var TreeBuilder = class {
817
929
  constructor() {
818
930
  this.stack = [];
819
931
  this.root = null;
820
- this.onOpenTag = (tag) => {
821
- const node = {
822
- name: tag.name,
823
- attributes: Object.fromEntries(
824
- Object.entries(tag.attributes).map(([key, attr]) => [key, typeof attr === "string" ? attr : attr.value])
825
- ),
826
- children: []
827
- };
828
- const parent = this.stack[this.stack.length - 1];
829
- if (parent) {
830
- parent.children?.push(node);
831
- } else {
832
- this.root = node;
833
- }
834
- this.stack.push(node);
835
- };
836
- this.onText = (text) => {
837
- if (!this.stack.length) {
838
- return;
839
- }
840
- const node = this.stack[this.stack.length - 1];
841
- if (!node) {
842
- return;
843
- }
844
- const children = node.children ?? [];
845
- const last = children[children.length - 1];
846
- if (typeof last === "string") {
847
- children[children.length - 1] = last + text;
848
- } else {
849
- children.push(text);
850
- }
851
- node.children = children;
852
- };
853
- this.onCdata = (text) => {
854
- this.onText(text);
855
- };
856
- this.onCloseTag = () => {
857
- this.stack.pop();
932
+ }
933
// Creates a tree node for an opening tag and makes it the current node.
// The node gets the tag name, an attributes object and an empty children
// list. It is appended to the children of the node on top of the stack, or
// stored as this.root when the stack is empty, and then pushed on the stack.
+ onOpenTag(tag) {
934
+ const node = {
935
+ name: tag.name,
936
// Each entry of tag.attributes is kept under its key; a string entry is
// used as-is, any other entry contributes its `.value`.
+ attributes: Object.fromEntries(
937
+ Object.entries(tag.attributes).map(([key, attr]) => [key, typeof attr === "string" ? attr : attr.value])
938
+ ),
939
+ children: []
858
940
  };
941
+ const parent = this.stack[this.stack.length - 1];
942
+ if (parent) {
943
+ parent.children?.push(node);
944
+ } else {
945
+ this.root = node;
946
+ }
947
+ this.stack.push(node);
948
+ }
949
// Adds text to the node on top of the stack. Does nothing when no element
// is open. If the node's last child is already a string the new text is
// concatenated to it, so consecutive text pieces end up as a single child.
// Otherwise the text is appended as a new child.
+ onText(text) {
950
+ if (!this.stack.length) {
951
+ return;
952
+ }
953
+ const node = this.stack[this.stack.length - 1];
954
+ if (!node) {
955
+ return;
956
+ }
957
+ const children = node.children ?? [];
958
+ const last = children[children.length - 1];
959
+ if (typeof last === "string") {
960
+ children[children.length - 1] = last + text;
961
+ } else {
962
+ children.push(text);
963
+ }
// Write the list back in case node.children was missing and a new array
// was created above.
964
+ node.children = children;
965
+ }
966
// CDATA content is handled exactly like ordinary text: forwards to onText.
+ onCdata(text) {
967
+ this.onText(text);
968
+ }
969
// Closes the current element by popping it off the stack. The node stays
// attached to its parent (or to this.root) where onOpenTag put it.
+ onCloseTag() {
970
+ this.stack.pop();
971
+ }
972
+ consume(token) {
973
+ if (token instanceof OpenTagToken) {
974
+ this.onOpenTag(token.tag);
975
+ return;
976
+ }
977
+ if (token instanceof TextToken) {
978
+ this.onText(token.text);
979
+ return;
980
+ }
981
+ if (token instanceof CdataToken) {
982
+ this.onCdata(token.text);
983
+ return;
984
+ }
985
+ this.onCloseTag();
859
986
  }
860
987
  getRoot() {
861
988
  if (!this.root) {
@@ -866,15 +993,17 @@ var TreeBuilder = class {
866
993
  };
867
994
  // Parses a complete XML string into a node tree and returns the root from
  // builder.getRoot().
  // Lines marked "-" are the previous callback wiring removed by this hunk.
  // The "+" lines instead read the tokens returned by feed() and by close()
  // and pass them to the TreeBuilder. Only open-tag, text, CDATA and
  // close-tag tokens are forwarded; every other token kind is skipped.
  function parseXmlString(xml, options = {}) {
868
995
  const builder = new TreeBuilder();
869
- const parser = new XmlSaxParser({
870
- ...options,
871
- onOpenTag: builder.onOpenTag,
872
- onText: builder.onText,
873
- onCdata: builder.onCdata,
874
- onCloseTag: builder.onCloseTag
875
- });
876
- parser.feed(xml);
877
- parser.close();
996
+ const parser = new XmlSaxParser(options);
997
+ for (const token of parser.feed(xml)) {
998
+ if (token instanceof OpenTagToken || token instanceof TextToken || token instanceof CdataToken || token instanceof CloseTagToken) {
999
+ builder.consume(token);
1000
+ }
1001
+ }
// close() returns the tokens produced while finishing the parse; they go
// through the same filter.
1002
+ for (const token of parser.close()) {
1003
+ if (token instanceof OpenTagToken || token instanceof TextToken || token instanceof CdataToken || token instanceof CloseTagToken) {
1004
+ builder.consume(token);
1005
+ }
1006
+ }
878
1007
  return builder.getRoot();
879
1008
  }
880
1009
 
@@ -1000,47 +1129,62 @@ var ObjectBuilder = class {
1000
1129
  this.stack = [];
1001
1130
  this.root = null;
1002
1131
  this.rootName = null;
1003
- this.onOpenTag = (tag) => {
1004
- const name = normalizeName(tag.name, this.options);
1005
- const attributes = normalizeAttributes(tag.attributes, this.options);
1006
- const state = {
1007
- name,
1008
- attributes,
1009
- textParts: [],
1010
- children: /* @__PURE__ */ Object.create(null)
1011
- };
1012
- this.rootName ?? (this.rootName = name);
1013
- this.stack.push(state);
1014
- };
1015
- this.onText = (text) => {
1016
- if (!text) {
1017
- return;
1018
- }
1019
- const current = this.stack[this.stack.length - 1];
1020
- if (!current) {
1021
- return;
1022
- }
1023
- current.textParts.push(text);
1024
- };
1025
- this.onCdata = (text) => {
1026
- this.onText(text);
1027
- };
1028
- this.onCloseTag = () => {
1029
- const state = this.stack.pop();
1030
- if (!state) {
1031
- return;
1032
- }
1033
- const value = finalizeElement(state, this.options);
1034
- const parent = this.stack[this.stack.length - 1];
1035
- if (!parent) {
1036
- this.root = value;
1037
- return;
1038
- }
1039
- const path = this.stack.map((entry) => entry.name);
1040
- addChild(parent.children, state.name, value, this.options, path);
1041
- };
1042
1132
  this.options = buildSettings(options);
1043
1133
  }
1134
// Starts a new element state and pushes it on the stack. The state holds
// the name and attributes after normalizeName / normalizeAttributes (both
// called with this.options), an empty textParts list and a children map
// created without a prototype. The first tag seen also sets this.rootName.
+ onOpenTag(tag) {
1135
+ const name = normalizeName(tag.name, this.options);
1136
+ const attributes = normalizeAttributes(tag.attributes, this.options);
1137
+ const state = {
1138
+ name,
1139
+ attributes,
1140
+ textParts: [],
1141
+ children: /* @__PURE__ */ Object.create(null)
1142
+ };
// Only assigned while rootName is still null or undefined.
1143
+ this.rootName ?? (this.rootName = name);
1144
+ this.stack.push(state);
1145
+ }
1146
// Appends `text` to the textParts of the element on top of the stack.
// Ignored when the text is empty or when no element is open.
+ onText(text) {
1147
+ if (!text) {
1148
+ return;
1149
+ }
1150
+ const current = this.stack[this.stack.length - 1];
1151
+ if (!current) {
1152
+ return;
1153
+ }
1154
+ current.textParts.push(text);
1155
+ }
1156
// CDATA content is handled exactly like ordinary text: forwards to onText.
+ onCdata(text) {
1157
+ this.onText(text);
1158
+ }
1159
// Finishes the current element: pops its state off the stack and turns it
// into a value with finalizeElement. Does nothing when the stack is empty.
// If no parent remains on the stack the value becomes this.root. Otherwise
// it is added to the parent's children through addChild.
+ onCloseTag() {
1160
+ const state = this.stack.pop();
1161
+ if (!state) {
1162
+ return;
1163
+ }
1164
+ const value = finalizeElement(state, this.options);
1165
+ const parent = this.stack[this.stack.length - 1];
1166
+ if (!parent) {
1167
+ this.root = value;
1168
+ return;
1169
+ }
// `path` lists the names of the elements still open, i.e. the ancestors of
// the element that was just closed.
1170
+ const path = this.stack.map((entry) => entry.name);
1171
+ addChild(parent.children, state.name, value, this.options, path);
1172
+ }
1173
+ consume(token) {
1174
+ if (token instanceof OpenTagToken) {
1175
+ this.onOpenTag(token.tag);
1176
+ return;
1177
+ }
1178
+ if (token instanceof TextToken) {
1179
+ this.onText(token.text);
1180
+ return;
1181
+ }
1182
+ if (token instanceof CdataToken) {
1183
+ this.onCdata(token.text);
1184
+ return;
1185
+ }
1186
+ this.onCloseTag();
1187
+ }
1044
1188
  getResult() {
1045
1189
  if (this.root === null) {
1046
1190
  throw new Error("No root element found");
@@ -1302,10 +1446,19 @@ function finalizeElement(state, options) {
1302
1446
  return result;
1303
1447
  }
1304
1448
 
1449
+ exports.CdataToken = CdataToken;
1450
+ exports.CloseTagToken = CloseTagToken;
1451
+ exports.CommentToken = CommentToken;
1452
+ exports.DoctypeToken = DoctypeToken;
1453
+ exports.EndToken = EndToken;
1305
1454
  exports.ObjectBuilder = ObjectBuilder;
1455
+ exports.OpenTagToken = OpenTagToken;
1456
+ exports.ProcessingInstructionToken = ProcessingInstructionToken;
1457
+ exports.TextToken = TextToken;
1306
1458
  exports.TreeBuilder = TreeBuilder;
1307
1459
  exports.XmlSaxError = XmlSaxError;
1308
1460
  exports.XmlSaxParser = XmlSaxParser;
1461
+ exports.XmlToken = XmlToken;
1309
1462
  exports.buildObject = buildObject;
1310
1463
  exports.buildXmlNode = buildXmlNode;
1311
1464
  exports.objectToXml = objectToXml;
@@ -1313,5 +1466,7 @@ exports.parseXmlString = parseXmlString;
1313
1466
  exports.resolveName = resolveName;
1314
1467
  exports.serializeXml = serializeXml;
1315
1468
  exports.stripNamespace = stripNamespace;
1469
+ exports.tokenizeXml = tokenizeXml;
1470
+ exports.tokenizeXmlAsync = tokenizeXmlAsync;
1316
1471
  //# sourceMappingURL=index.cjs.map
1317
1472
  //# sourceMappingURL=index.cjs.map