xml-sax-ts 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -42,7 +42,6 @@ function decodeEntities(input, onError) {
42
42
  const semi = input.indexOf(";", amp + 1);
43
43
  if (semi === -1) {
44
44
  const err = new XmlSaxError("Unterminated entity", amp, 0, 0);
45
- onError?.(err);
46
45
  throw err;
47
46
  }
48
47
  let decoded;
@@ -56,7 +55,6 @@ function decodeEntities(input, onError) {
56
55
  if (decoded === void 0) {
57
56
  const entity = input.slice(amp + 1, semi);
58
57
  const err = new XmlSaxError(`Unknown entity: &${entity};`, amp, 0, 0);
59
- onError?.(err);
60
58
  throw err;
61
59
  }
62
60
  result += decoded;
@@ -167,12 +165,71 @@ function splitTextForEntities(text) {
167
165
  return { emit: text, carry: "" };
168
166
  }
169
167
 
168
+ // src/tokens.ts
169
+ var XmlToken = class {
170
+ constructor(kind, position) {
171
+ this.kind = kind;
172
+ this.position = position;
173
+ }
174
+ };
175
+ var OpenTagToken = class extends XmlToken {
176
+ constructor(tag, depth, path, position) {
177
+ super("open-tag", position);
178
+ this.tag = tag;
179
+ this.depth = depth;
180
+ this.path = path;
181
+ }
182
+ };
183
+ var CloseTagToken = class extends XmlToken {
184
+ constructor(tag, depth, path, position) {
185
+ super("close-tag", position);
186
+ this.tag = tag;
187
+ this.depth = depth;
188
+ this.path = path;
189
+ }
190
+ };
191
+ var TextToken = class extends XmlToken {
192
+ constructor(text, position) {
193
+ super("text", position);
194
+ this.text = text;
195
+ }
196
+ };
197
+ var CdataToken = class extends XmlToken {
198
+ constructor(text, position) {
199
+ super("cdata", position);
200
+ this.text = text;
201
+ }
202
+ };
203
+ var CommentToken = class extends XmlToken {
204
+ constructor(text, position) {
205
+ super("comment", position);
206
+ this.text = text;
207
+ }
208
+ };
209
+ var ProcessingInstructionToken = class extends XmlToken {
210
+ constructor(processingInstruction, position) {
211
+ super("processing-instruction", position);
212
+ this.processingInstruction = processingInstruction;
213
+ }
214
+ };
215
+ var DoctypeToken = class extends XmlToken {
216
+ constructor(doctype, position) {
217
+ super("doctype", position);
218
+ this.doctype = doctype;
219
+ }
220
+ };
221
+ var EndToken = class extends XmlToken {
222
+ constructor(position) {
223
+ super("end", position);
224
+ }
225
+ };
226
+
170
227
  // src/parser.ts
171
228
  var DEFAULT_OPTIONS = {
172
229
  xmlns: true,
173
230
  includeNamespaceAttributes: false,
174
231
  allowDoctype: true,
175
- coalesceText: false,
232
+ coalesceText: true,
176
233
  trackPosition: true
177
234
  };
178
235
  var XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace";
@@ -203,6 +260,8 @@ var XmlSaxParser = class {
203
260
  this.offset = 0;
204
261
  this.line = 1;
205
262
  this.column = 1;
263
+ this.pathStack = [];
264
+ this.tokenQueue = [];
206
265
  this.elementStack = [];
207
266
  this.nsStack = [
208
267
  Object.assign(/* @__PURE__ */ Object.create(null), {
@@ -220,28 +279,21 @@ var XmlSaxParser = class {
220
279
  this.allowDoctype = resolved.allowDoctype;
221
280
  this.coalesceText = resolved.coalesceText;
222
281
  this.trackPosition = resolved.trackPosition;
223
- this.onOpenTag = resolved.onOpenTag;
224
- this.onCloseTag = resolved.onCloseTag;
225
- this.onText = resolved.onText;
226
- this.onCdata = resolved.onCdata;
227
- this.onComment = resolved.onComment;
228
- this.onProcessingInstruction = resolved.onProcessingInstruction;
229
- this.onDoctype = resolved.onDoctype;
230
- this.onError = resolved.onError;
231
282
  }
232
283
  feed(chunk) {
233
284
  if (this.closed) {
234
285
  this._error("Parser is closed");
235
286
  }
236
287
  if (!chunk) {
237
- return;
288
+ return this.drainTokens();
238
289
  }
239
290
  this.buffer += chunk;
240
291
  this._parseBuffer(false);
292
+ return this.drainTokens();
241
293
  }
242
294
  close() {
243
295
  if (this.closed) {
244
- return;
296
+ return this.drainTokens();
245
297
  }
246
298
  this._parseBuffer(true);
247
299
  this._flushPendingCR();
@@ -253,6 +305,30 @@ var XmlSaxParser = class {
253
305
  this._error("Unclosed tag(s) remaining");
254
306
  }
255
307
  this.closed = true;
308
+ this._pushToken(new EndToken(this._position()));
309
+ return this.drainTokens();
310
+ }
311
+ drainTokens() {
312
+ if (this.tokenQueue.length === 0) {
313
+ return [];
314
+ }
315
+ return this.tokenQueue.splice(0, this.tokenQueue.length);
316
+ }
317
+ *[Symbol.iterator]() {
318
+ while (this.tokenQueue.length > 0) {
319
+ const token = this.tokenQueue.shift();
320
+ if (token) {
321
+ yield token;
322
+ }
323
+ }
324
+ }
325
+ async *iterateChunks(chunks) {
326
+ for await (const chunk of chunks) {
327
+ this.feed(chunk);
328
+ yield* this;
329
+ }
330
+ this.close();
331
+ yield* this;
256
332
  }
257
333
  _parseBuffer(final) {
258
334
  let i = 0;
@@ -315,7 +391,7 @@ var XmlSaxParser = class {
315
391
  const data = split === -1 ? "" : body.slice(split).trim();
316
392
  const pi = { target, body: data };
317
393
  this._flushTextBuffer();
318
- this.onProcessingInstruction?.(pi);
394
+ this._pushToken(new ProcessingInstructionToken(pi, this._position()));
319
395
  return end + 2 - start;
320
396
  }
321
397
  if (secondCode === 33) {
@@ -330,7 +406,7 @@ var XmlSaxParser = class {
330
406
  }
331
407
  const comment = this.buffer.slice(start + 4, end);
332
408
  this._flushTextBuffer();
333
- this.onComment?.(comment);
409
+ this._pushToken(new CommentToken(comment, this._position()));
334
410
  return end + 3 - start;
335
411
  }
336
412
  if (thirdCode === 91 && this.buffer.startsWith("<![CDATA[", start)) {
@@ -345,7 +421,7 @@ var XmlSaxParser = class {
345
421
  const normalized = this._normalizeText(cdata, false);
346
422
  if (normalized.length > 0) {
347
423
  this._flushTextBuffer();
348
- this.onCdata?.(normalized);
424
+ this._pushToken(new CdataToken(normalized, this._position()));
349
425
  }
350
426
  return end + 3 - start;
351
427
  }
@@ -363,7 +439,7 @@ var XmlSaxParser = class {
363
439
  const raw = this.buffer.slice(start + 9, end).trim();
364
440
  const doctype = { raw };
365
441
  this._flushTextBuffer();
366
- this.onDoctype?.(doctype);
442
+ this._pushToken(new DoctypeToken(doctype, this._position()));
367
443
  return end + 1 - start;
368
444
  }
369
445
  }
@@ -409,15 +485,18 @@ var XmlSaxParser = class {
409
485
  attributes: attributes2,
410
486
  isSelfClosing: selfClosing
411
487
  };
412
- this.onOpenTag?.(tag2);
488
+ const openPath2 = Object.freeze([...this.pathStack, plainName]);
489
+ const depth2 = openPath2.length;
490
+ this._pushToken(new OpenTagToken(tag2, depth2, openPath2, this._position()));
413
491
  if (selfClosing) {
414
- this.onCloseTag?.({ name: plainName });
492
+ this._pushToken(new CloseTagToken({ name: plainName }, depth2, openPath2, this._position()));
415
493
  return;
416
494
  }
417
495
  this.elementStack.push({
418
496
  rawName: parsed.name,
419
497
  closeTag: { name: plainName }
420
498
  });
499
+ this.pathStack.push(plainName);
421
500
  return;
422
501
  }
423
502
  const parentNs = this._currentNs();
@@ -460,14 +539,23 @@ var XmlSaxParser = class {
460
539
  attributes,
461
540
  isSelfClosing: selfClosing
462
541
  };
463
- this.onOpenTag?.(tag);
542
+ const openPath = Object.freeze([...this.pathStack, resolvedName.name]);
543
+ const depth = openPath.length;
544
+ this._pushToken(new OpenTagToken(tag, depth, openPath, this._position()));
464
545
  if (selfClosing) {
465
- this.onCloseTag?.({
466
- name: resolvedName.name,
467
- prefix: resolvedName.prefix,
468
- local: resolvedName.local,
469
- uri: resolvedName.uri
470
- });
546
+ this._pushToken(
547
+ new CloseTagToken(
548
+ {
549
+ name: resolvedName.name,
550
+ prefix: resolvedName.prefix,
551
+ local: resolvedName.local,
552
+ uri: resolvedName.uri
553
+ },
554
+ depth,
555
+ openPath,
556
+ this._position()
557
+ )
558
+ );
471
559
  return;
472
560
  }
473
561
  this.elementStack.push({
@@ -479,6 +567,7 @@ var XmlSaxParser = class {
479
567
  uri: resolvedName.uri
480
568
  }
481
569
  });
570
+ this.pathStack.push(resolvedName.name);
482
571
  this.nsStack.push(ns);
483
572
  }
484
573
  _parseStartTagRange(start, end) {
@@ -522,7 +611,7 @@ var XmlSaxParser = class {
522
611
  }
523
612
  const rawValue = this.buffer.slice(i, valueEnd);
524
613
  const normalized = rawValue.includes("\r") ? rawValue.replace(CRLF_RE, "\n") : rawValue;
525
- const value = !normalized.includes("&") ? normalized : decodeEntities(normalized, this.onError);
614
+ const value = !normalized.includes("&") ? normalized : decodeEntities(normalized);
526
615
  attributes.push({ name: attrName.name, value });
527
616
  i = valueEnd + 1;
528
617
  }
@@ -538,7 +627,12 @@ var XmlSaxParser = class {
538
627
  if (entry.rawName !== rawName) {
539
628
  this._error(`Mismatched closing tag: expected </${entry.rawName}>`);
540
629
  }
541
- this.onCloseTag?.(entry.closeTag);
630
+ const closePath = Object.freeze([...this.pathStack]);
631
+ const depth = closePath.length;
632
+ if (depth > 0) {
633
+ this.pathStack.pop();
634
+ }
635
+ this._pushToken(new CloseTagToken(entry.closeTag, depth, closePath, this._position()));
542
636
  }
543
637
  _emitText(text, allowPendingCR) {
544
638
  const normalized = this._normalizeText(text, allowPendingCR);
@@ -549,14 +643,14 @@ var XmlSaxParser = class {
549
643
  this._emitDecodedText(normalized);
550
644
  return;
551
645
  }
552
- const decoded = decodeEntities(normalized, this.onError);
646
+ const decoded = decodeEntities(normalized);
553
647
  if (decoded.length > 0) {
554
648
  this._emitDecodedText(decoded);
555
649
  }
556
650
  }
557
651
  _emitDecodedText(text) {
558
652
  if (!this.coalesceText) {
559
- this.onText?.(text);
653
+ this._pushToken(new TextToken(text, this._position()));
560
654
  return;
561
655
  }
562
656
  this.pendingTextParts.push(text);
@@ -568,7 +662,7 @@ var XmlSaxParser = class {
568
662
  const first = this.pendingTextParts[0];
569
663
  const text = this.pendingTextParts.length === 1 && first !== void 0 ? first : this.pendingTextParts.join("");
570
664
  this.pendingTextParts.length = 0;
571
- this.onText?.(text);
665
+ this._pushToken(new TextToken(text, this._position()));
572
666
  }
573
667
  _resolveName(rawName, ns) {
574
668
  if (!this.xmlns) {
@@ -805,55 +899,88 @@ var XmlSaxParser = class {
805
899
  const line = this.trackPosition ? this.line : 0;
806
900
  const column = this.trackPosition ? this.column : 0;
807
901
  const error = new XmlSaxError(message, this.offset, line, column);
808
- this.onError?.(error);
809
902
  throw error;
810
903
  }
904
+ _position() {
905
+ return {
906
+ offset: this.offset,
907
+ line: this.trackPosition ? this.line : 0,
908
+ column: this.trackPosition ? this.column : 0
909
+ };
910
+ }
911
+ _pushToken(token) {
912
+ this.tokenQueue.push(token);
913
+ }
811
914
  };
915
+ function tokenizeXml(xml, options = {}) {
916
+ const parser = new XmlSaxParser(options);
917
+ parser.feed(xml);
918
+ return parser.close();
919
+ }
920
+ async function* tokenizeXmlAsync(chunks, options = {}) {
921
+ const parser = new XmlSaxParser(options);
922
+ yield* parser.iterateChunks(chunks);
923
+ }
812
924
 
813
925
  // src/tree.ts
814
926
  var TreeBuilder = class {
815
927
  constructor() {
816
928
  this.stack = [];
817
929
  this.root = null;
818
- this.onOpenTag = (tag) => {
819
- const node = {
820
- name: tag.name,
821
- attributes: Object.fromEntries(
822
- Object.entries(tag.attributes).map(([key, attr]) => [key, typeof attr === "string" ? attr : attr.value])
823
- ),
824
- children: []
825
- };
826
- const parent = this.stack[this.stack.length - 1];
827
- if (parent) {
828
- parent.children?.push(node);
829
- } else {
830
- this.root = node;
831
- }
832
- this.stack.push(node);
833
- };
834
- this.onText = (text) => {
835
- if (!this.stack.length) {
836
- return;
837
- }
838
- const node = this.stack[this.stack.length - 1];
839
- if (!node) {
840
- return;
841
- }
842
- const children = node.children ?? [];
843
- const last = children[children.length - 1];
844
- if (typeof last === "string") {
845
- children[children.length - 1] = last + text;
846
- } else {
847
- children.push(text);
848
- }
849
- node.children = children;
850
- };
851
- this.onCdata = (text) => {
852
- this.onText(text);
853
- };
854
- this.onCloseTag = () => {
855
- this.stack.pop();
930
+ }
931
+ onOpenTag(tag) {
932
+ const node = {
933
+ name: tag.name,
934
+ attributes: Object.fromEntries(
935
+ Object.entries(tag.attributes).map(([key, attr]) => [key, typeof attr === "string" ? attr : attr.value])
936
+ ),
937
+ children: []
856
938
  };
939
+ const parent = this.stack[this.stack.length - 1];
940
+ if (parent) {
941
+ parent.children?.push(node);
942
+ } else {
943
+ this.root = node;
944
+ }
945
+ this.stack.push(node);
946
+ }
947
+ onText(text) {
948
+ if (!this.stack.length) {
949
+ return;
950
+ }
951
+ const node = this.stack[this.stack.length - 1];
952
+ if (!node) {
953
+ return;
954
+ }
955
+ const children = node.children ?? [];
956
+ const last = children[children.length - 1];
957
+ if (typeof last === "string") {
958
+ children[children.length - 1] = last + text;
959
+ } else {
960
+ children.push(text);
961
+ }
962
+ node.children = children;
963
+ }
964
+ onCdata(text) {
965
+ this.onText(text);
966
+ }
967
+ onCloseTag() {
968
+ this.stack.pop();
969
+ }
970
+ consume(token) {
971
+ if (token instanceof OpenTagToken) {
972
+ this.onOpenTag(token.tag);
973
+ return;
974
+ }
975
+ if (token instanceof TextToken) {
976
+ this.onText(token.text);
977
+ return;
978
+ }
979
+ if (token instanceof CdataToken) {
980
+ this.onCdata(token.text);
981
+ return;
982
+ }
983
+ this.onCloseTag();
857
984
  }
858
985
  getRoot() {
859
986
  if (!this.root) {
@@ -864,15 +991,17 @@ var TreeBuilder = class {
864
991
  };
865
992
  function parseXmlString(xml, options = {}) {
866
993
  const builder = new TreeBuilder();
867
- const parser = new XmlSaxParser({
868
- ...options,
869
- onOpenTag: builder.onOpenTag,
870
- onText: builder.onText,
871
- onCdata: builder.onCdata,
872
- onCloseTag: builder.onCloseTag
873
- });
874
- parser.feed(xml);
875
- parser.close();
994
+ const parser = new XmlSaxParser(options);
995
+ for (const token of parser.feed(xml)) {
996
+ if (token instanceof OpenTagToken || token instanceof TextToken || token instanceof CdataToken || token instanceof CloseTagToken) {
997
+ builder.consume(token);
998
+ }
999
+ }
1000
+ for (const token of parser.close()) {
1001
+ if (token instanceof OpenTagToken || token instanceof TextToken || token instanceof CdataToken || token instanceof CloseTagToken) {
1002
+ builder.consume(token);
1003
+ }
1004
+ }
876
1005
  return builder.getRoot();
877
1006
  }
878
1007
 
@@ -998,47 +1127,62 @@ var ObjectBuilder = class {
998
1127
  this.stack = [];
999
1128
  this.root = null;
1000
1129
  this.rootName = null;
1001
- this.onOpenTag = (tag) => {
1002
- const name = normalizeName(tag.name, this.options);
1003
- const attributes = normalizeAttributes(tag.attributes, this.options);
1004
- const state = {
1005
- name,
1006
- attributes,
1007
- textParts: [],
1008
- children: /* @__PURE__ */ Object.create(null)
1009
- };
1010
- this.rootName ?? (this.rootName = name);
1011
- this.stack.push(state);
1012
- };
1013
- this.onText = (text) => {
1014
- if (!text) {
1015
- return;
1016
- }
1017
- const current = this.stack[this.stack.length - 1];
1018
- if (!current) {
1019
- return;
1020
- }
1021
- current.textParts.push(text);
1022
- };
1023
- this.onCdata = (text) => {
1024
- this.onText(text);
1025
- };
1026
- this.onCloseTag = () => {
1027
- const state = this.stack.pop();
1028
- if (!state) {
1029
- return;
1030
- }
1031
- const value = finalizeElement(state, this.options);
1032
- const parent = this.stack[this.stack.length - 1];
1033
- if (!parent) {
1034
- this.root = value;
1035
- return;
1036
- }
1037
- const path = this.stack.map((entry) => entry.name);
1038
- addChild(parent.children, state.name, value, this.options, path);
1039
- };
1040
1130
  this.options = buildSettings(options);
1041
1131
  }
1132
+ onOpenTag(tag) {
1133
+ const name = normalizeName(tag.name, this.options);
1134
+ const attributes = normalizeAttributes(tag.attributes, this.options);
1135
+ const state = {
1136
+ name,
1137
+ attributes,
1138
+ textParts: [],
1139
+ children: /* @__PURE__ */ Object.create(null)
1140
+ };
1141
+ this.rootName ?? (this.rootName = name);
1142
+ this.stack.push(state);
1143
+ }
1144
+ onText(text) {
1145
+ if (!text) {
1146
+ return;
1147
+ }
1148
+ const current = this.stack[this.stack.length - 1];
1149
+ if (!current) {
1150
+ return;
1151
+ }
1152
+ current.textParts.push(text);
1153
+ }
1154
+ onCdata(text) {
1155
+ this.onText(text);
1156
+ }
1157
+ onCloseTag() {
1158
+ const state = this.stack.pop();
1159
+ if (!state) {
1160
+ return;
1161
+ }
1162
+ const value = finalizeElement(state, this.options);
1163
+ const parent = this.stack[this.stack.length - 1];
1164
+ if (!parent) {
1165
+ this.root = value;
1166
+ return;
1167
+ }
1168
+ const path = this.stack.map((entry) => entry.name);
1169
+ addChild(parent.children, state.name, value, this.options, path);
1170
+ }
1171
+ consume(token) {
1172
+ if (token instanceof OpenTagToken) {
1173
+ this.onOpenTag(token.tag);
1174
+ return;
1175
+ }
1176
+ if (token instanceof TextToken) {
1177
+ this.onText(token.text);
1178
+ return;
1179
+ }
1180
+ if (token instanceof CdataToken) {
1181
+ this.onCdata(token.text);
1182
+ return;
1183
+ }
1184
+ this.onCloseTag();
1185
+ }
1042
1186
  getResult() {
1043
1187
  if (this.root === null) {
1044
1188
  throw new Error("No root element found");
@@ -1300,6 +1444,6 @@ function finalizeElement(state, options) {
1300
1444
  return result;
1301
1445
  }
1302
1446
 
1303
- export { ObjectBuilder, TreeBuilder, XmlSaxError, XmlSaxParser, buildObject, buildXmlNode, objectToXml, parseXmlString, resolveName, serializeXml, stripNamespace };
1447
+ export { CdataToken, CloseTagToken, CommentToken, DoctypeToken, EndToken, ObjectBuilder, OpenTagToken, ProcessingInstructionToken, TextToken, TreeBuilder, XmlSaxError, XmlSaxParser, XmlToken, buildObject, buildXmlNode, objectToXml, parseXmlString, resolveName, serializeXml, stripNamespace, tokenizeXml, tokenizeXmlAsync };
1304
1448
  //# sourceMappingURL=index.js.map
1305
1449
  //# sourceMappingURL=index.js.map