brepjs-bim 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,9 +24,15 @@ export interface XmlNode {
24
24
  readonly text: string;
25
25
  }
26
26
  /**
27
- * Parse an XML string into a tree. Tolerant of the XML declaration, comments,
28
- * whitespace, self-closing tags, and CDATA-free text. Throws on malformed
29
- * structure (unbalanced tags); callers wrap this in a `Result`.
27
+ * Parse an XML string into a tree. Tolerant of the XML declaration, processing
28
+ * instructions, comments, whitespace, self-closing tags, and CDATA-free text.
29
+ * Throws on malformed or unbalanced structure; callers wrap this in a `Result`.
30
+ *
31
+ * This is a hand-written cursor scan rather than a single tokenizing regex: the
32
+ * input is an untrusted `.bcfzip` payload, and a backtracking regex over
33
+ * uncontrolled data is a polynomial-ReDoS vector. Every construct here is
34
+ * consumed by an `indexOf` or a single-character advance, so the parse is linear
35
+ * in the input length. The sibling `ids/idsXml.ts` parser scans the same way.
30
36
  */
31
37
  export declare function parseXml(xml: string): XmlNode;
32
38
  export declare function findChild(node: XmlNode, tag: string): XmlNode | undefined;
@@ -15979,24 +15979,24 @@ var XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
15979
15979
  function xmlDocument(rootXml) {
15980
15980
  return `${XML_DECLARATION}\n${rootXml}\n`;
15981
15981
  }
15982
- function parseAttrs(raw) {
15983
- const attrs = {};
15984
- const attrRe = /([\w:.-]+)\s*=\s*"([^"]*)"/g;
15985
- let m;
15986
- while ((m = attrRe.exec(raw)) !== null) {
15987
- const key = m[1];
15988
- const val = m[2];
15989
- if (key !== void 0 && val !== void 0) attrs[key] = unescapeXml(val);
15990
- }
15991
- return attrs;
15982
+ function isNameChar(c) {
15983
+ return /[\w:.-]/.test(c);
15984
+ }
15985
+ function isWhitespace(c) {
15986
+ return /\s/.test(c);
15992
15987
  }
15993
15988
  /**
15994
- * Parse an XML string into a tree. Tolerant of the XML declaration, comments,
15995
- * whitespace, self-closing tags, and CDATA-free text. Throws on malformed
15996
- * structure (unbalanced tags); callers wrap this in a `Result`.
15989
+ * Parse an XML string into a tree. Tolerant of the XML declaration, processing
15990
+ * instructions, comments, whitespace, self-closing tags, and CDATA-free text.
15991
+ * Throws on malformed or unbalanced structure; callers wrap this in a `Result`.
15992
+ *
15993
+ * This is a hand-written cursor scan rather than a single tokenizing regex: the
15994
+ * input is an untrusted `.bcfzip` payload, and a backtracking regex over
15995
+ * uncontrolled data is a polynomial-ReDoS vector. Every construct here is
15996
+ * consumed by an `indexOf` or a single-character advance, so the parse is linear
15997
+ * in the input length. The sibling `ids/idsXml.ts` parser scans the same way.
15997
15998
  */
15998
15999
  function parseXml(xml) {
15999
- const tokenRe = /<!--[\s\S]*?-->|<\?[\s\S]*?\?>|<\/([\w:.-]+)\s*>|<([\w:.-]+)((?:\s+[\w:.-]+\s*=\s*"[^"]*")*)\s*(\/?)>|([^<]+)/g;
16000
16000
  const root = {
16001
16001
  tag: "#root",
16002
16002
  attrs: {},
@@ -16004,36 +16004,84 @@ function parseXml(xml) {
16004
16004
  text: ""
16005
16005
  };
16006
16006
  const stack = [root];
16007
- let m;
16008
- while ((m = tokenRe.exec(xml)) !== null) {
16009
- const [full, closeTag, openTag, attrsRaw, selfClose, textRun] = m;
16010
- if (full.startsWith("<!--") || full.startsWith("<?")) continue;
16011
- if (closeTag !== void 0) {
16012
- const top = stack[stack.length - 1];
16013
- if (top === void 0 || top.tag !== closeTag) throw new Error(`Unbalanced XML: unexpected </${closeTag}>`);
16014
- stack.pop();
16015
- continue;
16016
- }
16017
- if (openTag !== void 0) {
16018
- const node = {
16019
- tag: openTag,
16020
- attrs: parseAttrs(attrsRaw ?? ""),
16021
- children: [],
16022
- text: ""
16023
- };
16024
- const parent = stack[stack.length - 1];
16025
- if (parent === void 0) throw new Error("Unbalanced XML: empty stack");
16026
- parent.children.push(node);
16027
- if (selfClose !== "/") stack.push(node);
16028
- continue;
16007
+ const len = xml.length;
16008
+ let i = 0;
16009
+ const fail = (msg) => {
16010
+ throw new Error(`Malformed XML: ${msg} at offset ${String(i)}`);
16011
+ };
16012
+ const skipWhitespace = () => {
16013
+ while (i < len && isWhitespace(xml.charAt(i))) i += 1;
16014
+ };
16015
+ const readName = () => {
16016
+ const start = i;
16017
+ while (i < len && isNameChar(xml.charAt(i))) i += 1;
16018
+ return xml.slice(start, i);
16019
+ };
16020
+ const readAttrs = () => {
16021
+ const attrs = {};
16022
+ for (;;) {
16023
+ skipWhitespace();
16024
+ const c = xml.charAt(i);
16025
+ if (i >= len || c === ">" || c === "/") return attrs;
16026
+ const name = readName();
16027
+ if (name.length === 0) fail("expected attribute name");
16028
+ skipWhitespace();
16029
+ if (xml.charAt(i) !== "=") fail(`expected '=' after attribute "${name}"`);
16030
+ i += 1;
16031
+ skipWhitespace();
16032
+ if (xml.charAt(i) !== "\"") fail(`expected '"' opening attribute "${name}"`);
16033
+ i += 1;
16034
+ const end = xml.indexOf("\"", i);
16035
+ if (end === -1) fail(`unterminated value for attribute "${name}"`);
16036
+ attrs[name] = unescapeXml(xml.slice(i, end));
16037
+ i = end + 1;
16029
16038
  }
16030
- if (textRun !== void 0) {
16031
- const decoded = unescapeXml(textRun);
16032
- if (decoded.trim().length > 0) {
16033
- const top = stack[stack.length - 1];
16034
- if (top !== void 0) top.text += decoded;
16035
- }
16039
+ };
16040
+ while (i < len) if (xml.startsWith("<!--", i)) {
16041
+ const end = xml.indexOf("-->", i + 4);
16042
+ if (end === -1) fail("unterminated comment");
16043
+ i = end + 3;
16044
+ } else if (xml.startsWith("<?", i)) {
16045
+ const end = xml.indexOf("?>", i + 2);
16046
+ if (end === -1) fail("unterminated processing instruction");
16047
+ i = end + 2;
16048
+ } else if (xml.startsWith("</", i)) {
16049
+ i += 2;
16050
+ const name = readName();
16051
+ skipWhitespace();
16052
+ if (xml.charAt(i) !== ">") fail(`expected '>' closing </${name}>`);
16053
+ i += 1;
16054
+ const top = stack[stack.length - 1];
16055
+ if (top === void 0 || top.tag !== name) throw new Error(`Unbalanced XML: unexpected </${name}>`);
16056
+ stack.pop();
16057
+ } else if (xml.charAt(i) === "<") {
16058
+ i += 1;
16059
+ const tag = readName();
16060
+ if (tag.length === 0) fail("expected element name");
16061
+ const node = {
16062
+ tag,
16063
+ attrs: readAttrs(),
16064
+ children: [],
16065
+ text: ""
16066
+ };
16067
+ const parent = stack[stack.length - 1];
16068
+ if (parent === void 0) throw new Error("Unbalanced XML: empty stack");
16069
+ parent.children.push(node);
16070
+ skipWhitespace();
16071
+ if (xml.startsWith("/>", i)) i += 2;
16072
+ else if (xml.charAt(i) === ">") {
16073
+ i += 1;
16074
+ stack.push(node);
16075
+ } else fail(`expected '>' in <${tag}>`);
16076
+ } else {
16077
+ const next = xml.indexOf("<", i);
16078
+ const end = next === -1 ? len : next;
16079
+ const decoded = unescapeXml(xml.slice(i, end));
16080
+ if (decoded.trim().length > 0) {
16081
+ const top = stack[stack.length - 1];
16082
+ if (top !== void 0) top.text += decoded;
16036
16083
  }
16084
+ i = end;
16037
16085
  }
16038
16086
  if (stack.length !== 1) throw new Error("Unbalanced XML: unclosed elements remain");
16039
16087
  const top = root.children[0];
@@ -15956,24 +15956,24 @@ var XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
15956
15956
  function xmlDocument(rootXml) {
15957
15957
  return `${XML_DECLARATION}\n${rootXml}\n`;
15958
15958
  }
15959
- function parseAttrs(raw) {
15960
- const attrs = {};
15961
- const attrRe = /([\w:.-]+)\s*=\s*"([^"]*)"/g;
15962
- let m;
15963
- while ((m = attrRe.exec(raw)) !== null) {
15964
- const key = m[1];
15965
- const val = m[2];
15966
- if (key !== void 0 && val !== void 0) attrs[key] = unescapeXml(val);
15967
- }
15968
- return attrs;
15959
+ function isNameChar(c) {
15960
+ return /[\w:.-]/.test(c);
15961
+ }
15962
+ function isWhitespace(c) {
15963
+ return /\s/.test(c);
15969
15964
  }
15970
15965
  /**
15971
- * Parse an XML string into a tree. Tolerant of the XML declaration, comments,
15972
- * whitespace, self-closing tags, and CDATA-free text. Throws on malformed
15973
- * structure (unbalanced tags); callers wrap this in a `Result`.
15966
+ * Parse an XML string into a tree. Tolerant of the XML declaration, processing
15967
+ * instructions, comments, whitespace, self-closing tags, and CDATA-free text.
15968
+ * Throws on malformed or unbalanced structure; callers wrap this in a `Result`.
15969
+ *
15970
+ * This is a hand-written cursor scan rather than a single tokenizing regex: the
15971
+ * input is an untrusted `.bcfzip` payload, and a backtracking regex over
15972
+ * uncontrolled data is a polynomial-ReDoS vector. Every construct here is
15973
+ * consumed by an `indexOf` or a single-character advance, so the parse is linear
15974
+ * in the input length. The sibling `ids/idsXml.ts` parser scans the same way.
15974
15975
  */
15975
15976
  function parseXml(xml) {
15976
- const tokenRe = /<!--[\s\S]*?-->|<\?[\s\S]*?\?>|<\/([\w:.-]+)\s*>|<([\w:.-]+)((?:\s+[\w:.-]+\s*=\s*"[^"]*")*)\s*(\/?)>|([^<]+)/g;
15977
15977
  const root = {
15978
15978
  tag: "#root",
15979
15979
  attrs: {},
@@ -15981,36 +15981,84 @@ function parseXml(xml) {
15981
15981
  text: ""
15982
15982
  };
15983
15983
  const stack = [root];
15984
- let m;
15985
- while ((m = tokenRe.exec(xml)) !== null) {
15986
- const [full, closeTag, openTag, attrsRaw, selfClose, textRun] = m;
15987
- if (full.startsWith("<!--") || full.startsWith("<?")) continue;
15988
- if (closeTag !== void 0) {
15989
- const top = stack[stack.length - 1];
15990
- if (top === void 0 || top.tag !== closeTag) throw new Error(`Unbalanced XML: unexpected </${closeTag}>`);
15991
- stack.pop();
15992
- continue;
15993
- }
15994
- if (openTag !== void 0) {
15995
- const node = {
15996
- tag: openTag,
15997
- attrs: parseAttrs(attrsRaw ?? ""),
15998
- children: [],
15999
- text: ""
16000
- };
16001
- const parent = stack[stack.length - 1];
16002
- if (parent === void 0) throw new Error("Unbalanced XML: empty stack");
16003
- parent.children.push(node);
16004
- if (selfClose !== "/") stack.push(node);
16005
- continue;
15984
+ const len = xml.length;
15985
+ let i = 0;
15986
+ const fail = (msg) => {
15987
+ throw new Error(`Malformed XML: ${msg} at offset ${String(i)}`);
15988
+ };
15989
+ const skipWhitespace = () => {
15990
+ while (i < len && isWhitespace(xml.charAt(i))) i += 1;
15991
+ };
15992
+ const readName = () => {
15993
+ const start = i;
15994
+ while (i < len && isNameChar(xml.charAt(i))) i += 1;
15995
+ return xml.slice(start, i);
15996
+ };
15997
+ const readAttrs = () => {
15998
+ const attrs = {};
15999
+ for (;;) {
16000
+ skipWhitespace();
16001
+ const c = xml.charAt(i);
16002
+ if (i >= len || c === ">" || c === "/") return attrs;
16003
+ const name = readName();
16004
+ if (name.length === 0) fail("expected attribute name");
16005
+ skipWhitespace();
16006
+ if (xml.charAt(i) !== "=") fail(`expected '=' after attribute "${name}"`);
16007
+ i += 1;
16008
+ skipWhitespace();
16009
+ if (xml.charAt(i) !== "\"") fail(`expected '"' opening attribute "${name}"`);
16010
+ i += 1;
16011
+ const end = xml.indexOf("\"", i);
16012
+ if (end === -1) fail(`unterminated value for attribute "${name}"`);
16013
+ attrs[name] = unescapeXml(xml.slice(i, end));
16014
+ i = end + 1;
16006
16015
  }
16007
- if (textRun !== void 0) {
16008
- const decoded = unescapeXml(textRun);
16009
- if (decoded.trim().length > 0) {
16010
- const top = stack[stack.length - 1];
16011
- if (top !== void 0) top.text += decoded;
16012
- }
16016
+ };
16017
+ while (i < len) if (xml.startsWith("<!--", i)) {
16018
+ const end = xml.indexOf("-->", i + 4);
16019
+ if (end === -1) fail("unterminated comment");
16020
+ i = end + 3;
16021
+ } else if (xml.startsWith("<?", i)) {
16022
+ const end = xml.indexOf("?>", i + 2);
16023
+ if (end === -1) fail("unterminated processing instruction");
16024
+ i = end + 2;
16025
+ } else if (xml.startsWith("</", i)) {
16026
+ i += 2;
16027
+ const name = readName();
16028
+ skipWhitespace();
16029
+ if (xml.charAt(i) !== ">") fail(`expected '>' closing </${name}>`);
16030
+ i += 1;
16031
+ const top = stack[stack.length - 1];
16032
+ if (top === void 0 || top.tag !== name) throw new Error(`Unbalanced XML: unexpected </${name}>`);
16033
+ stack.pop();
16034
+ } else if (xml.charAt(i) === "<") {
16035
+ i += 1;
16036
+ const tag = readName();
16037
+ if (tag.length === 0) fail("expected element name");
16038
+ const node = {
16039
+ tag,
16040
+ attrs: readAttrs(),
16041
+ children: [],
16042
+ text: ""
16043
+ };
16044
+ const parent = stack[stack.length - 1];
16045
+ if (parent === void 0) throw new Error("Unbalanced XML: empty stack");
16046
+ parent.children.push(node);
16047
+ skipWhitespace();
16048
+ if (xml.startsWith("/>", i)) i += 2;
16049
+ else if (xml.charAt(i) === ">") {
16050
+ i += 1;
16051
+ stack.push(node);
16052
+ } else fail(`expected '>' in <${tag}>`);
16053
+ } else {
16054
+ const next = xml.indexOf("<", i);
16055
+ const end = next === -1 ? len : next;
16056
+ const decoded = unescapeXml(xml.slice(i, end));
16057
+ if (decoded.trim().length > 0) {
16058
+ const top = stack[stack.length - 1];
16059
+ if (top !== void 0) top.text += decoded;
16013
16060
  }
16061
+ i = end;
16014
16062
  }
16015
16063
  if (stack.length !== 1) throw new Error("Unbalanced XML: unclosed elements remain");
16016
16064
  const top = root.children[0];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "brepjs-bim",
3
- "version": "0.3.0",
3
+ "version": "0.3.1",
4
4
  "description": "BIM layer for brepjs — IFC4-aligned parametric building elements",
5
5
  "keywords": [
6
6
  "bim",