node-pptx-templater 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,6 +23,139 @@
23
23
  const { XMLParser: FastXMLParser, XMLBuilder } = require('fast-xml-parser')
24
24
  const { PPTXError } = require('../utils/errors.js')
25
25
 
26
+ const Z_ORDER_SYMBOL = Symbol('zOrder')
27
+
28
+ const drawingTags = new Set(['p:sp', 'p:pic', 'p:graphicFrame', 'p:grpSp', 'p:cxnSp'])
29
+
30
/**
 * Finds the attribute object of the `p:cNvPr` element belonging to a node
 * produced by the order-preserving parser.
 *
 * The node is expected to hold one tag key (anything other than `:@`) whose
 * value is an array of child nodes. The first child that is one of the
 * non-visual property wrappers is searched for a `p:cNvPr` entry.
 *
 * @param {Object} node - Ordered-parser node to inspect.
 * @returns {Object|null|undefined} The `:@` value stored beside `p:cNvPr`
 *   (undefined when that entry has no `:@` key), or null when no such
 *   entry can be located.
 */
function findcNvPr(node) {
  const NON_VISUAL_WRAPPERS = [
    'p:nvSpPr',
    'p:nvPicPr',
    'p:nvGraphicFramePr',
    'p:nvGrpSpPr',
    'p:nvCxnSpPr',
  ]
  // In ordered output the attribute bag lives under ':@'; the tag is the other key.
  const tagOf = entry => Object.keys(entry).find(key => key !== ':@')

  const ownTag = tagOf(node)
  if (!ownTag) return null

  const kids = node[ownTag]
  if (!Array.isArray(kids)) return null

  const wrapper = kids.find(kid => NON_VISUAL_WRAPPERS.includes(tagOf(kid)))
  if (!wrapper) return null

  const wrapperKids = wrapper[tagOf(wrapper)]
  if (!Array.isArray(wrapperKids)) return null

  const propsEntry = wrapperKids.find(kid => Object.keys(kid).includes('p:cNvPr'))
  return propsEntry ? propsEntry[':@'] : null
}
60
+
61
/**
 * Walks an order-preserving parse tree and records, for every drawing
 * container, the ids of its drawing children in document order.
 *
 * `p:spTree` is stored under the key 'root'; each `p:grpSp` is stored under
 * the stringified id found by findcNvPr. Any other element is only traversed.
 *
 * @param {Object} orderedNode - Node from the order-preserving parser.
 * @param {Map<string, string[]>} [containerMap] - Accumulator, filled in place.
 * @returns {Map<string, string[]>} The accumulator.
 */
function buildZOrderMap(orderedNode, containerMap = new Map()) {
  if (!orderedNode) return containerMap

  const tagOf = entry => Object.keys(entry).find(key => key !== ':@')
  const tag = tagOf(orderedNode)
  const kids = tag ? orderedNode[tag] : undefined
  if (!Array.isArray(kids)) return containerMap

  const isContainer = tag === 'p:spTree' || tag === 'p:grpSp'
  if (!isContainer) {
    kids.forEach(kid => buildZOrderMap(kid, containerMap))
    return containerMap
  }

  const ids = []
  for (const kid of kids) {
    if (!drawingTags.has(tagOf(kid))) continue
    const kidAttrs = findcNvPr(kid)
    if (kidAttrs && kidAttrs['@_id']) {
      ids.push(String(kidAttrs['@_id']))
    }
    // Descend so nested groups register themselves before this container does.
    buildZOrderMap(kid, containerMap)
  }

  if (tag === 'p:spTree') {
    containerMap.set('root', ids)
    return containerMap
  }

  const ownAttrs = findcNvPr(orderedNode)
  const groupKey = ownAttrs ? String(ownAttrs['@_id']) : null
  if (groupKey) {
    containerMap.set(groupKey, ids)
  }
  return containerMap
}
106
+
107
/**
 * Copies the recorded child order onto a container of the regular
 * (non-ordered) parse result, then does the same for its nested groups.
 *
 * The order is stored under Z_ORDER_SYMBOL so it does not appear among the
 * container's string keys.
 *
 * @param {Object} normalContainer - `p:spTree` or `p:grpSp` object from the regular parser.
 * @param {string} containerId - Key to look up in containerMap ('root' or a group id).
 * @param {Map<string, string[]>} containerMap - Map produced by buildZOrderMap.
 * @returns {void}
 */
function attachZOrder(normalContainer, containerId, containerMap) {
  if (!normalContainer) return

  const ids = containerMap.get(containerId)
  if (ids) {
    normalContainer[Z_ORDER_SYMBOL] = ids
  }

  // A lone group may be stored as a bare object instead of a one-element array.
  const rawGroups = normalContainer['p:grpSp'] || []
  const groups = Array.isArray(rawGroups) ? rawGroups : [rawGroups]

  groups.forEach(group => {
    const props = group?.['p:nvGrpSpPr']?.['p:cNvPr']
    if (!props) return
    const groupKey = String(props['@_id'])
    if (groupKey) {
      attachZOrder(group, groupKey, containerMap)
    }
  })
}
126
+
127
/**
 * Unescapes XML entities in a single, non-recursive pass.
 *
 * Decodes the five predefined XML entities (`&amp;`, `&lt;`, `&gt;`,
 * `&quot;`, `&apos;`) and numeric character references in decimal
 * (`&#65;`) or hexadecimal (`&#x41;`) form. Each reference in the input is
 * decoded exactly once: text produced by a replacement is never scanned
 * again, so `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 *
 * Unknown named entities and numeric references outside the valid code point
 * range are left untouched.
 *
 * @param {string} str - XML value to unescape.
 * @returns {string} Unescaped string; non-string input is returned as-is.
 */
function unescapeXml(str) {
  if (typeof str !== 'string') return str
  if (!str.includes('&')) return str

  const NAMED = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" }

  // One alternation handles every supported form, so the result does not
  // depend on the order in which entity kinds are processed.
  return str.replace(
    /&(?:(amp|lt|gt|quot|apos)|#(\d+)|#x([0-9a-fA-F]+));/g,
    (match, named, dec, hex) => {
      if (named !== undefined) return NAMED[named]
      const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16)
      try {
        return String.fromCodePoint(codePoint)
      } catch {
        // fromCodePoint throws RangeError for invalid code points; keep the raw reference.
        return match
      }
    }
  )
}
158
+
26
159
  /**
27
160
  * Parser configuration for fast-xml-parser.
28
161
  * These settings ensure lossless round-trip XML parsing.
@@ -38,7 +171,9 @@ const PARSER_OPTIONS = {
38
171
  commentPropName: '__comment',
39
172
  preserveOrder: false,
40
173
  trimValues: false,
41
- processEntities: true,
174
+ processEntities: false,
175
+ tagValueProcessor: (tagName, val) => unescapeXml(val),
176
+ attributeValueProcessor: (attrName, val) => unescapeXml(val),
42
177
  htmlEntities: false,
43
178
  isArray: (name, jpath) => {
44
179
  // Elements that should ALWAYS be arrays (even when there's only one)
@@ -101,9 +236,25 @@ class XMLParser {
101
236
  */
102
237
  #builder
103
238
 
239
+ /**
240
+ * @private
241
+ * @type {FastXMLParser}
242
+ */
243
+ #orderedParser
244
+
104
245
  constructor() {
105
246
  this.#parser = new FastXMLParser(PARSER_OPTIONS)
106
247
  this.#builder = new XMLBuilder(BUILDER_OPTIONS)
248
+ this.#orderedParser = new FastXMLParser({
249
+ ignoreAttributes: false,
250
+ preserveOrder: true,
251
+ attributeNamePrefix: '@_',
252
+ parseAttributeValue: false,
253
+ parseTagValue: false,
254
+ processEntities: false,
255
+ tagValueProcessor: (tagName, val) => unescapeXml(val),
256
+ attributeValueProcessor: (attrName, val) => unescapeXml(val),
257
+ })
107
258
  }
108
259
 
109
260
  /**
@@ -124,7 +275,31 @@ class XMLParser {
124
275
  }
125
276
 
126
277
  try {
127
- return this.#parser.parse(xmlString)
278
+ const normalObj = this.#parser.parse(xmlString)
279
+
280
+ // Automatically inspect for spTree to build and attach Z-order
281
+ const spTree =
282
+ normalObj?.['p:sld']?.['p:cSld']?.['p:spTree'] ||
283
+ normalObj?.['p:sldLayout']?.['p:cSld']?.['p:spTree'] ||
284
+ normalObj?.['p:sldMaster']?.['p:cSld']?.['p:spTree']
285
+
286
+ if (spTree) {
287
+ try {
288
+ const orderedObj = this.#orderedParser.parse(xmlString)
289
+ const orderedRootNode = orderedObj.find(n => {
290
+ const keys = Object.keys(n)
291
+ return (
292
+ keys.includes('p:sld') || keys.includes('p:sldLayout') || keys.includes('p:sldMaster')
293
+ )
294
+ })
295
+ const containerMap = buildZOrderMap(orderedRootNode)
296
+ attachZOrder(spTree, 'root', containerMap)
297
+ } catch (err) {
298
+ // Fallback gracefully if ordered parsing fails
299
+ }
300
+ }
301
+
302
+ return normalObj
128
303
  } catch (err) {
129
304
  throw new PPTXError(`XML parse error${context ? ` in ${context}` : ''}: ${err.message}`, err)
130
305
  }
@@ -142,13 +317,101 @@ class XMLParser {
142
317
  */
143
318
  build(obj, xmlDeclaration = '') {
144
319
  try {
145
- const xml = this.#builder.build(obj)
320
+ const spTreeObj =
321
+ obj?.['p:sld']?.['p:cSld']?.['p:spTree'] ||
322
+ obj?.['p:sldLayout']?.['p:cSld']?.['p:spTree'] ||
323
+ obj?.['p:sldMaster']?.['p:cSld']?.['p:spTree']
324
+
325
+ let xml = this.#builder.build(obj)
326
+
327
+ if (spTreeObj) {
328
+ const correctSpTreeXml = this.serializeContainer(spTreeObj, 'p:spTree')
329
+ if (xml.includes('<p:spTree/>')) {
330
+ xml = xml.replace('<p:spTree/>', correctSpTreeXml)
331
+ } else {
332
+ xml = xml.replace(/<p:spTree>[\s\S]*<\/p:spTree>/, correctSpTreeXml)
333
+ }
334
+ }
335
+
146
336
  return xmlDeclaration ? `${xmlDeclaration}\n${xml}` : xml
147
337
  } catch (err) {
148
338
  throw new PPTXError(`XML build error: ${err.message}`, err)
149
339
  }
150
340
  }
151
341
 
342
+ /**
343
+ * Helper to serialize drawing containers (p:spTree, p:grpSp) recursively in Z-order.
344
+ */
345
+ serializeContainer(container, containerTagName) {
346
+ const zOrder = container[Z_ORDER_SYMBOL] || []
347
+
348
+ let headerXml = ''
349
+ if (container['p:nvGrpSpPr']) {
350
+ headerXml += this.#builder.build({ 'p:nvGrpSpPr': container['p:nvGrpSpPr'] })
351
+ }
352
+ if (container['p:grpSpPr']) {
353
+ headerXml += this.#builder.build({ 'p:grpSpPr': container['p:grpSpPr'] })
354
+ }
355
+
356
+ // Gather drawing children
357
+ const drawingElements = new Map()
358
+ for (const tag of ['p:sp', 'p:pic', 'p:graphicFrame', 'p:grpSp', 'p:cxnSp']) {
359
+ let items = container[tag] || []
360
+ if (!Array.isArray(items)) items = [items]
361
+ for (const item of items) {
362
+ let id = null
363
+ if (tag === 'p:sp') id = item?.['p:nvSpPr']?.['p:cNvPr']?.['@_id']
364
+ else if (tag === 'p:pic') id = item?.['p:nvPicPr']?.['p:cNvPr']?.['@_id']
365
+ else if (tag === 'p:graphicFrame') id = item?.['p:nvGraphicFramePr']?.['p:cNvPr']?.['@_id']
366
+ else if (tag === 'p:grpSp') id = item?.['p:nvGrpSpPr']?.['p:cNvPr']?.['@_id']
367
+ else if (tag === 'p:cxnSp') id = item?.['p:nvCxnSpPr']?.['p:cNvPr']?.['@_id']
368
+
369
+ if (id !== undefined && id !== null) {
370
+ drawingElements.set(String(id), { tag, obj: item })
371
+ }
372
+ }
373
+ }
374
+
375
+ // Append items not in the explicit Z-order list
376
+ const fullZOrder = [...zOrder]
377
+ for (const id of drawingElements.keys()) {
378
+ if (!fullZOrder.includes(id)) {
379
+ fullZOrder.push(id)
380
+ }
381
+ }
382
+
383
+ // Build children in Z-order
384
+ let childrenXml = ''
385
+ for (const id of fullZOrder) {
386
+ const el = drawingElements.get(id)
387
+ if (!el) continue
388
+
389
+ if (el.tag === 'p:grpSp') {
390
+ childrenXml += this.serializeContainer(el.obj, 'p:grpSp')
391
+ } else {
392
+ childrenXml += this.#builder.build({ [el.tag]: el.obj })
393
+ }
394
+ }
395
+
396
+ // Container attributes
397
+ let attrsStr = ''
398
+ const attrs = {}
399
+ for (const k in container) {
400
+ if (k.startsWith('@_')) {
401
+ attrs[k] = container[k]
402
+ }
403
+ }
404
+ if (Object.keys(attrs).length > 0) {
405
+ const attrXml = this.#builder.build({ [containerTagName]: { ...attrs } })
406
+ const match = attrXml.match(/<[^>]+>/)
407
+ if (match) {
408
+ attrsStr = match[0].slice(containerTagName.length + 1, -1)
409
+ }
410
+ }
411
+
412
+ return `<${containerTagName}${attrsStr}>${headerXml}${childrenXml}</${containerTagName}>`
413
+ }
414
+
152
415
  /**
153
416
  * Extracts the XML declaration line from an XML string.
154
417
  *
@@ -289,4 +552,5 @@ class XMLParser {
289
552
 
290
553
  module.exports = {
291
554
  XMLParser,
555
+ Z_ORDER_SYMBOL,
292
556
  }
@@ -1,42 +1,194 @@
1
1
  /**
2
- * @fileoverview XML validation and repair utilities.
2
+ * @fileoverview XML validation, repair, recovery, and security diagnostics utilities.
3
3
  *
4
- * Provides tools to check if generated XML is well-formed and
5
- * attempt automatic repairs for common PPTX corruption issues.
4
+ * Provides tools to check if generated XML is well-formed, protect against
5
+ * XML Entity Attacks (XXE, DTD abuse, Billion Laughs), and recover diagnostics.
6
6
  */
7
7
 
8
+ const { XMLValidator } = require('fast-xml-parser')
8
9
  const { XMLParser } = require('../parsers/XMLParser.js')
10
+ const { PPTXError } = require('./errors.js')
9
11
 
10
12
  const parser = new XMLParser()
11
13
 
14
/**
 * Converts a string offset into a 1-based line and column.
 *
 * Every `\n` before the offset starts a new line and resets the column to 1.
 * `\r` characters are skipped without advancing the column, so CRLF line
 * endings count as a single break.
 *
 * @param {string} str - Text the offset refers to.
 * @param {number} index - Zero-based offset into str.
 * @returns {{ line: number, col: number }} Position of the offset.
 */
function getLineAndCol(str, index) {
  const position = { line: 1, col: 1 }
  for (let offset = 0; offset < index; offset += 1) {
    const ch = str[offset]
    if (ch === '\n') {
      position.line += 1
      position.col = 1
    } else if (ch !== '\r') {
      position.col += 1
    }
  }
  return position
}
30
+
31
/**
 * Validates that an XML string is secure and well-formed.
 *
 * Checks run in this order and the first failure is returned:
 *  1. external identifiers (`SYSTEM` / `PUBLIC`) inside a `<!DOCTYPE`,
 *     `<!ENTITY` or `<!NOTATION` declaration (XXE);
 *  2. any `<!ENTITY` declaration;
 *  3. any `<!DOCTYPE` declaration;
 *  4. more than 50,000 entity references;
 *  5. well-formedness, via XMLValidator.
 *
 * The words "system" and "public" in ordinary text or attribute values are
 * not treated as external references; only declarations are inspected.
 *
 * @param {string} xmlString - Raw XML content.
 * @returns {{ valid: boolean, error: string|null, line: number|null, column: number|null, recommendation: string|null }}
 */
function validateXml(xmlString) {
  // Uniform failure record; position defaults to the start of the document.
  const reject = (error, recommendation, line = 1, column = 1) => ({
    valid: false,
    error,
    line,
    column,
    recommendation,
  })
  // Same, with the position derived from a string offset.
  const rejectAt = (index, error, recommendation) => {
    const { line, col } = getLineAndCol(xmlString, index)
    return reject(error, recommendation, line, col)
  }

  if (typeof xmlString !== 'string') {
    return reject('Invalid XML input: expected string.', 'Ensure XML input is passed as a string.')
  }

  // 1. Check for external references (XXE). Only SYSTEM/PUBLIC keywords that
  //    sit inside a markup declaration count; matching the bare words would
  //    reject any document whose text merely mentions "system" or "public".
  const externalId = /<!(?:DOCTYPE|ENTITY|NOTATION)\b[^>]*?\b(SYSTEM|PUBLIC)\b/i.exec(xmlString)
  if (externalId) {
    // Report the position of the keyword itself, not of the declaration start.
    const keywordIndex = externalId.index + externalId[0].length - externalId[1].length
    return rejectAt(
      keywordIndex,
      'External reference SYSTEM/PUBLIC detected',
      'Remove external system/public identifiers to prevent XXE attacks.'
    )
  }

  // 2. Check for entity declarations (prevent custom/recursive entities)
  const entityIndex = xmlString.search(/<!ENTITY/i)
  if (entityIndex !== -1) {
    return rejectAt(
      entityIndex,
      'Custom entity declaration detected',
      'Do not declare custom entities to protect against XML entity injection.'
    )
  }

  // 3. Check for DTD / DOCTYPE declarations (DTD abuse, recursive entities, Billion Laughs)
  const doctypeIndex = xmlString.search(/<!DOCTYPE/i)
  if (doctypeIndex !== -1) {
    return rejectAt(
      doctypeIndex,
      'DTD/DOCTYPE declaration detected: entity expansion limit exceeded / DTD abuse',
      'Remove DOCTYPE declarations or DTD abuse to prevent entity expansion attacks.'
    )
  }

  // 4. Check for oversized entity references to prevent DoS (exceeding 50,000 entity references)
  const entityCount = (xmlString.match(/&[a-zA-Z0-9#x]+;/g) || []).length
  if (entityCount > 50000) {
    return reject(
      `Entity expansion limit exceeded: ${entityCount} references (max 50000)`,
      'Reduce the density of standard entity references.'
    )
  }

  // 5. Well-formedness check using XMLValidator
  const validation = XMLValidator.validate(xmlString)
  if (validation !== true) {
    return reject(
      validation.err.msg || 'Malformed XML',
      'Fix XML syntax errors (unclosed tags, invalid characters, mismatched brackets).',
      validation.err.line || 1,
      validation.err.col || 1
    )
  }

  return {
    valid: true,
    error: null,
    line: null,
    column: null,
    recommendation: null,
  }
}
123
+
12
124
  /**
13
125
  * Validates that an XML string is well-formed.
126
+ * Backwards compatibility wrapper for original validateXML.
14
127
  *
15
128
  * @param {string} xmlString - XML to validate.
16
129
  * @returns {{ valid: boolean, error: string|null }} Validation result.
17
- *
18
- * @example
19
- * const { valid, error } = validateXML(xml);
20
- * if (!valid) console.error('XML error:', error);
21
130
  */
22
131
  function validateXML(xmlString) {
23
- return parser.validate(xmlString)
132
+ const result = validateXml(xmlString)
133
+ return {
134
+ valid: result.valid,
135
+ error: result.error,
136
+ }
24
137
  }
25
138
 
26
139
  /**
27
- * Attempts to repair common XML corruption issues in PPTX files.
140
+ * Safely parses XML with validation, recovery diagnostics, and fallback reporting.
28
141
  *
29
- * Known issues this addresses:
30
- * 1. Unescaped & in attribute values (e.g., href="a&b" → href="a&amp;b")
31
- * 2. Unclosed tags (limited heuristic repair)
32
- * 3. Invalid XML characters (removes control chars below 0x20 except tab/LF/CR)
142
+ * @param {string} xmlString - Raw XML content.
143
+ * @param {string} filename - Filename for error reporting context.
144
+ * @param {XMLParser} [xmlParserInstance] - Optional parser instance.
145
+ * @returns {Object} Parsed JS object.
146
+ * @throws {PPTXError} If parsing fails or security limits are violated.
147
+ */
148
function safeParseXml(xmlString, filename = 'unknown.xml', xmlParserInstance = null) {
  // Reject insecure or malformed input before it reaches the parser.
  const validation = validateXml(xmlString)
  if (!validation.valid) {
    const err = new PPTXError(`XML parse validation error in ${filename}: ${validation.error}`)
    err.diagnostic = {
      file: filename,
      line: validation.line || 1,
      column: validation.column || 1,
      error: validation.error,
      recommendation: validation.recommendation || 'Malformed entity reference detected',
    }
    throw err
  }

  try {
    const activeParser = xmlParserInstance || parser
    return activeParser.parse(xmlString, filename)
  } catch (err) {
    // Anything can be thrown; never assume `err` is an Error with a message.
    const message = String(err?.message ?? err)

    // Recover a position from "line: N" / "col: N" wording, or from a
    // trailing ":line:col" suffix; default to 1:1 when neither is present.
    const lineMatch = message.match(/line:?\s*(\d+)/i) || message.match(/:(\d+):\d+$/)
    const colMatch = message.match(/col(?:umn)?:?\s*(\d+)/i) || message.match(/:\d+:(\d+)$/)
    const line = lineMatch ? Number.parseInt(lineMatch[1], 10) : 1
    const column = colMatch ? Number.parseInt(colMatch[1], 10) : 1

    // Pass the original error as the second argument, as XMLParser does for
    // its own parse/build errors, so the underlying failure is not lost.
    const newErr = new PPTXError(`XML parse error in ${filename}: ${message}`, err)
    newErr.diagnostic = {
      file: filename,
      line,
      column,
      error: message,
      recommendation: 'Ensure all XML tags are closed properly and entity syntax is valid.',
    }
    throw newErr
  }
}
186
+
187
+ /**
188
+ * Attempts to repair common XML corruption issues in PPTX files.
33
189
  *
34
190
  * @param {string} xmlString - Potentially broken XML.
35
191
  * @returns {{ xml: string, repaired: boolean, changes: string[] }}
36
- *
37
- * @example
38
- * const { xml, repaired, changes } = repairXML(brokenXml);
39
- * if (repaired) console.log('Repaired:', changes);
40
192
  */
41
193
  function repairXML(xmlString) {
42
194
  const changes = []
@@ -48,7 +200,6 @@ function repairXML(xmlString) {
48
200
  if (xml !== before) changes.push('Removed invalid control characters')
49
201
 
50
202
  // Fix 2: Fix unescaped ampersands in text content (not in entities)
51
- // Match & not followed by valid entity patterns
52
203
  const fixedAmp = xml.replace(/&(?!amp;|lt;|gt;|quot;|apos;|#\d+;|#x[0-9a-fA-F]+;)/g, '&amp;')
53
204
  if (fixedAmp !== xml) {
54
205
  xml = fixedAmp
@@ -76,10 +227,6 @@ function repairXML(xmlString) {
76
227
 
77
228
  /**
78
229
  * Checks if an XML string contains a specific element.
79
- *
80
- * @param {string} xmlString
81
- * @param {string} elementName - Element tag name (e.g., 'a:tbl').
82
- * @returns {boolean}
83
230
  */
84
231
  function xmlContainsElement(xmlString, elementName) {
85
232
  return xmlString.includes(`<${elementName}`) || xmlString.includes(`<${elementName}>`)
@@ -87,10 +234,6 @@ function xmlContainsElement(xmlString, elementName) {
87
234
 
88
235
  /**
89
236
  * Counts occurrences of an element in XML.
90
- *
91
- * @param {string} xmlString
92
- * @param {string} elementName
93
- * @returns {number}
94
237
  */
95
238
  function countElements(xmlString, elementName) {
96
239
  const pattern = new RegExp(`<${elementName}[\\s>/]`, 'g')
@@ -99,10 +242,6 @@ function countElements(xmlString, elementName) {
99
242
 
100
243
  /**
101
244
  * Extracts all attribute values for a given attribute name.
102
- *
103
- * @param {string} xmlString - XML string to search.
104
- * @param {string} attrName - Attribute name (e.g., 'r:id', 'name').
105
- * @returns {string[]} Array of attribute values found.
106
245
  */
107
246
  function extractAttributeValues(xmlString, attrName) {
108
247
  const pattern = new RegExp(`${attrName.replace(':', '\\:')}="([^"]*)"`, 'g')
@@ -114,10 +253,126 @@ function extractAttributeValues(xmlString, attrName) {
114
253
  return values
115
254
  }
116
255
 
256
/**
 * Tallies the entity references found in an XML string.
 *
 * A reference is `&`, one or more of the characters `a-z A-Z 0-9 # x _ : -`,
 * then `;`. The five predefined XML entities count as `standard`, `&#x...;`
 * as `hex`, any other `&#...;` as `numeric`, and everything else as `custom`.
 *
 * @param {string} xmlString - XML string to scan.
 * @returns {{ standard: number, custom: number, numeric: number, hex: number, total: number, entities: string[] }}
 *   Counts per category plus every matched reference in document order.
 *   Non-string input yields the all-zero result.
 */
function scanForEntities(xmlString) {
  const tally = {
    standard: 0,
    custom: 0,
    numeric: 0,
    hex: 0,
    total: 0,
    entities: [],
  }
  if (typeof xmlString !== 'string') return tally

  const PREDEFINED = new Set(['&amp;', '&lt;', '&gt;', '&quot;', '&apos;'])
  const categoryOf = reference => {
    if (PREDEFINED.has(reference)) return 'standard'
    // Test the hex prefix first: it also begins with '&#'.
    if (reference.startsWith('&#x')) return 'hex'
    if (reference.startsWith('&#')) return 'numeric'
    return 'custom'
  }

  const references = xmlString.match(/&[a-zA-Z0-9#x_:-]+;/g) || []
  tally.total = references.length
  for (const reference of references) {
    tally.entities.push(reference)
    tally[categoryOf(reference)] += 1
  }

  return tally
}
294
+
295
/**
 * Collects basic size and content statistics for an XML string.
 *
 * All counts are regex-based estimates taken from the raw text; the document
 * is not parsed.
 *
 * @param {string} xmlString - XML content.
 * @returns {{ sizeBytes: number, lineCount: number, elementCount: number, attributeCount: number, entityStats: Object }}
 *   UTF-8 byte size, number of `\n`-separated lines, number of opening tags,
 *   number of `name=` occurrences preceded by whitespace, and the result of
 *   scanForEntities. Non-string input yields zeros and an empty entityStats.
 */
function analyzeXmlFile(xmlString) {
  if (typeof xmlString !== 'string') {
    return { sizeBytes: 0, lineCount: 0, elementCount: 0, attributeCount: 0, entityStats: {} }
  }

  const occurrences = pattern => (xmlString.match(pattern) || []).length

  return {
    sizeBytes: Buffer.byteLength(xmlString, 'utf8'),
    lineCount: xmlString.split('\n').length,
    elementCount: occurrences(/<[a-zA-Z0-9_:-]+/g),
    attributeCount: occurrences(/\s[a-zA-Z0-9_:-]+=/g),
    entityStats: scanForEntities(xmlString),
  }
}
320
+
321
/**
 * Reports complexity indicators of an XML document.
 *
 * Tags are found with a regex over the raw text (the document is not
 * parsed). Opening tags increase the nesting depth, closing tags decrease
 * it, and self-closing tags count as one level below the current depth
 * without changing it.
 *
 * @param {string} xmlString - XML content.
 * @returns {{ maxDepth: number, nodeCount: number, ratioTextToMarkup: number }}
 *   Deepest nesting level seen, number of tags matched (opening, closing and
 *   self-closing), and the share of characters left after removing `<...>`
 *   spans, rounded to four decimals. Non-string input yields all zeros.
 */
function reportXmlComplexity(xmlString) {
  if (typeof xmlString !== 'string') {
    return { maxDepth: 0, nodeCount: 0, ratioTextToMarkup: 0 }
  }

  let currentDepth = 0
  let maxDepth = 0
  let nodeCount = 0

  // The attribute part is a single optional group: a repeated group whose
  // body can itself match whitespace backtracks exponentially on input such
  // as `<a` followed by many spaces and no `>`. The optional `/` before `>`
  // lets attribute-less self-closing tags like `<b/>` match as well.
  const tagRegex = /<\/?[a-zA-Z0-9_:-]+(?:\s[^>]*)?\/?>/g
  for (const [rawTag] of xmlString.matchAll(tagRegex)) {
    nodeCount += 1
    if (rawTag.startsWith('</')) {
      currentDepth -= 1
    } else if (rawTag.endsWith('/>')) {
      // Self-closing: occupies one level below the current one, then is gone.
      maxDepth = Math.max(maxDepth, currentDepth + 1)
    } else {
      currentDepth += 1
      maxDepth = Math.max(maxDepth, currentDepth)
    }
  }

  const textLength = xmlString.replace(/<[^>]+>/g, '').length
  const xmlLength = xmlString.length
  const ratioTextToMarkup =
    xmlLength > 0 ? Number.parseFloat((textLength / xmlLength).toFixed(4)) : 0

  return {
    maxDepth,
    nodeCount,
    ratioTextToMarkup,
  }
}
366
+
117
367
  module.exports = {
368
+ validateXml,
118
369
  validateXML,
370
+ safeParseXml,
119
371
  repairXML,
120
372
  xmlContainsElement,
121
373
  countElements,
122
374
  extractAttributeValues,
375
+ scanForEntities,
376
+ analyzeXmlFile,
377
+ reportXmlComplexity,
123
378
  }