node-pptx-templater 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,42 +1,194 @@
1
1
  /**
2
- * @fileoverview XML validation and repair utilities.
2
+ * @fileoverview XML validation, repair, recovery, and security diagnostics utilities.
3
3
  *
4
- * Provides tools to check if generated XML is well-formed and
5
- * attempt automatic repairs for common PPTX corruption issues.
4
+ * Provides tools to check if generated XML is well-formed, protect against
5
+ * XML Entity Attacks (XXE, DTD abuse, Billion Laughs), and recover diagnostics.
6
6
  */
7
7
 
8
+ const { XMLValidator } = require('fast-xml-parser')
8
9
  const { XMLParser } = require('../parsers/XMLParser.js')
10
+ const { PPTXError } = require('./errors.js')
9
11
 
10
12
  const parser = new XMLParser()
11
13
 
14
/**
 * Converts a zero-based string offset into a 1-based line/column position.
 *
 * A `\n` starts a new line and resets the column. A `\r` is skipped so that
 * CRLF line endings do not inflate the column. Every other UTF-16 code unit
 * before the offset advances the column by one.
 *
 * @param {string} str - Text the offset refers to.
 * @param {number} index - Zero-based offset into `str`. Offsets beyond the end
 *   of the string are clamped to `str.length`.
 * @returns {{ line: number, col: number }} 1-based position of `index`.
 */
function getLineAndCol(str, index) {
  // Clamp so an out-of-range offset cannot count phantom characters past the end.
  const end = Math.min(index, str.length)
  let line = 1
  let col = 1
  for (let i = 0; i < end; i++) {
    const ch = str[i]
    if (ch === '\n') {
      line++
      col = 1
    } else if (ch !== '\r') {
      col++
    }
  }
  return { line, col }
}
30
+
31
/**
 * Validates that an XML string is secure and well-formed.
 *
 * Checks run in this order and the first failure is returned:
 *  1. external identifiers (SYSTEM/PUBLIC) inside a `<!DOCTYPE`, `<!ENTITY`
 *     or `<!NOTATION` declaration (XXE),
 *  2. `<!ENTITY` declarations (custom/recursive entities),
 *  3. `<!DOCTYPE` declarations (DTD abuse, Billion Laughs),
 *  4. more than 50,000 entity references,
 *  5. well-formedness as reported by `XMLValidator.validate`.
 *
 * The external-identifier check is deliberately limited to markup
 * declarations: the words "system" and "public" are ordinary text and must not
 * cause a document to be rejected when they appear in slide content.
 *
 * @param {string} xmlString - Raw XML content.
 * @returns {{ valid: boolean, error: string|null, line: number|null, column: number|null, recommendation: string|null }}
 */
function validateXml(xmlString) {
  if (typeof xmlString !== 'string') {
    return {
      valid: false,
      error: 'Invalid XML input: expected string.',
      line: 1,
      column: 1,
      recommendation: 'Ensure XML input is passed as a string.',
    }
  }

  // Builds the rejection result for a finding located at a string offset.
  const rejectAt = (index, error, recommendation) => {
    const { line, col } = getLineAndCol(xmlString, index)
    return { valid: false, error, line, column: col, recommendation }
  }

  // 1. External references (XXE): only SYSTEM/PUBLIC used as an external
  //    identifier inside a markup declaration counts, never plain text.
  const external = /<!(?:DOCTYPE|ENTITY|NOTATION)\b[^>]*?\b(SYSTEM|PUBLIC)\b/i.exec(xmlString)
  if (external) {
    // The keyword is the tail of the match; report its own position.
    const keywordIndex = external.index + external[0].length - external[1].length
    return rejectAt(
      keywordIndex,
      'External reference SYSTEM/PUBLIC detected',
      'Remove external system/public identifiers to prevent XXE attacks.'
    )
  }

  // 2. Entity declarations (prevent custom/recursive entities)
  const entityDeclIndex = xmlString.search(/<!ENTITY/i)
  if (entityDeclIndex !== -1) {
    return rejectAt(
      entityDeclIndex,
      'Custom entity declaration detected',
      'Do not declare custom entities to protect against XML entity injection.'
    )
  }

  // 3. DTD / DOCTYPE declarations (DTD abuse, recursive entities, Billion Laughs)
  const doctypeIndex = xmlString.search(/<!DOCTYPE/i)
  if (doctypeIndex !== -1) {
    return rejectAt(
      doctypeIndex,
      'DTD/DOCTYPE declaration detected: entity expansion limit exceeded / DTD abuse',
      'Remove DOCTYPE declarations or DTD abuse to prevent entity expansion attacks.'
    )
  }

  // 4. Oversized number of entity references (DoS guard, limit 50,000)
  const entityCount = (xmlString.match(/&[a-zA-Z0-9#x]+;/g) || []).length
  if (entityCount > 50000) {
    return {
      valid: false,
      error: `Entity expansion limit exceeded: ${entityCount} references (max 50000)`,
      line: 1,
      column: 1,
      recommendation: 'Reduce the density of standard entity references.',
    }
  }

  // 5. Well-formedness: the validator yields `true` or an object carrying `err`.
  const validation = XMLValidator.validate(xmlString)
  if (validation !== true) {
    return {
      valid: false,
      error: validation.err.msg || 'Malformed XML',
      line: validation.err.line || 1,
      column: validation.err.col || 1,
      recommendation:
        'Fix XML syntax errors (unclosed tags, invalid characters, mismatched brackets).',
    }
  }

  return {
    valid: true,
    error: null,
    line: null,
    column: null,
    recommendation: null,
  }
}
123
+
12
124
  /**
13
125
  * Validates that an XML string is well-formed.
126
+ * Backwards compatibility wrapper for original validateXML.
14
127
  *
15
128
  * @param {string} xmlString - XML to validate.
16
129
  * @returns {{ valid: boolean, error: string|null }} Validation result.
17
- *
18
- * @example
19
- * const { valid, error } = validateXML(xml);
20
- * if (!valid) console.error('XML error:', error);
21
130
  */
22
131
  function validateXML(xmlString) {
23
- return parser.validate(xmlString)
132
+ const result = validateXml(xmlString)
133
+ return {
134
+ valid: result.valid,
135
+ error: result.error,
136
+ }
24
137
  }
25
138
 
26
139
  /**
27
- * Attempts to repair common XML corruption issues in PPTX files.
140
+ * Safely parses XML with validation, recovery diagnostics, and fallback reporting.
28
141
  *
29
- * Known issues this addresses:
30
- * 1. Unescaped & in attribute values (e.g., href="a&b" → href="a&amp;b")
31
- * 2. Unclosed tags (limited heuristic repair)
32
- * 3. Invalid XML characters (removes control chars below 0x20 except tab/LF/CR)
142
+ * @param {string} xmlString - Raw XML content.
143
+ * @param {string} filename - Filename for error reporting context.
144
+ * @param {XMLParser} [xmlParserInstance] - Optional parser instance.
145
+ * @returns {Object} Parsed JS object.
146
+ * @throws {PPTXError} If parsing fails or security limits are violated.
147
+ */
148
+ function safeParseXml(xmlString, filename = 'unknown.xml', xmlParserInstance = null) {
149
+ const validation = validateXml(xmlString)
150
+ if (!validation.valid) {
151
+ const errorDetails = {
152
+ file: filename,
153
+ line: validation.line || 1,
154
+ column: validation.column || 1,
155
+ error: validation.error,
156
+ recommendation: validation.recommendation || 'Malformed entity reference detected',
157
+ }
158
+ const err = new PPTXError(`XML parse validation error in ${filename}: ${validation.error}`)
159
+ err.diagnostic = errorDetails
160
+ throw err
161
+ }
162
+
163
+ try {
164
+ const p = xmlParserInstance || parser
165
+ return p.parse(xmlString, filename)
166
+ } catch (err) {
167
+ let line = 1
168
+ let col = 1
169
+ const lineMatch = err.message.match(/line:?\s*(\d+)/i) || err.message.match(/:(\d+):\d+$/)
170
+ const colMatch = err.message.match(/col(umn)?:?\s*(\d+)/i) || err.message.match(/:\d+:(\d+)$/)
171
+ if (lineMatch) line = parseInt(lineMatch[1], 10)
172
+ if (colMatch) col = parseInt(colMatch[2] || colMatch[1], 10)
173
+
174
+ const errorDetails = {
175
+ file: filename,
176
+ line,
177
+ column: col,
178
+ error: err.message,
179
+ recommendation: 'Ensure all XML tags are closed properly and entity syntax is valid.',
180
+ }
181
+ const newErr = new PPTXError(`XML parse error in ${filename}: ${err.message}`)
182
+ newErr.diagnostic = errorDetails
183
+ throw newErr
184
+ }
185
+ }
186
+
187
+ /**
188
+ * Attempts to repair common XML corruption issues in PPTX files.
33
189
  *
34
190
  * @param {string} xmlString - Potentially broken XML.
35
191
  * @returns {{ xml: string, repaired: boolean, changes: string[] }}
36
- *
37
- * @example
38
- * const { xml, repaired, changes } = repairXML(brokenXml);
39
- * if (repaired) console.log('Repaired:', changes);
40
192
  */
41
193
  function repairXML(xmlString) {
42
194
  const changes = []
@@ -48,7 +200,6 @@ function repairXML(xmlString) {
48
200
  if (xml !== before) changes.push('Removed invalid control characters')
49
201
 
50
202
  // Fix 2: Fix unescaped ampersands in text content (not in entities)
51
- // Match & not followed by valid entity patterns
52
203
  const fixedAmp = xml.replace(/&(?!amp;|lt;|gt;|quot;|apos;|#\d+;|#x[0-9a-fA-F]+;)/g, '&amp;')
53
204
  if (fixedAmp !== xml) {
54
205
  xml = fixedAmp
@@ -76,10 +227,6 @@ function repairXML(xmlString) {
76
227
 
77
228
  /**
78
229
  * Checks if an XML string contains a specific element.
79
- *
80
- * @param {string} xmlString
81
- * @param {string} elementName - Element tag name (e.g., 'a:tbl').
82
- * @returns {boolean}
83
230
  */
84
231
  function xmlContainsElement(xmlString, elementName) {
85
232
  return xmlString.includes(`<${elementName}`) || xmlString.includes(`<${elementName}>`)
@@ -87,10 +234,6 @@ function xmlContainsElement(xmlString, elementName) {
87
234
 
88
235
  /**
89
236
  * Counts occurrences of an element in XML.
90
- *
91
- * @param {string} xmlString
92
- * @param {string} elementName
93
- * @returns {number}
94
237
  */
95
238
  function countElements(xmlString, elementName) {
96
239
  const pattern = new RegExp(`<${elementName}[\\s>/]`, 'g')
@@ -99,10 +242,6 @@ function countElements(xmlString, elementName) {
99
242
 
100
243
  /**
101
244
  * Extracts all attribute values for a given attribute name.
102
- *
103
- * @param {string} xmlString - XML string to search.
104
- * @param {string} attrName - Attribute name (e.g., 'r:id', 'name').
105
- * @returns {string[]} Array of attribute values found.
106
245
  */
107
246
  function extractAttributeValues(xmlString, attrName) {
108
247
  const pattern = new RegExp(`${attrName.replace(':', '\\:')}="([^"]*)"`, 'g')
@@ -114,10 +253,126 @@ function extractAttributeValues(xmlString, attrName) {
114
253
  return values
115
254
  }
116
255
 
256
/**
 * Scans an XML string for entity references and tallies them by kind.
 *
 * Buckets:
 *  - `standard`: the five predefined entities (`&amp;` `&lt;` `&gt;` `&quot;` `&apos;`),
 *  - `hex`: well-formed hexadecimal character references (`&#x` + hex digits),
 *  - `numeric`: well-formed decimal character references (`&#` + digits),
 *  - `custom`: everything else, including malformed `&#...;` references, so
 *    that suspicious input is never reported as a harmless character reference.
 *
 * @param {string} xmlString - XML string to scan.
 * @returns {{ standard: number, custom: number, numeric: number, hex: number, total: number, entities: string[] }}
 *   Counters plus every matched reference in document order. All counters are
 *   zero when `xmlString` is not a string.
 */
function scanForEntities(xmlString) {
  const result = {
    standard: 0,
    custom: 0,
    numeric: 0,
    hex: 0,
    total: 0,
    entities: [],
  }
  if (typeof xmlString !== 'string') return result

  const predefined = new Set(['&amp;', '&lt;', '&gt;', '&quot;', '&apos;'])
  // "." is a legal XML name character, so `&my.entity;` must be matched too.
  const references = xmlString.match(/&[a-zA-Z0-9#x_:.-]+;/g) || []
  result.total = references.length

  for (const ref of references) {
    result.entities.push(ref)
    if (predefined.has(ref)) {
      result.standard++
    } else if (/^&#x[0-9a-fA-F]+;$/.test(ref)) {
      result.hex++
    } else if (/^&#[0-9]+;$/.test(ref)) {
      result.numeric++
    } else {
      result.custom++
    }
  }

  return result
}
294
+
295
/**
 * Analyzes basic size and structure properties of an XML string.
 *
 * The element and attribute counts are regex heuristics over the raw text:
 * elements are counted by opening `<name` sequences and attributes by
 * whitespace-preceded `name=` sequences; the string is not parsed.
 *
 * @param {string} xmlString - XML content.
 * @returns {{ sizeBytes: number, lineCount: number, elementCount: number, attributeCount: number, entityStats: Object }}
 *   `entityStats` is the result of `scanForEntities` for the same input, so it
 *   has the same shape whether or not `xmlString` is a string.
 */
function analyzeXmlFile(xmlString) {
  // Computed up front so the non-string fallback carries the same entity
  // statistics shape as the regular result instead of a bare `{}`.
  const entityStats = scanForEntities(xmlString)

  if (typeof xmlString !== 'string') {
    return { sizeBytes: 0, lineCount: 0, elementCount: 0, attributeCount: 0, entityStats }
  }

  const countMatches = pattern => (xmlString.match(pattern) || []).length

  return {
    sizeBytes: Buffer.byteLength(xmlString, 'utf8'),
    lineCount: xmlString.split('\n').length,
    elementCount: countMatches(/<[a-zA-Z0-9_:-]+/g),
    attributeCount: countMatches(/\s[a-zA-Z0-9_:-]+=/g),
    entityStats,
  }
}
320
+
321
/**
 * Reports complexity indicators of the XML document.
 *
 * Tags are found with a regex scan (the string is not parsed). Opening tags
 * increase the nesting depth, closing tags decrease it, and self-closing tags
 * count as one level below the current depth without changing it.
 *
 * @param {string} xmlString - XML content.
 * @returns {{ maxDepth: number, nodeCount: number, ratioTextToMarkup: number }}
 *   `maxDepth` is the deepest nesting seen, `nodeCount` the number of matched
 *   tags (opening, closing and self-closing), and `ratioTextToMarkup` the
 *   length of the text left after stripping `<...>` spans divided by the total
 *   length, rounded to four decimals. All zero for non-string input.
 */
function reportXmlComplexity(xmlString) {
  if (typeof xmlString !== 'string') {
    return { maxDepth: 0, nodeCount: 0, ratioTextToMarkup: 0 }
  }

  // The attribute part is a single optional group: a repeated group here
  // backtracks exponentially on "<a" followed by whitespace and no ">".
  // The optional "/" lets self-closing tags without whitespace ("<b/>") match.
  const tags = xmlString.match(/<\/?[a-zA-Z0-9_:-]+(?:\s[^>]*)?\/?>/g) || []

  let currentDepth = 0
  let maxDepth = 0
  for (const rawTag of tags) {
    if (rawTag.startsWith('</')) {
      // Never go below the root so stray closing tags cannot skew later depths.
      currentDepth = Math.max(0, currentDepth - 1)
    } else if (rawTag.endsWith('/>')) {
      maxDepth = Math.max(maxDepth, currentDepth + 1)
    } else {
      currentDepth++
      maxDepth = Math.max(maxDepth, currentDepth)
    }
  }

  const textLength = xmlString.replace(/<[^>]+>/g, '').length
  const xmlLength = xmlString.length
  const ratioTextToMarkup = xmlLength > 0 ? parseFloat((textLength / xmlLength).toFixed(4)) : 0

  return {
    maxDepth,
    nodeCount: tags.length,
    ratioTextToMarkup,
  }
}
+
117
367
  module.exports = {
368
+ validateXml,
118
369
  validateXML,
370
+ safeParseXml,
119
371
  repairXML,
120
372
  xmlContainsElement,
121
373
  countElements,
122
374
  extractAttributeValues,
375
+ scanForEntities,
376
+ analyzeXmlFile,
377
+ reportXmlComplexity,
123
378
  }