node-pptx-templater 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +1305 -225
- package/package.json +95 -3
- package/src/core/PPTXTemplater.js +52 -1
- package/src/core/ValidationEngine.js +93 -0
- package/src/index.js +14 -1
- package/src/managers/ChartManager.js +352 -21
- package/src/managers/charts/ChartCacheGenerator.js +371 -0
- package/src/managers/charts/ChartWorkbookUpdater.js +187 -34
- package/src/parsers/XMLParser.js +38 -1
- package/src/utils/xmlUtils.js +285 -30
package/src/utils/xmlUtils.js
CHANGED
|
@@ -1,42 +1,194 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @fileoverview XML validation and
|
|
2
|
+
* @fileoverview XML validation, repair, recovery, and security diagnostics utilities.
|
|
3
3
|
*
|
|
4
|
-
* Provides tools to check if generated XML is well-formed
|
|
5
|
-
*
|
|
4
|
+
* Provides tools to check if generated XML is well-formed, protect against
|
|
5
|
+
* XML Entity Attacks (XXE, DTD abuse, Billion Laughs), and recover diagnostics.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
+
const { XMLValidator } = require('fast-xml-parser')
|
|
8
9
|
const { XMLParser } = require('../parsers/XMLParser.js')
|
|
10
|
+
const { PPTXError } = require('./errors.js')
|
|
9
11
|
|
|
10
12
|
const parser = new XMLParser()
|
|
11
13
|
|
|
14
|
+
/**
 * Computes the 1-based line and column of a character offset within a string.
 *
 * A '\n' starts a new line and resets the column to 1. A '\r' is skipped, so
 * CRLF line endings do not inflate the column count.
 *
 * @param {string} str - Text to scan.
 * @param {number} index - Zero-based offset into `str`. Offsets past the end
 *   are clamped to `str.length`; offsets <= 0 resolve to line 1, column 1.
 * @returns {{ line: number, col: number }} 1-based position of `index`.
 */
function getLineAndCol(str, index) {
  // Clamp so an out-of-range offset cannot count characters that do not exist
  // (str[i] is undefined past the end, which would otherwise bump the column).
  const end = Math.min(index, str.length)
  let line = 1
  let col = 1
  for (let i = 0; i < end; i++) {
    const ch = str[i]
    if (ch === '\n') {
      line++
      col = 1
    } else if (ch !== '\r') {
      col++
    }
  }
  return { line, col }
}
|
|
30
|
+
|
|
31
|
+
/**
 * Validates that an XML string is secure and well-formed.
 *
 * Checks run in this order and the first failure is returned:
 *   1. SYSTEM/PUBLIC external identifiers following a markup declaration (XXE).
 *   2. `<!ENTITY` declarations (custom / recursive entities).
 *   3. `<!DOCTYPE` declarations (DTD abuse, Billion Laughs).
 *   4. More than 50,000 entity references in the document.
 *   5. Well-formedness, as reported by fast-xml-parser's `XMLValidator`.
 *
 * @param {string} xmlString - Raw XML content.
 * @returns {{ valid: boolean, error: string|null, line: number|null, column: number|null, recommendation: string|null }}
 *   `valid: true` with all other fields `null` on success; otherwise the
 *   first failure with a 1-based position and a remediation hint.
 */
function validateXml(xmlString) {
  const MAX_ENTITY_REFERENCES = 50000

  if (typeof xmlString !== 'string') {
    return {
      valid: false,
      error: 'Invalid XML input: expected string.',
      line: 1,
      column: 1,
      recommendation: 'Ensure XML input is passed as a string.',
    }
  }

  // 1. Check for external references (XXE).
  // SYSTEM/PUBLIC are only external identifiers inside a markup declaration,
  // so the keyword scan runs only when such a declaration is present. Plain
  // text content like "public system" or "ecosystem" must not be rejected.
  const declarationIndex = xmlString.search(/<!(?:DOCTYPE|ENTITY|NOTATION)\b/i)
  if (declarationIndex !== -1) {
    const keyword = /\b(?:SYSTEM|PUBLIC)\b/i.exec(xmlString.slice(declarationIndex))
    if (keyword) {
      const { line, col } = getLineAndCol(xmlString, declarationIndex + keyword.index)
      return {
        valid: false,
        error: 'External reference SYSTEM/PUBLIC detected',
        line,
        column: col,
        recommendation: 'Remove external system/public identifiers to prevent XXE attacks.',
      }
    }
  }

  // 2. Check for entity declarations (prevent custom/recursive entities)
  const entityDeclIndex = xmlString.search(/<!ENTITY/i)
  if (entityDeclIndex !== -1) {
    const { line, col } = getLineAndCol(xmlString, entityDeclIndex)
    return {
      valid: false,
      error: 'Custom entity declaration detected',
      line,
      column: col,
      recommendation: 'Do not declare custom entities to protect against XML entity injection.',
    }
  }

  // 3. Check for DTD / DOCTYPE declarations (DTD abuse, recursive entities, Billion Laughs)
  const doctypeIndex = xmlString.search(/<!DOCTYPE/i)
  if (doctypeIndex !== -1) {
    const { line, col } = getLineAndCol(xmlString, doctypeIndex)
    return {
      valid: false,
      error: 'DTD/DOCTYPE declaration detected: entity expansion limit exceeded / DTD abuse',
      line,
      column: col,
      recommendation:
        'Remove DOCTYPE declarations or DTD abuse to prevent entity expansion attacks.',
    }
  }

  // 4. Check for oversized entity reference counts to prevent DoS
  const entityCount = (xmlString.match(/&[a-zA-Z0-9#x]+;/g) || []).length
  if (entityCount > MAX_ENTITY_REFERENCES) {
    return {
      valid: false,
      error: `Entity expansion limit exceeded: ${entityCount} references (max ${MAX_ENTITY_REFERENCES})`,
      line: 1,
      column: 1,
      recommendation: 'Reduce the density of standard entity references.',
    }
  }

  // 5. Well-formedness check using XMLValidator (returns `true` or `{ err }`)
  const validation = XMLValidator.validate(xmlString)
  if (validation !== true) {
    return {
      valid: false,
      error: validation.err.msg || 'Malformed XML',
      line: validation.err.line || 1,
      column: validation.err.col || 1,
      recommendation:
        'Fix XML syntax errors (unclosed tags, invalid characters, mismatched brackets).',
    }
  }

  return {
    valid: true,
    error: null,
    line: null,
    column: null,
    recommendation: null,
  }
}
|
|
123
|
+
|
|
12
124
|
/**
 * Legacy entry point kept for backwards compatibility.
 *
 * Delegates to `validateXml` and exposes only the two fields of the original
 * result contract.
 *
 * @param {string} xmlString - XML to validate.
 * @returns {{ valid: boolean, error: string|null }} Validation result.
 */
function validateXML(xmlString) {
  const { valid, error } = validateXml(xmlString)
  return { valid, error }
}
|
|
25
138
|
|
|
26
139
|
/**
 * Safely parses XML with validation, recovery diagnostics, and fallback reporting.
 *
 * The input is first checked with `validateXml`; only input that passes is
 * handed to the parser. Every failure is thrown as a `PPTXError` carrying a
 * `diagnostic` object of the form `{ file, line, column, error, recommendation }`.
 * Parser failures additionally expose the originally thrown value as `cause`.
 *
 * @param {string} xmlString - Raw XML content.
 * @param {string} filename - Filename for error reporting context.
 * @param {XMLParser} [xmlParserInstance] - Optional parser instance; the
 *   module-level parser is used when omitted.
 * @returns {Object} Parsed JS object.
 * @throws {PPTXError} If parsing fails or security limits are violated.
 */
function safeParseXml(xmlString, filename = 'unknown.xml', xmlParserInstance = null) {
  const validation = validateXml(xmlString)
  if (!validation.valid) {
    const err = new PPTXError(`XML parse validation error in ${filename}: ${validation.error}`)
    err.diagnostic = {
      file: filename,
      line: validation.line || 1,
      column: validation.column || 1,
      error: validation.error,
      recommendation: validation.recommendation || 'Malformed entity reference detected',
    }
    throw err
  }

  try {
    const activeParser = xmlParserInstance || parser
    return activeParser.parse(xmlString, filename)
  } catch (err) {
    // A thrown value is not guaranteed to be an Error; normalise it so the
    // position extraction below cannot itself throw a TypeError.
    const message = err && typeof err.message === 'string' ? err.message : String(err)

    // Best-effort position recovery from the message text: either explicit
    // "line N" / "col N" markers or a trailing ":line:col" suffix.
    const lineMatch = message.match(/line:?\s*(\d+)/i) || message.match(/:(\d+):\d+$/)
    const colMatch = message.match(/col(?:umn)?:?\s*(\d+)/i) || message.match(/:\d+:(\d+)$/)
    const line = lineMatch ? parseInt(lineMatch[1], 10) : 1
    const col = colMatch ? parseInt(colMatch[1], 10) : 1

    const newErr = new PPTXError(`XML parse error in ${filename}: ${message}`)
    newErr.diagnostic = {
      file: filename,
      line,
      column: col,
      error: message,
      recommendation: 'Ensure all XML tags are closed properly and entity syntax is valid.',
    }
    // Keep the original failure (and its stack) reachable for debugging.
    newErr.cause = err
    throw newErr
  }
}
|
|
186
|
+
|
|
187
|
+
/**
|
|
188
|
+
* Attempts to repair common XML corruption issues in PPTX files.
|
|
33
189
|
*
|
|
34
190
|
* @param {string} xmlString - Potentially broken XML.
|
|
35
191
|
* @returns {{ xml: string, repaired: boolean, changes: string[] }}
|
|
36
|
-
*
|
|
37
|
-
* @example
|
|
38
|
-
* const { xml, repaired, changes } = repairXML(brokenXml);
|
|
39
|
-
* if (repaired) console.log('Repaired:', changes);
|
|
40
192
|
*/
|
|
41
193
|
function repairXML(xmlString) {
|
|
42
194
|
const changes = []
|
|
@@ -48,7 +200,6 @@ function repairXML(xmlString) {
|
|
|
48
200
|
if (xml !== before) changes.push('Removed invalid control characters')
|
|
49
201
|
|
|
50
202
|
// Fix 2: Fix unescaped ampersands in text content (not in entities)
|
|
51
|
-
// Match & not followed by valid entity patterns
|
|
52
203
|
const fixedAmp = xml.replace(/&(?!amp;|lt;|gt;|quot;|apos;|#\d+;|#x[0-9a-fA-F]+;)/g, '&amp;')
|
|
53
204
|
if (fixedAmp !== xml) {
|
|
54
205
|
xml = fixedAmp
|
|
@@ -76,10 +227,6 @@ function repairXML(xmlString) {
|
|
|
76
227
|
|
|
77
228
|
/**
|
|
78
229
|
* Checks if an XML string contains a specific element.
|
|
79
|
-
*
|
|
80
|
-
* @param {string} xmlString
|
|
81
|
-
* @param {string} elementName - Element tag name (e.g., 'a:tbl').
|
|
82
|
-
* @returns {boolean}
|
|
83
230
|
*/
|
|
84
231
|
function xmlContainsElement(xmlString, elementName) {
|
|
85
232
|
return xmlString.includes(`<${elementName}`) || xmlString.includes(`<${elementName}>`)
|
|
@@ -87,10 +234,6 @@ function xmlContainsElement(xmlString, elementName) {
|
|
|
87
234
|
|
|
88
235
|
/**
|
|
89
236
|
* Counts occurrences of an element in XML.
|
|
90
|
-
*
|
|
91
|
-
* @param {string} xmlString
|
|
92
|
-
* @param {string} elementName
|
|
93
|
-
* @returns {number}
|
|
94
237
|
*/
|
|
95
238
|
function countElements(xmlString, elementName) {
|
|
96
239
|
const pattern = new RegExp(`<${elementName}[\\s>/]`, 'g')
|
|
@@ -99,10 +242,6 @@ function countElements(xmlString, elementName) {
|
|
|
99
242
|
|
|
100
243
|
/**
|
|
101
244
|
* Extracts all attribute values for a given attribute name.
|
|
102
|
-
*
|
|
103
|
-
* @param {string} xmlString - XML string to search.
|
|
104
|
-
* @param {string} attrName - Attribute name (e.g., 'r:id', 'name').
|
|
105
|
-
* @returns {string[]} Array of attribute values found.
|
|
106
245
|
*/
|
|
107
246
|
function extractAttributeValues(xmlString, attrName) {
|
|
108
247
|
const pattern = new RegExp(`${attrName.replace(':', '\\:')}="([^"]*)"`, 'g')
|
|
@@ -114,10 +253,126 @@ function extractAttributeValues(xmlString, attrName) {
|
|
|
114
253
|
return values
|
|
115
254
|
}
|
|
116
255
|
|
|
256
|
+
/**
 * Scans an XML string for entity references and classifies each one.
 *
 * Classification per reference:
 *   - `standard`: one of the five predefined XML entities
 *     (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`)
 *   - `hex`: starts with `&#x`
 *   - `numeric`: any other reference starting with `&#`
 *   - `custom`: everything else
 *
 * @param {string} xmlString - XML string to scan.
 * @returns {{ standard: number, custom: number, numeric: number, hex: number, total: number, entities: string[] }}
 *   Counts per category, the total, and every matched reference in document
 *   order. All counts are zero for non-string input.
 */
function scanForEntities(xmlString) {
  const result = {
    standard: 0,
    custom: 0,
    numeric: 0,
    hex: 0,
    total: 0,
    entities: [],
  }
  if (typeof xmlString !== 'string') return result

  const entityRegex = /&[a-zA-Z0-9#x_:-]+;/g
  const matches = xmlString.match(entityRegex) || []
  result.total = matches.length

  // The five entity references predefined by the XML specification.
  const standardSet = new Set(['&amp;', '&lt;', '&gt;', '&quot;', '&apos;'])

  for (const entity of matches) {
    result.entities.push(entity)
    if (standardSet.has(entity)) {
      result.standard++
    } else if (entity.startsWith('&#x')) {
      // Must be tested before the generic '&#' prefix below.
      result.hex++
    } else if (entity.startsWith('&#')) {
      result.numeric++
    } else {
      result.custom++
    }
  }

  return result
}
|
|
294
|
+
|
|
295
|
+
/**
 * Collects size and structure statistics for an XML string.
 *
 * The counts are regex-based approximations rather than the result of a
 * full parse:
 *   - `elementCount` counts `<name` occurrences (opening and self-closing tags).
 *   - `attributeCount` counts whitespace-preceded `name=` occurrences.
 *
 * @param {string} xmlString - XML content.
 * @returns {{ sizeBytes: number, lineCount: number, elementCount: number, attributeCount: number, entityStats: Object }}
 *   `sizeBytes` is the UTF-8 byte length. `entityStats` is the result of
 *   `scanForEntities`. For non-string input every count is zero and
 *   `entityStats` is the zeroed `scanForEntities` result, so both paths
 *   return the same shape.
 */
function analyzeXmlFile(xmlString) {
  // scanForEntities returns an all-zero result for non-string input, which
  // keeps entityStats shaped identically on both return paths.
  const entityStats = scanForEntities(xmlString)

  if (typeof xmlString !== 'string') {
    return { sizeBytes: 0, lineCount: 0, elementCount: 0, attributeCount: 0, entityStats }
  }

  const countMatches = pattern => (xmlString.match(pattern) || []).length

  return {
    sizeBytes: Buffer.byteLength(xmlString, 'utf8'),
    lineCount: xmlString.split('\n').length,
    elementCount: countMatches(/<[a-zA-Z0-9_:-]+/g),
    attributeCount: countMatches(/\s[a-zA-Z0-9_:-]+=/g),
    entityStats,
  }
}
|
|
320
|
+
|
|
321
|
+
/**
 * Reports complexity indicators of the XML document.
 *
 * Tags are found with a regular expression, not a full parse:
 *   - `nodeCount` is the number of matched tags; opening, closing, and
 *     self-closing tags each count once.
 *   - `maxDepth` is the deepest nesting level reached; a self-closing tag
 *     counts as one level below its parent.
 *   - `ratioTextToMarkup` is the length of the text left after stripping
 *     `<...>` spans divided by the total length, rounded to 4 decimals.
 *
 * @param {string} xmlString - XML content.
 * @returns {{ maxDepth: number, nodeCount: number, ratioTextToMarkup: number }}
 *   All zeros for non-string input.
 */
function reportXmlComplexity(xmlString) {
  if (typeof xmlString !== 'string') {
    return { maxDepth: 0, nodeCount: 0, ratioTextToMarkup: 0 }
  }

  let currentDepth = 0
  let maxDepth = 0
  let nodeCount = 0

  // The attribute section is a single optional group on purpose: a repeated
  // group such as `(\s[^>]*)*` nests two quantifiers and backtracks
  // exponentially on an unterminated tag followed by whitespace. The optional
  // `/` lets attribute-less self-closing tags (e.g. `<a:lstStyle/>`) match.
  const tagRegex = /<\/?[a-zA-Z0-9_:-]+(?:\s[^>]*)?\/?>/g
  let match
  while ((match = tagRegex.exec(xmlString)) !== null) {
    const rawTag = match[0]
    nodeCount++
    if (rawTag.startsWith('</')) {
      currentDepth--
    } else if (rawTag.endsWith('/>')) {
      // Self-closing: occupies one level but does not change the open depth.
      maxDepth = Math.max(maxDepth, currentDepth + 1)
    } else {
      currentDepth++
      maxDepth = Math.max(maxDepth, currentDepth)
    }
  }

  const textLength = xmlString.replace(/<[^>]+>/g, '').length
  const xmlLength = xmlString.length
  const ratioTextToMarkup = xmlLength > 0 ? parseFloat((textLength / xmlLength).toFixed(4)) : 0

  return {
    maxDepth,
    nodeCount,
    ratioTextToMarkup,
  }
}
|
|
366
|
+
|
|
117
367
|
// Public API of this module. `validateXML` is exported alongside `validateXml`
// as the backwards-compatible wrapper that returns only `{ valid, error }`.
module.exports = {
|
|
368
|
+
validateXml,
|
|
118
369
|
validateXML,
|
|
370
|
+
safeParseXml,
|
|
119
371
|
repairXML,
|
|
120
372
|
xmlContainsElement,
|
|
121
373
|
countElements,
|
|
122
374
|
extractAttributeValues,
|
|
375
|
+
scanForEntities,
|
|
376
|
+
analyzeXmlFile,
|
|
377
|
+
reportXmlComplexity,
|
|
123
378
|
}
|