node-pptx-templater 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +39 -0
- package/LICENSE +21 -0
- package/README.md +415 -0
- package/package.json +83 -0
- package/src/cli/commands/build.js +79 -0
- package/src/cli/commands/debug.js +46 -0
- package/src/cli/commands/extract.js +42 -0
- package/src/cli/commands/inspect.js +39 -0
- package/src/cli/commands/validate.js +36 -0
- package/src/cli/index.js +132 -0
- package/src/core/OutputWriter.js +181 -0
- package/src/core/PPTXTemplater.js +961 -0
- package/src/core/TemplateEngine.js +321 -0
- package/src/index.js +43 -0
- package/src/managers/ChartManager.js +317 -0
- package/src/managers/ContentTypesManager.js +160 -0
- package/src/managers/HyperlinkManager.js +451 -0
- package/src/managers/MediaManager.js +307 -0
- package/src/managers/RelationshipManager.js +401 -0
- package/src/managers/SlideManager.js +950 -0
- package/src/managers/TableManager.js +416 -0
- package/src/managers/ZipManager.js +298 -0
- package/src/managers/charts/ChartCacheGenerator.js +156 -0
- package/src/managers/charts/ChartParser.js +43 -0
- package/src/managers/charts/ChartRelationshipManager.js +33 -0
- package/src/managers/charts/ChartWorkbookUpdater.js +130 -0
- package/src/parsers/XMLParser.js +291 -0
- package/src/templates/blankPptx.js +1 -0
- package/src/templates/slideTemplate.js +314 -0
- package/src/utils/contentTypesHelper.js +149 -0
- package/src/utils/errors.js +129 -0
- package/src/utils/idUtils.js +54 -0
- package/src/utils/logger.js +113 -0
- package/src/utils/relationshipUtils.js +89 -0
- package/src/utils/xmlUtils.js +115 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { createLogger } from '../../utils/logger.js';
|
|
2
|
+
import { ChartWorkbookUpdater } from './ChartWorkbookUpdater.js';
|
|
3
|
+
|
|
4
|
+
// Module-level logger created with the label 'ChartCacheGenerator'.
const logger = createLogger('ChartCacheGenerator');
|
|
5
|
+
|
|
6
|
+
/**
 * Builds cached-data fragments (`c:strCache` / `c:numCache`) and rewrites the
 * category / series sections of a chart XML string.
 *
 * All rewriting is regex based and operates on the raw XML text; elements are
 * matched only in their attribute-less form (`<c:ser>`, `<c:cat>`, `<c:tx>`,
 * `<c:val>`).
 */
export class ChartCacheGenerator {
  /**
   * Generates a string cache XML fragment (used for categories or series names).
   *
   * @param {Array<*>} values - Values to cache; each one is stringified and XML-escaped.
   * @returns {string} A `<c:strCache>` element with one `<c:pt>` per value.
   */
  static generateStrCache(values) {
    const points = values
      .map(
        (value, idx) =>
          `<c:pt idx="${idx}"><c:v>${ChartCacheGenerator.#escapeXml(value)}</c:v></c:pt>`,
      )
      .join('');
    return `<c:strCache><c:ptCount val="${values.length}"/>${points}</c:strCache>`;
  }

  /**
   * Generates a numeric cache XML fragment.
   *
   * @param {Array<*>} values - Values to cache; anything that does not convert
   *   to a truthy number (NaN, null, '', 0) is written as 0.
   * @returns {string} A `<c:numCache>` element using the `General` format code.
   */
  static generateNumCache(values) {
    const points = values
      .map((value, idx) => `<c:pt idx="${idx}"><c:v>${Number(value) || 0}</c:v></c:pt>`)
      .join('');
    return `<c:numCache><c:formatCode>General</c:formatCode><c:ptCount val="${values.length}"/>${points}</c:numCache>`;
  }

  /**
   * Updates category cache and formulas in a chart XML.
   *
   * Every `<c:cat>` block is replaced by a `c:strRef` pointing at column A,
   * rows 2..N+1 of the worksheet, followed by a fresh string cache.
   *
   * @param {string} xml - Raw chart XML.
   * @param {string[]} categories - Array of categories.
   * @param {string} sheetName - Target worksheet name.
   * @returns {string} Chart XML with all `<c:cat>` blocks rewritten.
   */
  static updateCategories(xml, categories, sheetName = 'Sheet1') {
    // Formula for categories: Sheet1!$A$2:$A$N (escaped because the sheet
    // name is caller supplied and ends up inside XML text).
    const formula = ChartCacheGenerator.#escapeXml(
      ChartWorkbookUpdater.getFormulaRange(sheetName, 2, 0, categories.length + 1, 0),
    );
    const strCache = ChartCacheGenerator.generateStrCache(categories);
    const catBlock = `<c:cat><c:strRef><c:f>${formula}</c:f>${strCache}</c:strRef></c:cat>`;

    // A function replacer keeps `$` sequences in the data from being
    // interpreted as replacement patterns.
    return xml.replace(/<c:cat>[\s\S]*?<\/c:cat>/g, () => catBlock);
  }

  /**
   * Updates series names and values in chart XML.
   *
   * The n-th `<c:ser>` block receives `series[n]`; series data is assumed to
   * live in worksheet column n+1 (B, C, ...), header in row 1 and values from
   * row 2. `<c:ser>` blocks beyond `series.length` are removed.
   *
   * @param {string} xml - Raw chart XML.
   * @param {Array<{name?: *, values?: Array<*>}>} series - Series data, in chart order.
   * @param {number} categoriesLength - Row count used for the value formula;
   *   when falsy the series' own value count is used instead.
   * @param {string} sheetName - Target worksheet name.
   * @returns {string} Updated chart XML (unchanged when it has no `<c:ser>`).
   */
  static updateSeries(xml, series, categoriesLength, sheetName = 'Sheet1') {
    let serIndex = 0;

    return xml.replace(/<c:ser>([\s\S]*?)<\/c:ser>/g, (whole, content) => {
      // More series templates than data: drop the surplus template.
      if (serIndex >= series.length) {
        return '';
      }

      const serData = series[serIndex];
      const colIndex = serIndex + 1; // Series data starts in column B (1)
      serIndex += 1;

      let body = content;
      if (serData.name != null) {
        body = ChartCacheGenerator.#applySeriesName(body, serData.name, sheetName, colIndex);
      }
      if (serData.values != null) {
        body = ChartCacheGenerator.#applySeriesValues(
          body,
          serData.values,
          categoriesLength,
          sheetName,
          colIndex,
        );
      }
      return `<c:ser>${body}</c:ser>`;
    });
  }

  /**
   * Clones a series template to support dynamic series addition.
   *
   * The last `<c:ser>` block is duplicated until the chart holds
   * `targetCount` series; each clone gets the next index as its `c:idx` and
   * `c:order`.
   *
   * @param {string} xml - Raw chart XML.
   * @param {number} targetCount - Desired number of `<c:ser>` blocks.
   * @returns {string} Chart XML with clones inserted right after the last
   *   series, or the input unchanged when there is no series to clone or
   *   enough series already exist.
   */
  static appendDynamicSeries(xml, targetCount) {
    const matches = [...xml.matchAll(/<c:ser>[\s\S]*?<\/c:ser>/g)];
    if (matches.length === 0 || matches.length >= targetCount) {
      return xml;
    }

    const lastMatch = matches[matches.length - 1];
    const template = lastMatch[0];
    const insertAt = lastMatch.index + template.length;

    let clones = '';
    for (let i = matches.length; i < targetCount; i++) {
      // Only the FIRST c:idx / c:order is rewritten: later <c:idx> elements
      // inside the series (e.g. under c:dPt or c:dLbl) are point indices and
      // must keep their values.
      clones += template
        .replace(/(<c:idx val=")\d+("\/>)/, (m, head, tail) => `${head}${i}${tail}`)
        .replace(/(<c:order val=")\d+("\/>)/, (m, head, tail) => `${head}${i}${tail}`);
    }

    return xml.slice(0, insertAt) + clones + xml.slice(insertAt);
  }

  /**
   * Rewrites (or inserts) the `<c:tx>` block of one series body.
   * When no `<c:tx>` exists it is inserted after `<c:order>`; if that is
   * missing too the body is returned untouched.
   */
  static #applySeriesName(content, name, sheetName, colIndex) {
    const formula = ChartCacheGenerator.#escapeXml(
      ChartWorkbookUpdater.getFormulaSingleCell(sheetName, 1, colIndex),
    );
    const cache = ChartCacheGenerator.generateStrCache([name]);
    const txBlock = `<c:tx><c:strRef><c:f>${formula}</c:f>${cache}</c:strRef></c:tx>`;

    const txPattern = /<c:tx>[\s\S]*?<\/c:tx>/;
    if (txPattern.test(content)) {
      return content.replace(txPattern, () => txBlock);
    }
    return content.replace(/<c:order[^>]*>/, (orderTag) => `${orderTag}${txBlock}`);
  }

  /**
   * Rewrites the `<c:val>` block of one series body with a numRef formula
   * and a fresh numeric cache. A body without `<c:val>` is returned untouched.
   */
  static #applySeriesValues(content, values, categoriesLength, sheetName, colIndex) {
    const rowCount = categoriesLength || values.length;
    const formula = ChartCacheGenerator.#escapeXml(
      ChartWorkbookUpdater.getFormulaRange(sheetName, 2, colIndex, rowCount + 1, colIndex),
    );
    const cache = ChartCacheGenerator.generateNumCache(values);
    const valBlock = `<c:val><c:numRef><c:f>${formula}</c:f>${cache}</c:numRef></c:val>`;

    return content.replace(/<c:val>[\s\S]*?<\/c:val>/, () => valBlock);
  }

  /**
   * Escapes the five XML special characters. `&` is handled first so the
   * entities produced by the later replacements are not escaped again.
   */
  static #escapeXml(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { createLogger } from '../../utils/logger.js';
|
|
2
|
+
|
|
3
|
+
// Module-level logger created with the label 'ChartParser'.
const logger = createLogger('ChartParser');
|
|
4
|
+
|
|
5
|
+
/**
 * Regex-based helpers that read chart information out of raw slide / chart XML.
 */
export class ChartParser {
  /**
   * Finds a chart's relationship ID in a slide's XML based on the shape name
   * (the `name` attribute of `p:cNvPr`).
   *
   * The name is matched literally, and the search stops at the next
   * `<p:cNvPr` so a named shape that is not a chart can never resolve to a
   * chart that belongs to a later shape.
   *
   * @param {string} slideXml - Raw slide XML.
   * @param {string} chartId - Shape name to look for, compared against the
   *   attribute text exactly as it appears in the XML.
   * @returns {{ rId: string } | null} The `r:id` of the shape's `c:chart`
   *   element, or null when no such shape/chart pair exists.
   */
  static findChartRIdInSlide(slideXml, chartId) {
    // Escape regex metacharacters: shape names such as "Sales (2024)" would
    // otherwise change the pattern (and shift the capture group).
    const literalName = String(chartId).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    // Strategy 1: Look for shape with matching name (cNvPr name attribute)
    const shapeNamePattern = new RegExp(
      `<p:cNvPr\\b[^>]*\\sname="${literalName}"[^>]*>` +
        '(?:(?!<p:cNvPr\\b)[\\s\\S])*?' +
        '<c:chart\\b[^>]*\\sr:id="(rId\\d+)"',
    );
    const rIdMatch = shapeNamePattern.exec(slideXml);
    if (rIdMatch) {
      return { rId: rIdMatch[1] };
    }

    // Strategy 2: not resolved here — null tells the caller to match
    // chart graphicFrames itself.
    return null;
  }

  /**
   * Parses the chart XML to extract data for validation.
   *
   * Only the first self-closing `<c:ptCount val="N"/>` element is read.
   *
   * @param {string} xml - Raw chart XML.
   * @returns {{ pointCount: number }} The first point count found, or 0.
   */
  static parseChartData(xml) {
    const ptCountMatch = xml.match(/<c:ptCount val="(\d+)"\/>/);
    const pointCount = ptCountMatch ? Number.parseInt(ptCountMatch[1], 10) : 0;

    return { pointCount };
  }
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { REL_TYPES } from '../RelationshipManager.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Checks the relationships of a chart part for its embedded workbook.
 */
export class ChartRelationshipManager {
  /**
   * Inspects every relationship of the chart part and reports problems with
   * the embedded workbook.
   *
   * - An error is recorded for each PACKAGE relationship whose resolved
   *   target is not present in the archive.
   * - A warning is recorded when the chart has no PACKAGE relationship at all.
   *
   * @param {RelationshipManager} relationshipManager - Supplies `getRelationships` and `resolveTarget`.
   * @param {ZipManager} zipManager - Supplies `hasFile`.
   * @param {string} chartZipPath - Archive path of the chart part.
   * @returns {{ errors: string[], warnings: string[] }} Collected validation issues.
   */
  static validateChartRelationships(relationshipManager, zipManager, chartZipPath) {
    const errors = [];
    const warnings = [];
    let workbookRelCount = 0;

    for (const relationship of relationshipManager.getRelationships(chartZipPath)) {
      if (relationship.type !== REL_TYPES.PACKAGE) {
        continue;
      }
      workbookRelCount += 1;

      const workbookPath = relationshipManager.resolveTarget(chartZipPath, relationship.target);
      if (zipManager.hasFile(workbookPath)) {
        continue;
      }
      errors.push(`Embedded workbook missing: ${workbookPath}`);
    }

    if (workbookRelCount === 0) {
      warnings.push(`Chart ${chartZipPath} has no embedded workbook relationship. Live editing in PowerPoint may fail.`);
    }

    return { errors, warnings };
  }
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import JSZip from 'jszip';
|
|
2
|
+
import { createLogger } from '../../utils/logger.js';
|
|
3
|
+
|
|
4
|
+
// Module-level logger created with the label 'ChartWorkbookUpdater'.
const logger = createLogger('ChartWorkbookUpdater');
|
|
5
|
+
|
|
6
|
+
/**
 * Rewrites the worksheet of a chart's embedded XLSX workbook and provides the
 * A1-style formula helpers used for chart references.
 *
 * Sheet layout written by this class: A1 empty, series names in row 1 from
 * column B, category labels in column A from row 2, series values beneath
 * their header.
 */
export class ChartWorkbookUpdater {
  /**
   * Updates the embedded Excel workbook for a chart.
   *
   * The worksheet is regenerated from `data`; table parts, worksheet
   * relationship parts and the table overrides in `[Content_Types].xml` are
   * removed because the regenerated sheet no longer references them.
   *
   * This is best effort: on any failure the error is logged and the original
   * buffer is returned unchanged.
   *
   * @param {Buffer|Uint8Array} workbookData - The raw XLSX buffer.
   * @param {Object} data - Chart data (categories, series).
   * @returns {Promise<Buffer>} - The updated XLSX buffer (null when
   *   `workbookData` is falsy).
   */
  static async updateWorkbook(workbookData, data) {
    if (!workbookData) return null;

    try {
      const zip = await JSZip.loadAsync(workbookData);

      const sheetPath = ChartWorkbookUpdater.#resolveSheetPath(zip);
      zip.file(sheetPath, ChartWorkbookUpdater.#generateSheetXml(data));

      // Clean up any existing Excel tables, as our new sheet data might not align with them
      const staleParts = Object.keys(zip.files).filter(
        (path) => path.startsWith('xl/tables/') || path.startsWith('xl/worksheets/_rels/'),
      );
      for (const path of staleParts) {
        zip.remove(path);
      }

      const contentTypesFile = zip.file('[Content_Types].xml');
      if (contentTypesFile) {
        const contentTypesXml = await contentTypesFile.async('text');
        zip.file(
          '[Content_Types].xml',
          contentTypesXml.replace(/<Override[^>]*PartName="\/xl\/tables\/[^"]*"[^>]*\/>/g, ''),
        );
      }

      return await zip.generateAsync({
        type: 'nodebuffer',
        compression: 'DEFLATE',
        compressionOptions: { level: 6 },
      });
    } catch (err) {
      logger.error('Failed to update embedded workbook', err);
      return workbookData; // Return original if failed
    }
  }

  /**
   * Picks the worksheet part to overwrite: `sheet1.xml` when present,
   * otherwise the first worksheet part by natural file-name order.
   * NOTE(review): file-name order is assumed to match the workbook's sheet
   * order — confirm against xl/workbook.xml if multi-sheet workbooks matter.
   * When the archive has no worksheet part, `sheet1.xml` is created.
   */
  static #resolveSheetPath(zip) {
    const preferred = 'xl/worksheets/sheet1.xml';
    if (zip.file(preferred)) {
      return preferred;
    }

    logger.warn('sheet1.xml not found in embedded workbook, trying to find first sheet');
    const [firstSheet] = Object.keys(zip.files)
      .filter((path) => /^xl\/worksheets\/[^/]+\.xml$/.test(path))
      .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }));
    return firstSheet ?? preferred;
  }

  /**
   * Builds the complete worksheet XML for the given chart data.
   * Text cells are inline strings; value cells that are missing or not a
   * finite number are written as 0 so the sheet never contains `NaN`.
   */
  static #generateSheetXml(data) {
    const { categories = [], series = [] } = data;

    // Column count = 1 (categories) + series.length; row 1 = headers.
    const lastColLetter = ChartWorkbookUpdater.getColumnLetter(series.length);
    const dimensionRef = `A1:${lastColLetter}${1 + categories.length}`;
    const seriesColumns = series.map((ser, i) => ChartWorkbookUpdater.getColumnLetter(i + 1));

    const textCell = (ref, text) =>
      `<c r="${ref}" t="inlineStr"><is><t>${ChartWorkbookUpdater.#escapeXml(text)}</t></is></c>`;

    // Row 1: Headers (empty cell A1, then series names)
    const headerCells = series
      .map((ser, i) => textCell(`${seriesColumns[i]}1`, ser.name ?? ''))
      .join('');
    const headerRow = `<row r="1">${textCell('A1', '')}${headerCells}</row>`;

    // Rows 2..N: Data (category name in A, then values)
    const dataRows = categories
      .map((category, rowIndex) => {
        const r = rowIndex + 2; // +1 for 1-based, +1 for header row
        const valueCells = series
          .map((ser, i) => {
            const numeric = Number(ser.values?.[rowIndex] ?? 0);
            return `<c r="${seriesColumns[i]}${r}"><v>${Number.isFinite(numeric) ? numeric : 0}</v></c>`;
          })
          .join('');
        return `<row r="${r}">${textCell(`A${r}`, category)}${valueCells}</row>`;
      })
      .join('');

    return [
      '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
      '<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">',
      `<dimension ref="${dimensionRef}"/>`,
      '<sheetViews><sheetView workbookViewId="0"/></sheetViews>',
      '<sheetFormatPr defaultRowHeight="15"/>',
      `<sheetData>${headerRow}${dataRows}</sheetData>`,
      '</worksheet>',
    ].join('\n');
  }

  /**
   * Converts a zero-based column index to its spreadsheet letter(s).
   *
   * @param {number} colIndex - 0 → 'A', 25 → 'Z', 26 → 'AA', ...
   * @returns {string} Column letters ('' for a negative index).
   */
  static getColumnLetter(colIndex) {
    let letters = '';
    // Bijective base-26: the "- 1" accounts for there being no zero digit.
    for (let rest = colIndex; rest >= 0; rest = Math.floor(rest / 26) - 1) {
      letters = String.fromCharCode(65 + (rest % 26)) + letters;
    }
    return letters;
  }

  /**
   * Builds an absolute range reference such as `Sheet1!$A$2:$A$5`.
   * The sheet name is used verbatim (no quoting is applied).
   *
   * @param {string} sheetName - Worksheet name.
   * @param {number} startRow - 1-based first row.
   * @param {number} startCol - 0-based first column.
   * @param {number} endRow - 1-based last row.
   * @param {number} endCol - 0-based last column.
   * @returns {string} The range formula.
   */
  static getFormulaRange(sheetName, startRow, startCol, endRow, endCol) {
    const startLetter = ChartWorkbookUpdater.getColumnLetter(startCol);
    const endLetter = ChartWorkbookUpdater.getColumnLetter(endCol);
    return `${sheetName}!$${startLetter}$${startRow}:$${endLetter}$${endRow}`;
  }

  /**
   * Builds an absolute single-cell reference such as `Sheet1!$B$1`.
   * The sheet name is used verbatim (no quoting is applied).
   *
   * @param {string} sheetName - Worksheet name.
   * @param {number} row - 1-based row.
   * @param {number} col - 0-based column.
   * @returns {string} The cell formula.
   */
  static getFormulaSingleCell(sheetName, row, col) {
    return `${sheetName}!$${ChartWorkbookUpdater.getColumnLetter(col)}$${row}`;
  }

  /**
   * Escapes the five XML special characters. `&` is handled first so the
   * entities produced by the later replacements are not escaped again.
   */
  static #escapeXml(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }
}
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview XMLParser - Unified XML parsing and serialization layer.
|
|
3
|
+
*
|
|
4
|
+
* Uses fast-xml-parser for high-performance XML → JS object conversion.
|
|
5
|
+
* Provides consistent, reusable parse/build methods used by all managers.
|
|
6
|
+
*
|
|
7
|
+
* Key design decisions:
|
|
8
|
+
* - Attributes are stored with '@_' prefix (fast-xml-parser convention)
|
|
9
|
+
* - Arrays are preserved for elements that can repeat (e.g., slides, rows)
|
|
10
|
+
* - Text content uses '#text' key
|
|
11
|
+
* - CDATA sections are preserved
|
|
12
|
+
* - XML namespaces (a:, p:, r:, etc.) are preserved as-is
|
|
13
|
+
*
|
|
14
|
+
* OpenXML namespace prefixes you'll see:
|
|
15
|
+
* a: — DrawingML (shapes, fonts, colors)
|
|
16
|
+
* p: — PresentationML (slides, layouts, masters)
|
|
17
|
+
* r: — Relationships (rId references)
|
|
18
|
+
* c: — ChartML (chart data)
|
|
19
|
+
* w: — WordprocessingML (not typically in PPTX)
|
|
20
|
+
* mc: — Markup Compatibility
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { XMLParser as FastXMLParser, XMLBuilder } from 'fast-xml-parser';
|
|
24
|
+
import { createLogger } from '../utils/logger.js';
|
|
25
|
+
import { PPTXError } from '../utils/errors.js';
|
|
26
|
+
|
|
27
|
+
// Module-level logger created with the label 'XMLParser'.
const logger = createLogger('XMLParser');
|
|
28
|
+
|
|
29
|
+
/**
 * Element paths that must ALWAYS parse to arrays, even when the element
 * occurs only once. An element qualifies when its jpath ends with one of
 * these entries or when its tag name equals an entry's last segment.
 */
const ALWAYS_ARRAY_PATHS = Object.freeze([
  'p:sld.p:cSld.p:spTree.p:sp',
  'p:sld.p:cSld.p:spTree.p:pic',
  'p:sld.p:cSld.p:spTree.p:graphicFrame',
  'p:sld.p:cSld.p:spTree.p:grpSp',
  'p:sldMaster.p:sldLayoutIdLst.p:sldLayoutId',
  'p:presentation.p:sldMasterIdLst.p:sldMasterId',
  'p:presentation.p:sldIdLst.p:sldId',
  'a:tbl.a:tr',
  'a:tr.a:tc',
  'c:ser',
  'c:pt',
  'c:cat.c:strRef.c:strCache.c:pt',
  'c:val.c:numRef.c:numCache.c:pt',
  'p:sp',
  'p:pic',
  'a:r', // text runs
  'Relationship',
  'Override',
  'Default',
  'p14:sldId',
  'p14:section',
]);

/**
 * Last segment of every always-array path, computed once so the per-tag
 * `isArray` callback does not re-split the list on each call.
 */
const ALWAYS_ARRAY_NAMES = new Set(ALWAYS_ARRAY_PATHS.map((path) => path.split('.').pop()));

/**
 * Parser configuration for fast-xml-parser.
 * These settings ensure lossless round-trip XML parsing.
 */
const PARSER_OPTIONS = {
  ignoreAttributes: false,
  ignoreDeclaration: true,
  attributeNamePrefix: '@_',
  allowBooleanAttributes: true,
  parseAttributeValue: false, // Keep all values as strings to avoid type coercion
  parseTagValue: false,
  cdataPropName: '__cdata',
  commentPropName: '__comment',
  preserveOrder: false,
  trimValues: false,
  processEntities: true,
  htmlEntities: false,
  // True when the tag name is a known repeating element, or when the jpath
  // ends with one of the listed paths.
  isArray: (name, jpath) =>
    ALWAYS_ARRAY_NAMES.has(name) || ALWAYS_ARRAY_PATHS.some((path) => jpath.endsWith(path)),
};
|
|
74
|
+
|
|
75
|
+
/**
 * Builder configuration for XMLBuilder.
 * Must match the parser configuration for correct round-trip.
 */
const BUILDER_OPTIONS = {
  ignoreAttributes: false,
  // NOTE(review): this looks like a parser-side option; confirm XMLBuilder honours it.
  ignoreDeclaration: true,
  attributeNamePrefix: '@_',
  cdataPropName: '__cdata',
  commentPropName: '__comment',
  suppressEmptyNode: false,
  // The builder's default (true) serializes an attribute whose value is the
  // string "true" as a bare attribute name without a value, which is not
  // well-formed XML. Attribute values are kept as strings by the parser
  // config, so they must always be written out in full.
  suppressBooleanAttributes: false,
  format: false, // No extra whitespace — PPTX is sensitive to whitespace in some cases
  processEntities: true,
};
|
|
89
|
+
|
|
90
|
+
/**
 * @class XMLParser
 * @description Provides XML parsing and serialization with OpenXML-aware configuration.
 */
export class XMLParser {
  /**
   * @private
   * @type {FastXMLParser}
   */
  #parser;

  /**
   * @private
   * @type {XMLBuilder}
   */
  #builder;

  constructor() {
    this.#parser = new FastXMLParser(PARSER_OPTIONS);
    this.#builder = new XMLBuilder(BUILDER_OPTIONS);
  }

  /**
   * Parses an XML string into a JavaScript object.
   * The resulting object preserves all attributes, namespaces, and structure.
   *
   * @param {string} xmlString - Raw XML content.
   * @param {string} [context] - Optional context description for error messages.
   * @returns {Object} Parsed JavaScript object.
   * @throws {PPTXError} If the input is empty / not a string, or the parser throws.
   *
   * @example
   * const obj = parser.parse('<p:sp><p:nvSpPr>...</p:nvSpPr></p:sp>');
   */
  parse(xmlString, context = '') {
    if (!xmlString || typeof xmlString !== 'string') {
      throw new PPTXError(`Invalid XML input${context ? ` (${context})` : ''}`);
    }

    try {
      return this.#parser.parse(xmlString);
    } catch (err) {
      throw new PPTXError(`XML parse error${context ? ` in ${context}` : ''}: ${err.message}`, err);
    }
  }

  /**
   * Serializes a JavaScript object back to an XML string.
   *
   * @param {Object} obj - JavaScript object (from parse() or manually constructed).
   * @param {string} [xmlDeclaration] - Optional XML declaration to prepend
   *   (followed by a newline).
   * @returns {string} XML string.
   * @throws {PPTXError} If the builder throws.
   *
   * @example
   * const xml = parser.build(obj, '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>');
   */
  build(obj, xmlDeclaration = '') {
    try {
      const xml = this.#builder.build(obj);
      return xmlDeclaration ? `${xmlDeclaration}\n${xml}` : xml;
    } catch (err) {
      throw new PPTXError(`XML build error: ${err.message}`, err);
    }
  }

  /**
   * Extracts the XML declaration from the start of an XML string.
   * A leading byte-order mark is tolerated and not included in the result.
   *
   * @param {string} xmlString - Raw XML string.
   * @returns {string} The declaration found, or the default
   *   `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` when the
   *   string does not start with one.
   */
  extractDeclaration(xmlString) {
    const match = xmlString.match(/^\uFEFF?(<\?xml[^>]+\?>)/);
    return match ? match[1] : '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>';
  }

  /**
   * Performs a deep clone of a parsed XML object.
   * Used when copying slides to avoid shared object references.
   *
   * The clone is a JSON round trip, so only JSON-representable values
   * survive (properties holding `undefined` or functions are dropped).
   *
   * @param {Object} obj - Object to clone.
   * @returns {Object} Deep clone.
   */
  deepClone(obj) {
    return JSON.parse(JSON.stringify(obj));
  }

  /**
   * Finds all nodes matching a key path in a parsed XML object.
   * Uses a simple dot-notation path (e.g., 'p:sp.p:txBody.a:p.a:r').
   * Arrays met along the way are flattened, so every branch is followed.
   *
   * @param {Object} obj - Root object to search.
   * @param {string} path - Dot-separated key path.
   * @returns {Array} Array of matching nodes.
   *
   * @example
   * const runs = parser.findAll(slideObj, 'p:cSld.p:spTree.p:sp.p:txBody.a:p.a:r');
   */
  findAll(obj, path) {
    let frontier = [obj];

    for (const key of path.split('.')) {
      const next = [];
      for (const node of frontier) {
        if (!node || typeof node !== 'object') continue;

        const child = node[key];
        if (Array.isArray(child)) {
          next.push(...child);
        } else if (child !== undefined) {
          next.push(child);
        }
      }
      frontier = next;
    }

    return frontier;
  }

  /**
   * Gets a single node by path (returns first match).
   *
   * @param {Object} obj - Root object to search.
   * @param {string} path - Dot-separated key path.
   * @returns {*} First matching node or undefined.
   */
  getNode(obj, path) {
    return this.findAll(obj, path)[0];
  }

  /**
   * Sets a value at a dot-notation path in an object, creating intermediate
   * objects as needed. When an intermediate value is an array, its first
   * element is descended into (an empty array gets a fresh object first).
   *
   * @param {Object} obj - Root object.
   * @param {string} path - Dot-separated key path.
   * @param {*} value - Value to set.
   */
  setNode(obj, path, value) {
    const keys = path.split('.');
    const leafKey = keys.pop();
    let cursor = obj;

    for (const key of keys) {
      let child = cursor[key];
      if (!child || typeof child !== 'object') {
        child = {};
        cursor[key] = child;
      }
      if (Array.isArray(child)) {
        // Descending into `[][0]` would leave the cursor undefined and crash
        // on the next step.
        if (child.length === 0) {
          child.push({});
        }
        child = child[0];
      }
      cursor = child;
    }

    cursor[leafKey] = value;
  }

  /**
   * Performs a string replacement directly on the raw XML string.
   * Faster than parse → modify → build for simple text replacements.
   *
   * Both `search` and `replace` are treated literally: regex metacharacters
   * in `search` and `$`-sequences in `replace` have no special meaning.
   *
   * @param {string} xmlString - Raw XML.
   * @param {string} search - Substring to find.
   * @param {string} replace - Replacement string.
   * @param {boolean} [all=true] - Replace all occurrences or just first.
   * @returns {string} Modified XML string.
   */
  replaceInXml(xmlString, search, replace, all = true) {
    // A function replacer is used so that "$&", "$'", "$$" etc. inside the
    // replacement text are inserted verbatim instead of being expanded.
    const literal = () => replace;
    return all ? xmlString.replaceAll(search, literal) : xmlString.replace(search, literal);
  }

  /**
   * Extracts all text content from a slide XML string.
   * Useful for debugging or searching slide content.
   *
   * Only attribute-less `<a:t>` elements are matched; whitespace-only runs
   * are skipped and the text is returned as written in the XML (entities are
   * not decoded).
   *
   * @param {string} xmlString - Slide XML content.
   * @returns {string[]} Array of text strings found in the slide.
   */
  extractTextContent(xmlString) {
    return Array.from(xmlString.matchAll(/<a:t>([^<]*)<\/a:t>/g), (match) => match[1]).filter(
      (text) => text.trim(),
    );
  }

  /**
   * Validates that an XML string is well-formed.
   *
   * The parser is invoked with its validation option enabled; a plain
   * `parse()` call is lenient and accepts many malformed documents.
   *
   * @param {string} xmlString - XML to validate.
   * @returns {{ valid: boolean, error: string|null }}
   */
  validate(xmlString) {
    if (!xmlString || typeof xmlString !== 'string') {
      return { valid: false, error: 'Invalid XML input' };
    }

    try {
      // Boolean attributes are allowed here because the parser accepts them too.
      this.#parser.parse(xmlString, { allowBooleanAttributes: true });
      return { valid: true, error: null };
    } catch (err) {
      return { valid: false, error: `XML parse error: ${err.message}` };
    }
  }
}
|