stegdoc 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Shared XML parsing utilities for DOCX and XLSX handlers
3
+ * Handles namespace prefix variations (w:, ns0:, ns1:, etc.)
4
+ */
5
+
6
+ const AdmZip = require('adm-zip');
7
+ const { XMLParser } = require('fast-xml-parser');
8
+
9
// Options for the single parser instance shared by every helper in this module.
const XML_PARSER_OPTIONS = {
  // Keep attributes in the parsed output instead of discarding them.
  ignoreAttributes: false,
  // Attribute keys are emitted with this prefix (e.g. '@_space').
  attributeNamePrefix: '@_',
  // Drop namespace prefixes (ns0:, ns1:, w:, ...) so lookups use bare names.
  removeNSPrefix: true,
};

// Module-level parser, created once and reused across calls.
const xmlParser = new XMLParser(XML_PARSER_OPTIONS);
15
+
16
/**
 * Convert an XML document into a plain JavaScript object using the
 * module's shared, namespace-stripping parser.
 *
 * @param {string} xmlString - Raw XML text.
 * @returns {object} Object tree produced by the shared parser.
 */
function parseXml(xmlString) {
  const parsedDocument = xmlParser.parse(xmlString);
  return parsedDocument;
}
24
+
25
/**
 * Read one XML entry out of a ZIP archive (DOCX/XLSX) and parse it.
 *
 * Delegates the archive lookup and UTF-8 decoding to getXmlStringFromZip so
 * that both helpers share a single implementation of "read entry as text".
 *
 * @param {string|Buffer} zipPath - Path to the ZIP file, or a Buffer holding it
 * @param {string} entryPath - Path of the XML entry inside the archive
 * @returns {object|null} Parsed XML, or null if the entry does not exist
 */
function parseXmlFromZip(zipPath, entryPath) {
  const xmlString = getXmlStringFromZip(zipPath, entryPath);

  // Compare against null explicitly: an entry that exists but is empty yields
  // '' and must still be handed to the parser, exactly as before.
  if (xmlString === null) {
    return null;
  }

  return parseXml(xmlString);
}
42
+
43
/**
 * Fetch a single archive entry and return its contents decoded as UTF-8.
 *
 * @param {string|Buffer} zipPath - Path to the ZIP file, or a Buffer holding it
 * @param {string} entryPath - Path of the XML entry inside the archive
 * @returns {string|null} Entry contents as text, or null when the entry is absent
 */
function getXmlStringFromZip(zipPath, entryPath) {
  const archive = new AdmZip(zipPath);
  const zipEntry = archive.getEntry(entryPath);

  // A falsy lookup result means the archive has no such entry.
  return zipEntry ? zipEntry.getData().toString('utf8') : null;
}
59
+
60
/**
 * Collect the name of every entry stored in a ZIP archive.
 *
 * @param {string|Buffer} zipPath - Path to the ZIP file, or a Buffer holding it
 * @returns {string[]} Entry names, in the order the archive reports them
 */
function listZipEntries(zipPath) {
  const archive = new AdmZip(zipPath);
  const entryNames = [];

  for (const zipEntry of archive.getEntries()) {
    entryNames.push(zipEntry.entryName);
  }

  return entryNames;
}
69
+
70
/**
 * Normalise a value to an array.
 *
 * Parsed XML yields a bare item when an element occurs once and an array when
 * it repeats; this lets callers iterate without caring which one they got.
 *
 * @param {*} value - An array, a single item, or null/undefined
 * @returns {Array} The same array instance if one was given, an empty array
 *   for null/undefined, otherwise a one-element array wrapping the value
 */
function ensureArray(value) {
  if (Array.isArray(value)) {
    return value;
  }

  const isMissing = value === null || value === undefined;
  return isMissing ? [] : [value];
}
79
+
80
/**
 * Safely read a nested property from an object tree.
 *
 * Only own properties of objects and arrays are followed. Inherited members
 * (e.g. 'constructor', 'toString', '__proto__') and properties of primitives
 * (e.g. the 'length' of a text string) are treated as missing, so a path can
 * only ever resolve to data actually stored in the tree.
 *
 * @param {object} obj - Object to traverse
 * @param {string} path - Dot-separated path (e.g., 'worksheet.sheetData.row');
 *   array indices and an array's 'length' are valid segments
 * @returns {*} Value at path, or undefined if any segment is missing
 */
function getNestedValue(obj, path) {
  const hasOwn = Object.prototype.hasOwnProperty;
  let current = obj;

  for (const key of path.split('.')) {
    // Stop as soon as there is nothing left to descend into. typeof null is
    // 'object', so null needs its own check.
    const isTraversable = current !== null && typeof current === 'object';
    if (!isTraversable || !hasOwn.call(current, key)) {
      return undefined;
    }
    current = current[key];
  }

  return current;
}
91
+
92
/**
 * Extract text content from a parsed XML text node.
 *
 * Accepts the shapes a text node can take after parsing:
 *   - a string, returned as-is;
 *   - a number or boolean, returned in its String() form;
 *   - an object carrying the text under '#text' (used when the element also
 *     has attributes), whose '#text' value is stringified.
 * Anything else (null, undefined, an object without '#text') yields ''.
 *
 * @param {*} textNode - Text node from parsed XML
 * @returns {string} Extracted text, or '' when no text is present
 */
function extractTextContent(textNode) {
  switch (typeof textNode) {
    case 'string':
      return textNode;
    case 'number':
    // NOTE(review): fast-xml-parser's tag-value parsing presumably turns the
    // literal text "true"/"false" into booleans — confirm against the parser
    // options in use. Without this case such text was silently dropped.
    case 'boolean':
      return String(textNode);
    case 'object':
      // typeof null is 'object', so guard before reading '#text'.
      if (textNode !== null && textNode['#text'] !== undefined) {
        return String(textNode['#text']);
      }
      return '';
    default:
      return '';
  }
}
106
+
107
+ module.exports = {
108
+ parseXml,
109
+ parseXmlFromZip,
110
+ getXmlStringFromZip,
111
+ listZipEntries,
112
+ ensureArray,
113
+ getNestedValue,
114
+ extractTextContent,
115
+ };