stegdoc 4.0.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,150 +1,160 @@
1
- const fs = require('fs');
2
- const path = require('path');
3
- const { parseMetadata } = require('./metadata');
4
- const { parseFilename } = require('./utils');
5
-
6
- /**
7
- * Extract content and metadata based on format
8
- * @param {object} readResult - Result from readFile
9
- * @param {string} format - File format ('xlsx' or 'docx')
10
- * @returns {object} { encryptedContent, encryptionMeta, metadata }
11
- */
12
- function extractContent(readResult, format) {
13
- if (format === 'xlsx') {
14
- return {
15
- encryptedContent: readResult.base64Content,
16
- encryptionMeta: readResult.encryptionMeta,
17
- metadata: parseMetadata(readResult.metadata),
18
- };
19
- } else {
20
- // DOCX: encryption meta is embedded in content with ||| separator
21
- const { base64Content, metadata } = readResult;
22
-
23
- // Check if this is a v2+ encrypted file
24
- if (base64Content.includes('|||')) {
25
- const [encryptionMeta, encryptedContent] = base64Content.split('|||');
26
- return {
27
- encryptedContent,
28
- encryptionMeta,
29
- metadata,
30
- };
31
- }
32
-
33
- // Legacy unencrypted DOCX
34
- return {
35
- encryptedContent: base64Content,
36
- encryptionMeta: null,
37
- metadata,
38
- };
39
- }
40
- }
41
-
42
- /**
43
- * Find all parts of a multi-part file in a directory
44
- * @param {string} dirPath - Directory to search
45
- * @param {string} hash - Original hash from metadata
46
- * @param {string} format - File format ('xlsx' or 'docx')
47
- * @param {number} [expectedParts] - Expected total parts (optional, for validation)
48
- * @returns {Array<{path: string, partNumber: number, filename: string}>} Array of parts sorted by part number
49
- */
50
- function findMultiPartFiles(dirPath, hash, format, expectedParts = null) {
51
- const files = fs.readdirSync(dirPath);
52
- const parts = [];
53
- const ext = format === 'docx' ? '.docx' : '.xlsx';
54
-
55
- // Also support legacy hex filenames for backward compatibility
56
- const legacyBaseHash = hash.length >= 16 ? hash.slice(0, 16) : hash;
57
-
58
- for (const file of files) {
59
- if (!file.toLowerCase().endsWith(ext)) continue;
60
-
61
- const parsed = parseFilename(file);
62
- if (!parsed || parsed.partNumber === null) continue;
63
-
64
- // Match by new realistic filename pattern
65
- // Check if file matches the expected pattern (same reportId from hash)
66
- if (parsed.reportId) {
67
- // New realistic format - match by reportId (last 4 chars of hash)
68
- // This is deterministic and doesn't depend on current date
69
- const expectedReportId = hash.slice(-4).toUpperCase();
70
- if (parsed.reportId === expectedReportId) {
71
- parts.push({
72
- path: path.join(dirPath, file),
73
- partNumber: parsed.partNumber,
74
- filename: file,
75
- dateStr: parsed.dateStr,
76
- timeStr: parsed.timeStr,
77
- });
78
- }
79
- } else if (parsed.baseHash === legacyBaseHash) {
80
- // Legacy hex format - match by base hash
81
- parts.push({
82
- path: path.join(dirPath, file),
83
- partNumber: parsed.partNumber,
84
- filename: file,
85
- });
86
- }
87
- }
88
-
89
- // Sort by part number
90
- parts.sort((a, b) => a.partNumber - b.partNumber);
91
-
92
- // For realistic filenames, ensure all parts have the same date/time pattern
93
- // This handles edge cases where multiple file sets might share the same reportId
94
- if (parts.length > 0 && parts[0].dateStr) {
95
- const refDateStr = parts[0].dateStr;
96
- const refTimeStr = parts[0].timeStr;
97
- const filteredParts = parts.filter(
98
- (p) => p.dateStr === refDateStr && p.timeStr === refTimeStr
99
- );
100
- // If filtering removed some parts, use the filtered set
101
- if (filteredParts.length !== parts.length) {
102
- parts.length = 0;
103
- parts.push(...filteredParts);
104
- }
105
- }
106
-
107
- // Validate sequential parts if expectedParts is provided
108
- if (expectedParts !== null && parts.length === expectedParts) {
109
- for (let i = 0; i < expectedParts; i++) {
110
- if (parts[i].partNumber !== i + 1) {
111
- throw new Error(`Missing part ${i + 1}. Parts must be sequential.`);
112
- }
113
- }
114
- }
115
-
116
- return parts;
117
- }
118
-
119
- /**
120
- * Check if a directory is writable
121
- * @param {string} dirPath - Directory path to check
122
- * @returns {boolean} True if writable
123
- */
124
- function isDirectoryWritable(dirPath) {
125
- try {
126
- if (!fs.existsSync(dirPath)) {
127
- fs.mkdirSync(dirPath, { recursive: true });
128
- }
129
- fs.accessSync(dirPath, fs.constants.W_OK);
130
- return true;
131
- } catch {
132
- return false;
133
- }
134
- }
135
-
136
- /**
137
- * Merge base64 chunks back into a single string
138
- * @param {Array<string>} chunks - Array of base64 chunks
139
- * @returns {string} Merged base64 string
140
- */
141
- function mergeBase64Chunks(chunks) {
142
- return chunks.join('');
143
- }
144
-
145
- module.exports = {
146
- extractContent,
147
- findMultiPartFiles,
148
- isDirectoryWritable,
149
- mergeBase64Chunks,
150
- };
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const { parseMetadata } = require('./metadata');
4
+ const { parseFilename } = require('./utils');
5
+
6
+ /**
7
+ * Extract content and metadata based on format
8
+ * @param {object} readResult - Result from readFile
9
+ * @param {string} format - File format ('xlsx' or 'docx')
10
+ * @returns {object} { encryptedContent, encryptionMeta, metadata }
11
+ */
12
+ function extractContent(readResult, format) {
13
+ // v5 log-embed format returns metadata already parsed
14
+ if (readResult.formatVersion === 'v5') {
15
+ return {
16
+ encryptedContent: null, // v5 uses payloadBuffer instead
17
+ encryptionMeta: readResult.encryptionMeta,
18
+ metadata: readResult.metadata,
19
+ payloadBuffer: readResult.payloadBuffer,
20
+ };
21
+ }
22
+
23
+ if (format === 'xlsx') {
24
+ return {
25
+ encryptedContent: readResult.base64Content,
26
+ encryptionMeta: readResult.encryptionMeta,
27
+ metadata: parseMetadata(readResult.metadata),
28
+ };
29
+ } else {
30
+ // DOCX: encryption meta is embedded in content with ||| separator
31
+ const { base64Content, metadata } = readResult;
32
+
33
+ // Check if this is a v2+ encrypted file
34
+ if (base64Content.includes('|||')) {
35
+ const [encryptionMeta, encryptedContent] = base64Content.split('|||');
36
+ return {
37
+ encryptedContent,
38
+ encryptionMeta,
39
+ metadata,
40
+ };
41
+ }
42
+
43
+ // Legacy unencrypted DOCX
44
+ return {
45
+ encryptedContent: base64Content,
46
+ encryptionMeta: null,
47
+ metadata,
48
+ };
49
+ }
50
+ }
51
+
52
+ /**
53
+ * Find all parts of a multi-part file in a directory
54
+ * @param {string} dirPath - Directory to search
55
+ * @param {string} hash - Original hash from metadata
56
+ * @param {string} format - File format ('xlsx' or 'docx')
57
+ * @param {number} [expectedParts] - Expected total parts (optional, for validation)
58
+ * @returns {Array<{path: string, partNumber: number, filename: string}>} Array of parts sorted by part number
59
+ */
60
+ function findMultiPartFiles(dirPath, hash, format, expectedParts = null) {
61
+ const files = fs.readdirSync(dirPath);
62
+ const parts = [];
63
+ const ext = format === 'docx' ? '.docx' : '.xlsx';
64
+
65
+ // Also support legacy hex filenames for backward compatibility
66
+ const legacyBaseHash = hash.length >= 16 ? hash.slice(0, 16) : hash;
67
+
68
+ for (const file of files) {
69
+ if (!file.toLowerCase().endsWith(ext)) continue;
70
+
71
+ const parsed = parseFilename(file);
72
+ if (!parsed || parsed.partNumber === null) continue;
73
+
74
+ // Match by new realistic filename pattern
75
+ // Check if file matches the expected pattern (same reportId from hash)
76
+ if (parsed.reportId) {
77
+ // New realistic format - match by reportId (last 4 chars of hash)
78
+ // This is deterministic and doesn't depend on current date
79
+ const expectedReportId = hash.slice(-4).toUpperCase();
80
+ if (parsed.reportId === expectedReportId) {
81
+ parts.push({
82
+ path: path.join(dirPath, file),
83
+ partNumber: parsed.partNumber,
84
+ filename: file,
85
+ dateStr: parsed.dateStr,
86
+ timeStr: parsed.timeStr,
87
+ });
88
+ }
89
+ } else if (parsed.baseHash === legacyBaseHash) {
90
+ // Legacy hex format - match by base hash
91
+ parts.push({
92
+ path: path.join(dirPath, file),
93
+ partNumber: parsed.partNumber,
94
+ filename: file,
95
+ });
96
+ }
97
+ }
98
+
99
+ // Sort by part number
100
+ parts.sort((a, b) => a.partNumber - b.partNumber);
101
+
102
+ // For realistic filenames, ensure all parts have the same date/time pattern
103
+ // This handles edge cases where multiple file sets might share the same reportId
104
+ if (parts.length > 0 && parts[0].dateStr) {
105
+ const refDateStr = parts[0].dateStr;
106
+ const refTimeStr = parts[0].timeStr;
107
+ const filteredParts = parts.filter(
108
+ (p) => p.dateStr === refDateStr && p.timeStr === refTimeStr
109
+ );
110
+ // If filtering removed some parts, use the filtered set
111
+ if (filteredParts.length !== parts.length) {
112
+ parts.length = 0;
113
+ parts.push(...filteredParts);
114
+ }
115
+ }
116
+
117
+ // Validate sequential parts if expectedParts is provided
118
+ if (expectedParts !== null && parts.length === expectedParts) {
119
+ for (let i = 0; i < expectedParts; i++) {
120
+ if (parts[i].partNumber !== i + 1) {
121
+ throw new Error(`Missing part ${i + 1}. Parts must be sequential.`);
122
+ }
123
+ }
124
+ }
125
+
126
+ return parts;
127
+ }
128
+
129
+ /**
130
+ * Check if a directory is writable
131
+ * @param {string} dirPath - Directory path to check
132
+ * @returns {boolean} True if writable
133
+ */
134
+ function isDirectoryWritable(dirPath) {
135
+ try {
136
+ if (!fs.existsSync(dirPath)) {
137
+ fs.mkdirSync(dirPath, { recursive: true });
138
+ }
139
+ fs.accessSync(dirPath, fs.constants.W_OK);
140
+ return true;
141
+ } catch {
142
+ return false;
143
+ }
144
+ }
145
+
146
+ /**
147
+ * Merge base64 chunks back into a single string
148
+ * @param {Array<string>} chunks - Array of base64 chunks
149
+ * @returns {string} Merged base64 string
150
+ */
151
+ function mergeBase64Chunks(chunks) {
152
+ return chunks.join('');
153
+ }
154
+
155
+ module.exports = {
156
+ extractContent,
157
+ findMultiPartFiles,
158
+ isDirectoryWritable,
159
+ mergeBase64Chunks,
160
+ };