npm - stegdoc - Versions diffs - 4.0.0 → 5.0.1 - Mend

stegdoc 4.0.0 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/LICENSE +21 -21
package/README.md +200 -214
package/package.json +59 -59
package/src/commands/decode.js +485 -343
package/src/commands/encode.js +567 -449
package/src/commands/info.js +118 -114
package/src/commands/verify.js +207 -204
package/src/index.js +89 -87
package/src/lib/compression.js +177 -115
package/src/lib/crypto.js +172 -172
package/src/lib/decoy-generator.js +306 -306
package/src/lib/docx-handler.js +587 -161
package/src/lib/docx-templates.js +355 -0
package/src/lib/file-handler.js +113 -113
package/src/lib/file-utils.js +160 -150
package/src/lib/interactive.js +190 -190
package/src/lib/log-generator.js +764 -0
package/src/lib/metadata.js +151 -122
package/src/lib/streams.js +197 -197
package/src/lib/utils.js +227 -227
package/src/lib/xlsx-handler.js +597 -416
package/src/lib/xml-utils.js +115 -115

package/src/lib/docx-handler.js CHANGED

@@ -1,161 +1,587 @@
-const { Document, Paragraph, TextRun, Packer } = require('docx');
-const fs = require('fs');
-const path = require('path');
-const { serializeMetadata, parseMetadata } = require('./metadata');
-const { parseXmlFromZip, ensureArray, extractTextContent } = require('./xml-utils');
-/**
- * Create a DOCX file with base64 content and metadata
- * @param {object} options - Options for creating the DOCX
- * @param {string} options.base64Content - Base64 content to store
- * @param {object} options.metadata - Metadata object
- * @param {string} options.outputPath - Output file path
- * @returns {Promise<string>} Path to created file
- */
-async function createDocxWithBase64(options) {
-  const { base64Content, metadata, outputPath } = options;
-  // Serialize metadata to JSON string
-  const metadataStr = serializeMetadata(metadata);
-  // Create document with metadata in custom properties and hidden paragraph
-  const doc = new Document({
-    sections: [
-      {
-        properties: {},
-        children: [
-          // Metadata paragraph (hidden for user, but readable programmatically)
-          new Paragraph({
-            children: [
-              new TextRun({
-                text: `WHITENER_METADATA:${metadataStr}`,
-                size: 1, // Very small font
-              }),
-            ],
-          }),
-          // Separator
-          new Paragraph({
-            children: [
-              new TextRun({
-                text: '---',
-                break: 1,
-              }),
-            ],
-          }),
-          // Base64 content
-          new Paragraph({
-            children: [
-              new TextRun({
-                text: base64Content,
-                font: 'Courier New', // Monospace for base64
-                size: 16, // 8pt font
-              }),
-            ],
-          }),
-        ],
-      },
-    ],
-  });
-  // Generate DOCX file
-  const buffer = await Packer.toBuffer(doc);
-  // Ensure output directory exists
-  const outputDir = path.dirname(outputPath);
-  if (!fs.existsSync(outputDir)) {
-    fs.mkdirSync(outputDir, { recursive: true });
-  }
-  // Write to file
-  fs.writeFileSync(outputPath, buffer);
-  return outputPath;
-}
-/**
- * Read a DOCX file and extract base64 content and metadata
- * Uses namespace-agnostic XML parsing to handle w:, ns0:, ns1:, etc.
- * @param {string} docxPath - Path to DOCX file
- * @returns {Promise<object>} Object containing base64Content and metadata
- */
-async function readDocxBase64(docxPath) {
-  if (!fs.existsSync(docxPath)) {
-    throw new Error(`DOCX file not found: ${docxPath}`);
-  }
-  try {
-    // Parse document.xml with namespace-agnostic parser
-    const docParsed = parseXmlFromZip(docxPath, 'word/document.xml');
-    if (!docParsed) {
-      throw new Error('Could not find document.xml in DOCX file');
-    }
-    // Extract all text from the document
-    // Structure: document > body > p[] > r[] > t
-    const fullText = extractAllText(docParsed);
-    // Parse the extracted text
-    const metadataMarker = 'WHITENER_METADATA:';
-    const metadataStart = fullText.indexOf(metadataMarker);
-    if (metadataStart === -1) {
-      throw new Error('No metadata found in DOCX file. This may not be a stegdoc-encoded file.');
-    }
-    // Find the separator "---" which comes after the metadata
-    const separatorIndex = fullText.indexOf('---', metadataStart);
-    if (separatorIndex === -1) {
-      throw new Error('Invalid file format: separator not found');
-    }
-    // Extract metadata JSON between marker and separator
-    const metadataStr = fullText.substring(metadataStart + metadataMarker.length, separatorIndex).trim();
-    const metadata = parseMetadata(metadataStr);
-    // Extract base64 content (everything after the separator)
-    const base64Content = fullText.substring(separatorIndex + 3).trim();
-    return {
-      base64Content,
-      metadata,
-    };
-  } catch (error) {
-    throw new Error(`Failed to read DOCX file: ${error.message}`);
-  }
-}
-/**
- * Extract all text content from parsed DOCX document
- * @param {object} docParsed - Parsed document.xml
- * @returns {string} Concatenated text content
- */
-function extractAllText(docParsed) {
-  let fullText = '';
-  // Navigate: document > body > p (paragraphs)
-  const body = docParsed?.document?.body;
-  if (!body) return fullText;
-  const paragraphs = ensureArray(body.p);
-  for (const para of paragraphs) {
-    // Each paragraph has r (runs) containing t (text)
-    const runs = ensureArray(para.r);
-    for (const run of runs) {
-      // Text can be in 't' property
-      if (run.t !== undefined) {
-        fullText += extractTextContent(run.t);
-      }
-    }
-  }
-  return fullText;
-}
-module.exports = {
-  createDocxWithBase64,
-  readDocxBase64,
-};
+const {
+  Document, Paragraph, TextRun, Packer, Table, TableRow, TableCell,
+  AlignmentType, HeadingLevel, WidthType, ShadingType, BorderStyle, PageBreak,
+} = require('docx');
+const fs = require('fs');
+const path = require('path');
+const { serializeMetadata, parseMetadata } = require('./metadata');
+const { parseXmlFromZip, ensureArray, extractTextContent } = require('./xml-utils');
+const { generateIncident, generateHebrewDate } = require('./docx-templates');
+const {
+  encodePayloadToLogLines, decodeLogLines, generateLogHeaders, resetTimeState,
+  BYTES_PER_DATA_LINE, calculateDataLineCount,
+} = require('./log-generator');
+// ─── Shared Styles ──────────────────────────────────────────────────────────
+const FONT_HEBREW = { ascii: 'Arial', cs: 'Arial', hAnsi: 'Arial', eastAsia: 'Arial' };
+const FONT_CODE = { ascii: 'Consolas', cs: 'Consolas', hAnsi: 'Consolas', eastAsia: 'Consolas' };
+const SIZE_BODY = '11pt';
+const SIZE_BODY_CS = '11pt';
+const SIZE_CODE = '8pt';
+const SIZE_SMALL = '9pt';
+/**
+ * Create a Hebrew RTL paragraph
+ */
+function heParagraph(text, opts = {}) {
+  const paragraphOpts = {
+    bidirectional: true,
+    spacing: opts.spacing || { after: 120, line: 276 },
+    ...opts.paragraphOpts,
+    children: [
+      new TextRun({
+        text,
+        rightToLeft: true,
+        font: FONT_HEBREW,
+        size: opts.size || SIZE_BODY,
+        sizeComplexScript: opts.size || SIZE_BODY_CS,
+        bold: opts.bold || false,
+        boldComplexScript: opts.bold || false,
+        color: opts.color,
+        ...opts.runOpts,
+      }),
+    ],
+  };
+  // Only set alignment if explicitly requested (e.g. CENTER)
+  // For RTL bidi paragraphs, omitting alignment lets Word use the natural RTL default (right-aligned)
+  if (opts.alignment) {
+    paragraphOpts.alignment = opts.alignment;
+  }
+  return new Paragraph(paragraphOpts);
+}
+/**
+ * Create a Hebrew heading
+ */
+function heHeading(text, level = HeadingLevel.HEADING_1, opts = {}) {
+  const sizes = {
+    [HeadingLevel.HEADING_1]: '16pt',
+    [HeadingLevel.HEADING_2]: '14pt',
+    [HeadingLevel.HEADING_3]: '12pt',
+  };
+  // Don't use heading: level — built-in heading styles override alignment.
+  // Instead, manually style to look like headings.
+  return new Paragraph({
+    bidirectional: true,
+    // No alignment — let bidi default handle it (natural RTL = right-aligned)
+    spacing: { before: 240, after: 120 },
+    ...opts.paragraphOpts,
+    children: [
+      new TextRun({
+        text,
+        rightToLeft: true,
+        font: FONT_HEBREW,
+        size: sizes[level] || '14pt',
+        sizeComplexScript: sizes[level] || '14pt',
+        bold: true,
+        boldComplexScript: true,
+        color: opts.color || '1F3864',
+        ...opts.runOpts,
+      }),
+    ],
+  });
+}
+/**
+ * Create an LTR code/log line paragraph (English monospace)
+ */
+function codeParagraph(text) {
+  return new Paragraph({
+    bidirectional: false,
+    alignment: AlignmentType.LEFT,
+    spacing: { after: 20, line: 240 },
+    shading: { type: ShadingType.SOLID, color: 'F2F2F2', fill: 'F2F2F2' },
+    indent: { left: 200, right: 200 },
+    children: [
+      new TextRun({
+        text,
+        rightToLeft: false,
+        font: FONT_CODE,
+        size: SIZE_CODE,
+        sizeComplexScript: SIZE_CODE,
+        color: '333333',
+      }),
+    ],
+  });
+}
+/**
+ * Create an empty paragraph (spacer)
+ */
+function spacer() {
+  return new Paragraph({ spacing: { after: 80 }, children: [] });
+}
+// ─── v5 Log-Embed DOCX ─────────────────────────────────────────────────────
+/**
+ * Create a v5 log-embed DOCX file — Hebrew incident report with embedded log lines.
+ *
+ * @param {object} options
+ * @param {Buffer} options.payloadBuffer - Encrypted binary payload
+ * @param {string} options.encryptionMeta - Packed encryption metadata or ''
+ * @param {string} options.metadataJson - Serialized metadata JSON string
+ * @param {string} options.outputPath - Output file path
+ * @param {string} options.hash - File hash for deterministic template selection
+ * @returns {Promise<string>} Path to created file
+ */
+async function createDocxV5(options) {
+  const { payloadBuffer, encryptionMeta, metadataJson, outputPath, hash } = options;
+  resetTimeState();
+  // Generate a unique report from hash + part number
+  const metadata = JSON.parse(metadataJson);
+  const partNum = metadata.partNumber || 1;
+  const incident = generateIncident(hash || 'default', partNum);
+  const dateStr = generateHebrewDate(hash || 'default');
+  // Generate log lines from payload
+  const { headerRows, dataRows, fillerRows } = encodePayloadToLogLines(
+    payloadBuffer, metadataJson, encryptionMeta
+  );
+  // Build document sections
+  const children = [];
+  // ── Title ──
+  children.push(heHeading(`דוח תקרית — ${incident.title}`, HeadingLevel.HEADING_1));
+  children.push(heParagraph(`תאריך: ${dateStr}`, { size: SIZE_SMALL, color: '666666' }));
+  children.push(heParagraph('מסווג: פנימי בלבד', { size: SIZE_SMALL, color: '666666' }));
+  children.push(spacer());
+  // ── Executive Summary ──
+  children.push(heHeading('תקציר מנהלים', HeadingLevel.HEADING_2));
+  children.push(heParagraph(incident.summary));
+  children.push(spacer());
+  // ── Timeline ──
+  children.push(heHeading('ציר זמן', HeadingLevel.HEADING_2));
+  const timelineTable = new Table({
+    visuallyRightToLeft: true,
+    width: { size: 100, type: WidthType.PERCENTAGE },
+    rows: [
+      // Header row
+      new TableRow({
+        tableHeader: true,
+        children: [
+          createHebrewCell('שעה', true, 20),
+          createHebrewCell('אירוע', true, 80),
+        ],
+      }),
+      // Data rows
+      ...incident.timeline.map(entry =>
+        new TableRow({
+          children: [
+            createHebrewCell(entry.time, false, 20),
+            createHebrewCell(entry.desc, false, 80),
+          ],
+        })
+      ),
+    ],
+  });
+  children.push(timelineTable);
+  children.push(spacer());
+  // ── Relevant Log Entries ──
+  children.push(heHeading('רשומות לוג רלוונטיות', HeadingLevel.HEADING_2));
+  children.push(heParagraph('להלן רשומות הלוג שאותרו כחלק מהחקירה. הרשומות סוננו מתוך מערכת הניטור ומכילות את הבקשות הרלוונטיות לתקרית:'));
+  children.push(spacer());
+  // Header log lines (metadata) — first batch
+  const headerLogLabel = `// Filtered logs — ${headerRows.length + dataRows.length + fillerRows.length} entries`;
+  children.push(codeParagraph(headerLogLabel));
+  for (const row of headerRows) {
+    children.push(codeParagraph(formatLogLine(row)));
+  }
+  // Data log lines (payload)
+  for (const row of dataRows) {
+    children.push(codeParagraph(formatLogLine(row)));
+  }
+  // Filler log lines
+  for (const row of fillerRows) {
+    children.push(codeParagraph(formatLogLine(row)));
+  }
+  children.push(spacer());
+  // ── Root Cause ──
+  children.push(heHeading('ניתוח שורש הבעיה', HeadingLevel.HEADING_2));
+  children.push(heParagraph(incident.rootCause));
+  children.push(spacer());
+  // ── Recommendations ──
+  children.push(heHeading('המלצות', HeadingLevel.HEADING_2));
+  for (let i = 0; i < incident.recommendations.length; i++) {
+    children.push(heParagraph(`${i + 1}. ${incident.recommendations[i]}`));
+  }
+  children.push(spacer());
+  // ── Footer ──
+  children.push(heParagraph('—— סוף הדוח ——', {
+    alignment: AlignmentType.CENTER,
+    size: SIZE_SMALL,
+    color: '999999',
+  }));
+  // Build document
+  const doc = new Document({
+    styles: {
+      default: {
+        document: {
+          run: {
+            rightToLeft: true,
+            font: FONT_HEBREW,
+            size: SIZE_BODY,
+            sizeComplexScript: SIZE_BODY_CS,
+            language: { bidirectional: 'he-IL' },
+          },
+          paragraph: {
+            // No alignment — bidi paragraphs default to right in RTL context
+            spacing: { line: 276 },
+          },
+        },
+      },
+    },
+    sections: [{
+      properties: {
+        page: {
+          size: { width: '21cm', height: '29.7cm' }, // A4
+          margin: {
+            top: '2.54cm',
+            bottom: '2.54cm',
+            left: '2.54cm',
+            right: '2.54cm',
+          },
+        },
+      },
+      children,
+    }],
+  });
+  const buffer = await Packer.toBuffer(doc);
+  const outputDir = path.dirname(outputPath);
+  if (!fs.existsSync(outputDir)) {
+    fs.mkdirSync(outputDir, { recursive: true });
+  }
+  fs.writeFileSync(outputPath, buffer);
+  return outputPath;
+}
+/**
+ * Format a log row array into a single log line string (nginx combined format).
+ */
+function formatLogLine(row) {
+  // row: [ip, timestamp, method, request, status, bytes, referer, ua, requestId, traceId]
+  return `${row[0]} - - ${row[1]} "${row[3]}" ${row[4]} ${row[5]} "${row[6]}" "${row[7]}" "${row[8]}" "${row[9]}"`;
+}
+/**
+ * Create a Hebrew table cell
+ */
+function createHebrewCell(text, isHeader, widthPct) {
+  return new TableCell({
+    width: { size: widthPct, type: WidthType.PERCENTAGE },
+    shading: isHeader ? { type: ShadingType.SOLID, color: '2F5496', fill: '2F5496' } : undefined,
+    children: [
+      new Paragraph({
+        bidirectional: true,
+        spacing: { before: 40, after: 40 },
+        children: [
+          new TextRun({
+            text,
+            rightToLeft: true,
+            font: FONT_HEBREW,
+            size: SIZE_SMALL,
+            sizeComplexScript: SIZE_SMALL,
+            bold: isHeader,
+            boldComplexScript: isHeader,
+            color: isHeader ? 'FFFFFF' : '333333',
+          }),
+        ],
+      }),
+    ],
+  });
+}
+// ─── v5 DOCX Reader ─────────────────────────────────────────────────────────
+/**
+ * Read a v5 log-embed DOCX file and extract payload.
+ * Scans for monospace paragraphs that contain log lines.
+ * @param {string} docxPath - Path to DOCX file
+ * @returns {object} { payloadBuffer, metadataJson, encryptionMeta, metadata, formatVersion }
+ */
+async function readDocxV5(docxPath) {
+  const docParsed = parseXmlFromZip(docxPath, 'word/document.xml');
+  if (!docParsed) {
+    throw new Error('Could not find document.xml in DOCX file');
+  }
+  // Also parse styles to identify monospace/code runs
+  const stylesParsed = parseXmlFromZip(docxPath, 'word/styles.xml');
+  // Extract all paragraphs with their formatting info
+  const body = docParsed?.document?.body;
+  if (!body) throw new Error('Empty document body');
+  const paragraphs = ensureArray(body.p);
+  const logLines = [];
+  for (const para of paragraphs) {
+    const runs = ensureArray(para.r);
+    let paraText = '';
+    let isCode = false;
+    for (const run of runs) {
+      if (run.t !== undefined) {
+        paraText += extractTextContent(run.t);
+      }
+      // Check if run uses monospace font (Consolas/Courier)
+      const rPr = run.rPr;
+      if (rPr) {
+        const fonts = rPr.rFonts;
+        if (fonts) {
+          const fontName = fonts['@_w:ascii'] || fonts['@_ascii'] || '';
+          if (/consolas|courier/i.test(fontName)) {
+            isCode = true;
+          }
+        }
+      }
+    }
+    // Also check paragraph-level shading as indicator of code block
+    const pPr = para.pPr;
+    if (pPr && pPr.shd) {
+      const fill = pPr.shd['@_w:fill'] || pPr.shd['@_fill'] || '';
+      if (fill === 'F2F2F2' || fill === 'f2f2f2') {
+        isCode = true;
+      }
+    }
+    // Collect code paragraphs that look like log lines
+    if (isCode && paraText.trim().length > 0) {
+      // Skip the comment line
+      if (paraText.startsWith('//')) continue;
+      logLines.push(paraText.trim());
+    }
+  }
+  if (logLines.length === 0) {
+    throw new Error('No log lines found in DOCX file. This may not be a v5 stegdoc file.');
+  }
+  // Parse log lines back into row arrays
+  const rows = logLines.map(parseLogLine);
+  // Decode using the same engine as XLSX
+  return decodeLogLines(rows);
+}
+/**
+ * Parse a formatted log line string back into a row array.
+ * Input format: `IP - - [timestamp] "request" status bytes "referer" "ua" "requestId" "traceId"`
+ */
+function parseLogLine(line) {
+  // Regex to parse nginx combined log format with extra fields
+  const regex = /^(\S+)\s+-\s+-\s+(\[[^\]]+\])\s+"([^"]+)"\s+(\d+)\s+(\d+)\s+"([^"]+)"\s+"([^"]+)"\s+"([^"]+)"\s+"([^"]+)"$/;
+  const match = line.match(regex);
+  if (!match) {
+    // Fallback: try to extract what we can
+    return ['', '', '', line, '', '', '', '', '', ''];
+  }
+  const [, ip, timestamp, request, status, bytes, referer, ua, requestId, traceId] = match;
+  // Extract method from request
+  const methodMatch = request.match(/^(\w+)\s/);
+  const method = methodMatch ? methodMatch[1] : '';
+  return [ip, timestamp, method, request, status, bytes, referer, ua, requestId, traceId];
+}
+/**
+ * Detect if a DOCX file is v5 (log-embed) format.
+ * Checks for the presence of log-formatted monospace content.
+ */
+function detectDocxVersion(docxPath) {
+  try {
+    const docParsed = parseXmlFromZip(docxPath, 'word/document.xml');
+    if (!docParsed) return 'legacy';
+    const body = docParsed?.document?.body;
+    if (!body) return 'legacy';
+    // Quick check: look for the STGD05 marker in raw text
+    const paragraphs = ensureArray(body.p);
+    for (const para of paragraphs) {
+      const runs = ensureArray(para.r);
+      for (const run of runs) {
+        const text = extractTextContent(run.t || '');
+        if (text.includes('/api/v1/health/')) {
+          return 'v5';
+        }
+      }
+    }
+    // Check for WHITENER_METADATA (legacy)
+    for (const para of paragraphs) {
+      const runs = ensureArray(para.r);
+      for (const run of runs) {
+        const text = extractTextContent(run.t || '');
+        if (text.includes('WHITENER_METADATA:')) {
+          return 'legacy';
+        }
+      }
+    }
+    return 'legacy';
+  } catch {
+    return 'legacy';
+  }
+}
+// ─── Legacy DOCX (v3/v4) ───────────────────────────────────────────────────
+/**
+ * Create a legacy DOCX file with base64 content (v3/v4 format)
+ */
+async function createDocxWithBase64(options) {
+  const { base64Content, metadata, outputPath } = options;
+  const metadataStr = serializeMetadata(metadata);
+  const doc = new Document({
+    sections: [{
+      properties: {},
+      children: [
+        new Paragraph({
+          children: [
+            new TextRun({
+              text: `WHITENER_METADATA:${metadataStr}`,
+              size: 1,
+            }),
+          ],
+        }),
+        new Paragraph({
+          children: [
+            new TextRun({ text: '---', break: 1 }),
+          ],
+        }),
+        new Paragraph({
+          children: [
+            new TextRun({
+              text: base64Content,
+              font: 'Courier New',
+              size: 16,
+            }),
+          ],
+        }),
+      ],
+    }],
+  });
+  const buffer = await Packer.toBuffer(doc);
+  const outputDir = path.dirname(outputPath);
+  if (!fs.existsSync(outputDir)) {
+    fs.mkdirSync(outputDir, { recursive: true });
+  }
+  fs.writeFileSync(outputPath, buffer);
+  return outputPath;
+}
+// ─── Unified Reader ─────────────────────────────────────────────────────────
+/**
+ * Read a DOCX file, auto-detecting v5 vs legacy format.
+ */
+async function readDocxBase64(docxPath) {
+  if (!fs.existsSync(docxPath)) {
+    throw new Error(`DOCX file not found: ${docxPath}`);
+  }
+  const version = detectDocxVersion(docxPath);
+  if (version === 'v5') {
+    const result = await readDocxV5(docxPath);
+    return {
+      ...result,
+      formatVersion: 'v5',
+    };
+  }
+  // Legacy path
+  try {
+    const docParsed = parseXmlFromZip(docxPath, 'word/document.xml');
+    if (!docParsed) {
+      throw new Error('Could not find document.xml in DOCX file');
+    }
+    const fullText = extractAllText(docParsed);
+    const metadataMarker = 'WHITENER_METADATA:';
+    const metadataStart = fullText.indexOf(metadataMarker);
+    if (metadataStart === -1) {
+      throw new Error('No metadata found in DOCX file. This may not be a stegdoc-encoded file.');
+    }
+    const separatorIndex = fullText.indexOf('---', metadataStart);
+    if (separatorIndex === -1) {
+      throw new Error('Invalid file format: separator not found');
+    }
+    const metadataStr = fullText.substring(metadataStart + metadataMarker.length, separatorIndex).trim();
+    const metadata = parseMetadata(metadataStr);
+    const base64Content = fullText.substring(separatorIndex + 3).trim();
+    return {
+      base64Content,
+      metadata,
+      formatVersion: 'legacy',
+    };
+  } catch (error) {
+    throw new Error(`Failed to read DOCX file: ${error.message}`);
+  }
+}
+/**
+ * Extract all text content from parsed DOCX document (legacy)
+ */
+function extractAllText(docParsed) {
+  let fullText = '';
+  const body = docParsed?.document?.body;
+  if (!body) return fullText;
+  const paragraphs = ensureArray(body.p);
+  for (const para of paragraphs) {
+    const runs = ensureArray(para.r);
+    for (const run of runs) {
+      if (run.t !== undefined) {
+        fullText += extractTextContent(run.t);
+      }
+    }
+  }
+  return fullText;
+}
+module.exports = {
+  // v5
+  createDocxV5,
+  readDocxV5,
+  detectDocxVersion,
+  // Legacy
+  createDocxWithBase64,
+  // Unified
+  readDocxBase64,
+};