npm - @modusoperandi/licit-import-utils - Versions diffs - 0.1.0 → 0.1.1 - Mend

@modusoperandi/licit-import-utils 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/LICENSE CHANGED Viewed

@@ -1,21 +1,21 @@
-MIT License
-Copyright (c) 2026 Modus Operandi Inc.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+MIT License
+Copyright (c) 2026 Modus Operandi Inc.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # licit-import-utils
2	+ This is a utility package for importing files like json or docx into Licit compatible documents.

package/index.d.ts CHANGED Viewed

@@ -4,5 +4,8 @@
  */
 export * from './types';
 export * from './licit-transform';
+export * from './preprocess.utils';
 export * from './transform.docx';
+export * from './transform.utils';
 export * from './transform.zip';
+export { LicitDocumentJSON, LicitElementJSON } from './licit-elements';

package/index.js CHANGED Viewed

@@ -4,5 +4,7 @@
  */
 export * from './types';
 export * from './licit-transform';
+export * from './preprocess.utils';
 export * from './transform.docx';
+export * from './transform.utils';
 export * from './transform.zip';

package/licit-transform.d.ts CHANGED Viewed

@@ -3,8 +3,6 @@
  * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
  */
 import type { LicitDocumentJSON } from './licit-elements';
-import { LicitBulletListElement, LicitDocumentElement, LicitEnhancedImageElement, LicitTableRowElement } from './licit-elements';
-import type { UpdatedCapco } from './capco.util';
 import type { MessageSink } from './types';
 export interface ParserElement {
     node: Element;
@@ -13,12 +11,6 @@ export interface ParserElement {
     level: number;
     subText: string;
 }
-interface ImageInfo {
-    src: string;
-    alt: string;
-    width: number;
-    height: number;
-}
 declare enum ParserElementType {
     ChapterTitle = 0,
     ChapterSubtitle = 1,
@@ -88,73 +80,63 @@ export interface AddCellOptions {
 }
 export declare class LicitConverter {
     private readonly config;
-    elementsParsedMap: Map<string, boolean>;
-    elements: ParserElement[];
+    private readonly elementsParsedMap;
+    private elements;
     constructor(config: TransformConfig);
     parseHTML(html: Document, isDoctorine: boolean, moDocType?: string): LicitDocumentJSON;
     parseFrameMakerHTML5(html: Element[]): LicitDocumentJSON;
-    render_FrameMakerHTML5_zip(nodes: NodeList, infoIconData?: HTMLOListElement[], _moDocType?: string, renderedContentList?: Node[]): LicitDocumentJSON;
-    render_FrameMakerHTML5_zip_SwitchHelper(e: ParserElement, infoIconData: HTMLOListElement[], renderedContentList: Node[], isNumberReseted: boolean, licitDocument: LicitDocumentElement): boolean;
+    private render_FrameMakerHTML5_zip;
+    private render_FrameMakerHTML5_zip_SwitchHelper;
     private handleNodes;
-    fetchRenderedContent(nodes: NodeList): Node[];
-    /**
-     * Returns a map elements which were parsed.
-     *
-     * @returns Map of elements
-     */
-    getElementsParsedMap(): Map<string, boolean>;
-    getCustomStyle(styleName: string): StyleInfo | undefined;
-    handleOrderedListItem(e: ParserElement, licitDocument: LicitDocumentElement): void;
+    private fetchRenderedContent;
+    private getCustomStyle;
+    private handleOrderedListItem;
     /**
      * Renders the HTML as a Licit JSON structure
      *
      * @returns The document as an `LicitDocumentJSON` object
      */
-    render(nodes: NodeListOf<Element>): LicitDocumentJSON;
-    renderSwitchHelper(e: ParserElement, licitDocument: LicitDocumentElement): void;
+    private render;
+    private renderSwitchHelper;
     private renderTable;
     private renderParagraph;
     private renderHeader;
     private buildElements;
-    checkChildNode(node: HTMLElement | Element, nextNode: HTMLElement | Element): number;
-    render_doc(nodes: NodeListOf<Element>, infoIconData: HTMLOListElement[] | undefined, moDocType: string): LicitDocumentJSON;
-    render_docSwitchHelper(e: ParserElement, licitDocument: LicitDocumentElement, tocRemoved: boolean, infoIconData: HTMLOListElement[], moDocType: string): boolean;
-    renderTypeParagraph(e: ParserElement, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
-    handle_UrlText(text: string, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
-    text_WithoutUrl(n: Node, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
+    private checkChildNode;
+    private render_doc;
+    private render_docSwitchHelper;
+    private renderTypeParagraph;
+    private handle_UrlText;
+    private text_WithoutUrl;
     private handleNode;
-    mergeSpans(node: Element, nextNode: Element): number;
-    updateChildCapcoContent(e: ParserElement): void;
-    updateChildCapcoContentLoopHelper(childNodes: ChildNode[], res: UpdatedCapco): void;
-    processChildNodesCapco(childNodes: NodeListOf<ChildNode>): void;
-    updateCapcoToParagraph(child: ChildNode, res: UpdatedCapco): void;
-    processTableCapco(tableNode: HTMLTableElement): void;
-    figureTitleCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
-    handleImageChild(child: Element, licitDocument: LicitDocumentElement): void;
-    renderNewFigureTitle(e: ParserElement, licitDocument: LicitDocumentElement): void;
-    figureParagraphCase(e: ParserElement, licitDocument: LicitDocumentElement, infoIconData: HTMLOListElement[] | undefined, renderedContentList: Node[]): void;
-    figureNoteCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
-    figureTableTitleCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
+    private mergeSpans;
+    private updateChildCapcoContent;
+    private updateChildCapcoContentLoopHelper;
+    private processChildNodesCapco;
+    private updateCapcoToParagraph;
+    private processTableCapco;
+    private figureTitleCase;
+    private handleImageChild;
+    private renderNewFigureTitle;
+    private figureParagraphCase;
+    private figureNoteCase;
+    private figureTableTitleCase;
     private renderDocVignet;
     private parseUntypedDocVignet;
     private parseTypedDocVignet;
-    parseTypedDocVignetHelper(val: string, bgColor: string, borderColor: string, boxWidth: number): {
-        bgColor: string;
-        borderColor: string;
-        boxWidth: number;
-    };
+    private parseTypedDocVignetHelper;
     private renderDocTable;
     private renderEnhancedTable;
     private getLicitTable;
-    renderNewLicitImage(imageElement: HTMLImageElement, capco: string | null): LicitEnhancedImageElement;
-    renderDocBulletItems(e: ParserElement, licitDocument: LicitDocumentElement): void;
-    processBulletNodes(childNodes: Node[], bulletList: LicitBulletListElement, licitDocument: any, indent: number, e: any): void;
-    addElementLicit(licitDocument: any, bulletList: LicitBulletListElement): void;
-    removeEmptyATags(node: Node): void;
+    private renderNewLicitImage;
+    private renderDocBulletItems;
+    private processBulletNodes;
+    private addElementLicit;
+    private removeEmptyATags;
     private handleULNode;
     private renderDocFigure;
-    renderImage(imgElement: HTMLImageElement, licitDocument: LicitDocumentElement): void;
-    parseOL(e: ParserElement, licitDocument: LicitDocumentElement): void;
+    private renderImage;
+    private parseOL;
     /**
      * To parse table data
      * @param e - element
@@ -166,114 +148,109 @@ export declare class LicitConverter {
      * @param isTransparent - flag to distinguish preface table
      * @returns void
      */
-    parseTableContent(_e: any, tableTag: any, querySel: any, isChapterHeader: any, licitTable: any, widthArray: number[], isTransparent: boolean): void;
-    parseTableContentInnerLoopHelper(cells: any, _cellIndex: number, isChapterHeader: boolean, licitRow: LicitTableRowElement, widthArray: number[], isTransparent: boolean): void;
+    private parseTableContent;
+    private parseTableContentInnerLoopHelper;
     private addCell;
-    checkCellStyle(style: string | null): string | null;
+    private checkCellStyle;
     private addTableImageCell;
-    ParseNestedList(_listType: string, node: ChildNode, licitDocument: LicitDocumentElement, indent: number): void;
+    private ParseNestedList;
     /**
      * Returns the level of an element as described by the number at the end of its classname
      *
      * @param className - The className of the element
      * @returns The level as a number or zero if the level cannot be determined
      */
-    extractLevel(className: string): number;
+    private extractLevel;
     /**
      * Determines if an element is a table or image then calls the appropriate parse method
      */
-    parseTableFigure(element: Element): void;
+    private parseTableFigure;
     /**
      * Parse a table element
      */
-    parseTable(element: Element, useEnhancedTables: boolean): void;
+    private parseTable;
     /**
      * Parse a vignet element
      */
-    parseVignet(element: Element): void;
+    private parseVignet;
     /**
      * Parse a figure (image) element
      */
-    parseFigure(element: Element): void;
+    private parseFigure;
     /**
      * Parse a note element
      */
-    parseNote(element: Element): void;
+    private parseNote;
     /**
      * Parse a hr element
      */
-    parseHR(element: Element): void;
+    private parseHR;
     /**
      * Parse a chapter title element
      */
-    parseChapterTitle(element: Element): void;
+    private parseChapterTitle;
     /**
      * Parse a chapter subtitle element
      */
-    parseChapterSubtitle(element: Element): void;
+    private parseChapterSubtitle;
     /**
      * Parse a header element
      */
-    parseHeader(element: Element, nextElement: Element): void;
+    private parseHeader;
     /**
      * Parse a bullet point item element
      */
-    parseBullet(element: Element): void;
+    private parseBullet;
     /**
      * Parse an ordered list item element
      */
-    parseOrdered(element: Element): void;
+    private parseOrdered;
     /**
      * Parse a paragraph element
      */
-    parseParagraph(element: Element): void;
-    parseDynamicHeader(element: Element): void;
+    private parseParagraph;
+    private parseDynamicHeader;
     /** Sanitize the text content by removing specific characters */
-    sanitizeText(element: Element): void;
+    private sanitizeText;
     /**
      * Parse a figure (image) title element
      */
-    parseFigureTitle(element: Element): void;
+    private parseFigureTitle;
     /**
      * Parse a ChangeBarPara element
      */
-    parseChangeBarPara(element: Element): void;
+    private parseChangeBarPara;
     /**
      * Parse a table title element
      */
-    parseTableTitle(element: Element): void;
+    private parseTableTitle;
     /**
      * Parse an unknown element. Currently does nothing besides printing a warning to the console.
      */
-    parseUnknownElement(element: Element, message: string): void;
+    private parseUnknownElement;
     /**
      * Parse a section title element
      */
-    parseSectionTitle(element: Element): void;
+    private parseSectionTitle;
     /**
      * Parses an `Element` as determined by its `className`
      *
      * @param element - The `Element` to be parsed
      */
-    parseElement(element: Element, nextElement: Element): void;
-    parseElement_doc(element: Element, nextElement: Element): void;
+    private parseElement;
+    private parseElement_doc;
     /**
      * Cleans up the HTML by calling certain helper methods
      */
-    sanitizeHTML(html: string): string;
-    /**
-     * Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
-     */
-    replaceUnwantedChars(html: string): string;
+    private sanitizeHTML;
     /**
      * Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
      */
-    replaceKeywordsWithLinks(html: string): string;
-    matchClassToExcludeNumber(className: string): boolean;
-    sanitizeElement(element: Element): void;
-    removeLastNumber(inputString: string): string;
-    getScaledWidth(width: number): string;
-    isTransparentTable(element: Element): boolean;
+    private replaceKeywordsWithLinks;
+    private matchClassToExcludeNumber;
+    private sanitizeElement;
+    private getScaledWidth;
+    private isTransparentTable;
     /**
      * Extracts and calculates the column widths from a given HTML table element.
      *
@@ -286,24 +263,24 @@ export declare class LicitConverter {
      * @param {HTMLTableElement} table - The HTML table element from which column widths are to be extracted.
      * @returns {number[] | undefined} An array of column widths in pixels, or `undefined` if the widths are invalid or missing.
      */
-    getColWidthArray(table: HTMLTableElement): number[] | undefined;
-    setCellWidth(colSpan: number, cellIndex: number, colWidthArray: number[]): number[];
-    scaleWidthArray(rawWidthArray: number[]): number[];
-    getSumOfArray(array: number[]): number;
+    private getColWidthArray;
+    private setCellWidth;
+    private scaleWidthArray;
+    private getSumOfArray;
     /**
      * Determines the orientation (portrait or landscape) based on the total width.
      *
      * @param {number} totalWidth - The total width (in pixels) used to determine orientation.
      * @returns {'portrait' | 'landscape'} Returns 'portrait' if the width is less than 700 pixels; otherwise, returns 'landscape'.
      */
-    findOrientation(totalWidth: number): 'portrait' | 'landscape';
+    private findOrientation;
     /**
      * Extracts image information from an HTMLImageElement.
      *
      * @param {HTMLImageElement} img - The image element to extract information from.
      * @returns {{ src: string; alt: string; width: number; height: number }} An object containing the image's source URL, alt text, width, and height.
      */
-    extractImageInfo(img: HTMLImageElement): ImageInfo;
+    private extractImageInfo;
     /**
      * Extracts note paragraphs from the last row of an HTML table if that row
      * contains a note header such as "OVERALL NOTE:" or "NOTES:".
@@ -345,7 +322,7 @@ export declare class LicitConverter {
      * @param {Element} node - The DOM element to check.
      * @returns {boolean} `true` if the element qualifies as a table figure, otherwise `false`.
      */
-    isTableFigureNode(node: Element): boolean;
+    private isTableFigureNode;
     /**
      * Determines whether the provided class name corresponds to a note-related node.
      *

package/licit-transform.js CHANGED Viewed

@@ -260,14 +260,6 @@ export class LicitConverter {
         }
         return renderedArr;
     }
-    /**
-     * Returns a map elements which were parsed.
-     *
-     * @returns Map of elements
-     */
-    getElementsParsedMap() {
-        return this.elementsParsedMap;
-    }
     getCustomStyle(styleName) {
         return this.config.customStyles?.find((s) => s.styleName === styleName);
     }
@@ -467,7 +459,7 @@ export class LicitConverter {
                 }
                 const childNode = children[j];
                 let nextChildNode = children[j + 1];
-                // KNITE-1013: Handling paragraph combining logic for the case where
+                // Handling paragraph combining logic for the case where
                 // heading is inside <OL>/<UL> and content is outside
                 if (!nextChildNode &&
                     (node.tagName === 'OL' || node.tagName === 'UL') &&
@@ -1894,16 +1886,6 @@ export class LicitConverter {
     sanitizeHTML(html) {
         return this.replaceKeywordsWithLinks(html);
     }
-    /**
-     * Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
-     */
-    replaceUnwantedChars(html) {
-        const chars = this.config.replacementChars;
-        for (const char of chars) {
-            html = html.replace(char.find, char.replace);
-        }
-        return html;
-    }
     /**
      * Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
      */
@@ -1916,7 +1898,7 @@ export class LicitConverter {
         }
         return html;
     }
-    //FS : For skipping triming inside table, add more classes to the class list for future use
+    // For skipping trimming inside tables; add more classes to the class list for future use
     matchClassToExcludeNumber(className) {
         let trimmedClassName = className.trim();
         trimmedClassName = trimmedClassName.toLowerCase();
@@ -1950,14 +1932,6 @@ export class LicitConverter {
         };
         stripTextContent(element);
     }
-    removeLastNumber(inputString) {
-        let lastNonDigitIndex = inputString.length - 1;
-        while (lastNonDigitIndex >= 0 &&
-            !Number.isNaN(Number.parseInt(inputString[lastNonDigitIndex]))) {
-            lastNonDigitIndex--;
-        }
-        return inputString.slice(0, lastNonDigitIndex + 1);
-    }
     getScaledWidth(width) {
         if (width <= 200) {
             return width.toString();

package/package.json CHANGED Viewed

@@ -1,52 +1,52 @@
-{
-  "name": "@modusoperandi/licit-import-utils",
-  "version": "0.1.0",
-  "license": "MIT",
-  "type": "module",
-  "subversion": "1",
-  "description": "A utility package for importing files like json or docx into Licit compatible documents",
-  "main": "index.js",
-  "types": "index.d.ts",
-  "repository": {
-    "type": "git",
-    "url": "git+https://github.com/MO-Movia/licit-import-utils.git"
-  },
-  "scripts": {
-    "test": "jest",
-    "test:unit": "jest",
-    "test:coverage": "jest --env=jsdom --coverage",
-    "build:clean": "rm -rf dist/ && rm -f modusoperandi-*.*.*.tgz",
-    "lint": "eslint src",
-    "ci:build": "tsc -b tsconfig.prod.json --clean && tsc -b tsconfig.prod.json && npx copyfiles@2.4.1 package.json LICENSE dist",
-    "ci:bom": "npx @cyclonedx/cyclonedx-npm --ignore-npm-errors --short-PURLs --output-format XML --output-file dist/bom.xml",
-    "verify": "npm run lint -- --fix && npm run ci:build && npm run test:coverage && echo 'All Tests Passed!'"
-  },
-  "peerDependencies": {
-    "@modusoperandi/mammoth": "^1.7.0-6",
-    "jszip": "^3.10.1"
-  },
-  "peerDependenciesMeta": {
-    "@modusoperandi/mammoth": {
-      "optional": true
-    },
-    "jszip": {
-      "optional": true
-    }
-  },
-  "dependencies": {
-    "uuid": "^13.0.0"
-  },
-  "devDependencies": {
-    "@modusoperandi/mammoth": "^1.7.0-6",
-    "@modusoperandi/eslint-config": "^3.0.3",
-    "@types/jest": "^30.0.0",
-    "jszip": "^3.10.1",
-    "eslint": "^9.39.2",
-    "jest": "^30.2.0",
-    "jest-environment-jsdom": "^30.2.0",
-    "jest-junit": "^16.0.0",
-    "ts-jest": "^29.4.6",
-    "ts-node": "^10.9.2",
-    "typescript": "^5.9.3"
-  }
-}
+{
+  "name": "@modusoperandi/licit-import-utils",
+  "version": "0.1.1",
+  "license": "MIT",
+  "type": "module",
+  "subversion": "1",
+  "description": "A utility package for importing files like json or docx into Licit compatible documents",
+  "main": "index.js",
+  "types": "index.d.ts",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/MO-Movia/licit-import-utils.git"
+  },
+  "scripts": {
+    "test": "jest",
+    "test:unit": "jest",
+    "test:coverage": "jest --env=jsdom --coverage",
+    "build:clean": "rm -rf dist/ && rm -f modusoperandi-*.*.*.tgz",
+    "lint": "eslint src",
+    "ci:build": "tsc -b tsconfig.prod.json --clean && tsc -b tsconfig.prod.json && npx copyfiles@2.4.1 package.json LICENSE README.md dist",
+    "ci:bom": "npx @cyclonedx/cyclonedx-npm --ignore-npm-errors --short-PURLs --output-format XML --output-file dist/bom.xml",
+    "verify": "npm run lint -- --fix && npm run ci:build && npm run test:coverage && echo 'All Tests Passed!'"
+  },
+  "peerDependencies": {
+    "@modusoperandi/mammoth": "^1.7.0-6",
+    "jszip": "^3.10.1"
+  },
+  "peerDependenciesMeta": {
+    "@modusoperandi/mammoth": {
+      "optional": true
+    },
+    "jszip": {
+      "optional": true
+    }
+  },
+  "dependencies": {
+    "uuid": "^13.0.0"
+  },
+  "devDependencies": {
+    "@modusoperandi/mammoth": "^1.7.0-6",
+    "@modusoperandi/eslint-config": "^3.0.3",
+    "@types/jest": "^30.0.0",
+    "jszip": "^3.10.1",
+    "eslint": "^9.39.2",
+    "jest": "^30.2.0",
+    "jest-environment-jsdom": "^30.2.0",
+    "jest-junit": "^16.0.0",
+    "ts-jest": "^29.4.6",
+    "ts-node": "^10.9.2",
+    "typescript": "^5.9.3"
+  }
+}

package/preprocess.utils.d.ts ADDED Viewed

@@ -0,0 +1,22 @@
+/**
+ * @license MIT
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
+ */
+import type { Message } from './types';
+/**
+ * Runs `arrayBuffer` through a `DocxTransformer` created for `docType` and
+ * resolves to the unique style IDs found in the messages it emits.
+ */
+export declare function extractStylesForDoc(arrayBuffer: ArrayBuffer, docType: string): Promise<{
+    styles: string[];
+}>;
+/**
+ * Returns each distinct value captured from `Style ID: <id>)` in the message
+ * texts, in order of first appearance.
+ */
+export declare function extractUniqueStyleIds(data: Message[]): string[];
+/**
+ * Decodes `arrayBuffer` as UTF-8 JSON and resolves to the decoded text plus
+ * every distinct string-valued `styleName` found in the parsed document.
+ * Fails with `Invalid JSON document` when the parsed value is not an object.
+ */
+export declare function extractStylesForJSON(arrayBuffer: ArrayBuffer): Promise<{
+    content: string;
+    styles: string[];
+}>;
+/**
+ * Recursively walks `obj` and appends each distinct string-valued `styleName`
+ * property to `styles`; the same `styles` array is returned.
+ */
+export declare function collectStyles(obj: unknown, styles?: string[]): string[];
+/**
+ * Decodes `arrayBuffer`, parses it as `text/html`, and resolves to the unique
+ * class names used by its elements.
+ */
+export declare function processHTML(arrayBuffer: ArrayBuffer): Promise<{
+    styles: string[];
+}>;
+/**
+ * Resolves to the unique class names used across all entries of `zipFile`
+ * whose names end in `.htm`.
+ */
+export declare function extractStylesFromZip(zipFile: File): Promise<{
+    styles: string[];
+}>;
+/** Decodes the bytes of `arrayBuffer` with a default (UTF-8) `TextDecoder`. */
+export declare function arrayBufferToString(arrayBuffer: ArrayBuffer): string;
+/**
+ * Returns the unique class names carried by the elements of `doc` that have a
+ * `class` attribute.
+ */
+export declare function extractStyleNamesFromHTML(doc: Document): string[];

package/preprocess.utils.js ADDED Viewed

@@ -0,0 +1,105 @@
+/**
+ * @license MIT
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
+ */
+import JSZip from 'jszip';
+import { DocxTransformer } from './transform.docx';
+/**
+ * Runs a document buffer through `DocxTransformer` and reports the style IDs
+ * mentioned in the messages the transformer emits along the way.
+ *
+ * The transform result itself is discarded; only the `(type, message)` pairs
+ * delivered to the message callback are inspected.
+ *
+ * @param arrayBuffer - File contents handed to `DocxTransformer.transform`
+ *   (presumably a DOCX file; confirm against the transformer).
+ * @param docType - Document type passed to the `DocxTransformer` constructor.
+ * @returns Resolves to `{ styles }`, the unique style IDs found in the messages.
+ */
+export async function extractStylesForDoc(arrayBuffer, docType) {
+    const collected = [];
+    const recordMessage = (type, message) => collected.push({ type, message });
+    const transformer = new DocxTransformer(docType, recordMessage);
+    await transformer.transform(arrayBuffer);
+    return { styles: extractUniqueStyleIds(collected) };
+}
+/**
+ * Collects the distinct style IDs referenced in a list of messages.
+ *
+ * Each message text is searched for `Style ID: <id>` followed by a closing
+ * parenthesis. The captured `<id>` (at most 100 characters) is kept the first
+ * time it is seen, so the result preserves first-occurrence order.
+ *
+ * @param data - Messages to scan; `null`/`undefined` is treated as empty.
+ * @returns Unique, non-empty style IDs in order of first appearance.
+ */
+export function extractUniqueStyleIds(data) {
+    // Built once per call rather than once per message.
+    const styleIdPattern = /Style ID: (.{0,100}?)(?=\))/;
+    // A Set keeps insertion order and makes the duplicate check O(1).
+    const styleIds = new Set();
+    for (const item of data ?? []) {
+        const styleId = styleIdPattern.exec(item.message)?.[1];
+        if (styleId) {
+            styleIds.add(styleId);
+        }
+    }
+    return [...styleIds];
+}
+/**
+ * Decodes a UTF-8 JSON document and gathers every `styleName` value in it.
+ *
+ * Declared `async` so that malformed JSON or a non-object document surfaces
+ * as a rejected promise rather than a synchronous throw, which is what the
+ * promise-returning signature leads callers to expect.
+ *
+ * @param arrayBuffer - UTF-8 encoded JSON text.
+ * @returns Resolves to the decoded text (`content`) and the unique style
+ *   names found in the parsed document (`styles`).
+ * @throws Rejects with `SyntaxError` when the text is not valid JSON, or with
+ *   `Error('Invalid JSON document')` when the parsed value is not an object.
+ */
+export async function extractStylesForJSON(arrayBuffer) {
+    const content = new TextDecoder('utf-8').decode(arrayBuffer);
+    const jsonObject = JSON.parse(content);
+    if (typeof jsonObject !== 'object' || jsonObject === null) {
+        throw new Error('Invalid JSON document');
+    }
+    return { content, styles: collectStyles(jsonObject) };
+}
+/**
+ * Preprocessor helper for JSON formatted documents: walks a parsed value
+ * depth-first and gathers every distinct string stored under a `styleName` key.
+ *
+ * @param obj - Parsed JSON value; anything that is not a non-null object is ignored.
+ * @param styles - Accumulator that is appended to in place (defaults to a new array).
+ * @returns The same `styles` array, for convenience.
+ */
+export function collectStyles(obj, styles = []) {
+    const isContainer = (candidate) => candidate !== null && typeof candidate === 'object';
+    if (!isContainer(obj)) {
+        return styles;
+    }
+    Object.entries(obj).forEach(([property, entry]) => {
+        if (isContainer(entry)) {
+            // Descend into nested objects and arrays, sharing the accumulator.
+            collectStyles(entry, styles);
+            return;
+        }
+        const isNewStyleName = property === 'styleName' &&
+            typeof entry === 'string' &&
+            !styles.includes(entry);
+        if (isNewStyleName) {
+            styles.push(entry);
+        }
+    });
+    return styles;
+}
+/**
+ * Parses an HTML file held in an ArrayBuffer and lists the class names used in it.
+ *
+ * The bytes are decoded with `arrayBufferToString`, parsed as `text/html` by a
+ * `DOMParser`, and the resulting document is handed to `extractStyleNamesFromHTML`.
+ *
+ * @param arrayBuffer - Encoded HTML content.
+ * @returns Resolves to `{ styles }`, the style names found in the document.
+ */
+export async function processHTML(arrayBuffer) {
+    const markup = arrayBufferToString(arrayBuffer);
+    const htmlDocument = new DOMParser().parseFromString(markup, 'text/html');
+    return { styles: extractStyleNamesFromHTML(htmlDocument) };
+}
+/**
+ * Collects the style (class) names used by every `.htm` entry of a zip archive.
+ *
+ * Archives larger than 1 GB, or holding more than 10000 entries, are only
+ * processed after the user accepts a global `confirm` prompt; declining throws.
+ * Only entry names ending in `.htm` are read.
+ *
+ * @param zipFile - The zip archive to inspect.
+ * @returns Resolves to `{ styles }`, unique style names in first-seen order.
+ * @throws Error when an oversized or overfull archive is declined at the prompt.
+ */
+export async function extractStylesFromZip(zipFile) {
+    const MAX_FILES = 10000;
+    const MAX_SIZE = 1073741824; // 1 GB
+    if (zipFile.size > MAX_SIZE &&
+        !confirm(`zip is ${zipFile.size / MAX_SIZE} GB. continue?`)) {
+        // States the limit actually enforced above (the message used to say "25 mb").
+        throw new Error('Size of the file is more than the limit 1 GB');
+    }
+    const loadedZip = await JSZip.loadAsync(zipFile); //NOSONAR size validated. Safe to extract.
+    const entryNames = Object.keys(loadedZip.files);
+    // Check if the total number of entries exceeds the limit
+    const totalFiles = entryNames.length;
+    if (totalFiles > MAX_FILES &&
+        !confirm(`zip contains an excessive ${totalFiles} files. continue?`)) {
+        throw new Error(`Number of files in the zip (${totalFiles}) exceeds the limit (${MAX_FILES})`);
+    }
+    // One Set accumulates across all files instead of rebuilding an array and
+    // a Set per file; insertion order keeps the first-seen ordering.
+    const combinedStyles = new Set();
+    for (const fileName of entryNames) {
+        if (!fileName.endsWith('.htm')) {
+            continue;
+        }
+        // Entries are read one at a time, as before.
+        const arrayBuffer = await loadedZip.files[fileName].async('arraybuffer');
+        const { styles } = await processHTML(arrayBuffer);
+        for (const style of styles) {
+            combinedStyles.add(style);
+        }
+    }
+    return { styles: [...combinedStyles] };
+}
+/**
+ * Decodes the bytes of an ArrayBuffer into a string using a default `TextDecoder`.
+ *
+ * @param arrayBuffer - Buffer holding the encoded text.
+ * @returns The decoded string.
+ */
+export function arrayBufferToString(arrayBuffer) {
+    const bytes = new Uint8Array(arrayBuffer);
+    const decoder = new TextDecoder();
+    return decoder.decode(bytes);
+}
+/**
+ * Lists the unique class names used by elements of a parsed HTML document.
+ *
+ * Reads each element's `classList` rather than splitting `className`:
+ * `className` is not a plain string on SVG elements (so `.split` would throw),
+ * and splitting a value with leading or trailing whitespace yields an
+ * empty-string entry. `classList` exposes only the real tokens.
+ *
+ * @param doc - Document to scan; every element matching `[class]` is visited.
+ * @returns Unique class names in order of first appearance.
+ */
+export function extractStyleNamesFromHTML(doc) {
+    const styleNames = new Set();
+    for (const element of Array.from(doc.querySelectorAll('[class]'))) {
+        for (const className of Array.from(element.classList)) {
+            styleNames.add(className);
+        }
+    }
+    return [...styleNames];
+}

package/transform.zip.js CHANGED Viewed

@@ -154,25 +154,32 @@ async function loopHTMLFiles(htmlFiles, updateSrc) {
     const processedHtmlContents = (await Promise.all(htmlFiles.files
         .filter((htmlFile) => !!htmlFile)
         .map((f) => processFile(f, htmlFiles.imageFiles, updateSrc)))).filter((x) => x?.length);
+    if (processedHtmlContents.length === 0 && htmlFiles.files.length > 0) {
+        throw new Error(`File contents are empty`);
+    }
     return sortedNodeList(processedHtmlContents);
 }
 async function processFile(file, imageFiles, updateSrc) {
     const htmlContent = await file.content();
     const htmlFileName = file.name ?? 'Unknown file';
+    // Reject files with zero bytes
+    if (!htmlContent?.length) {
+        throw new Error(`File ${htmlFileName} has zero bytes`);
+    }
     // Get content before <head> (first 1000 chars should be enough)
     const beforeHead = htmlContent.substring(0, 1000);
-    // Check 1: Reject old DOCTYPE declarations
+    // Reject old DOCTYPE declarations
     if (beforeHead.includes('<!DOCTYPE HTML PUBLIC')) {
-        throw new Error(`Incorrect file format: ${htmlFileName}`);
+        throw new Error(`Incorrect file format (was "!DOCTYPE HTML PUBLIC"): ${htmlFileName}`);
     }
-    // Check 2: Reject XML declarations (XHTML format)
+    // Reject XML declarations (XHTML format)
     if (beforeHead.trimStart().startsWith('<?xml')) {
-        throw new Error(`Incorrect file format: ${htmlFileName}`);
+        throw new Error(`Incorrect file format (was "XHTML"): ${htmlFileName}`);
     }
-    // Check 3: Must have <html lang="...">
+    // Must have <html lang="...">
     // Option A: Exact match for en-US
     if (!beforeHead.includes('<html lang="en-US">')) {
-        throw new Error(`Incorrect file format: ${htmlFileName}`);
+        throw new Error(`Incorrect file format (missing "<html lang=..."): ${htmlFileName}`);
     }
     const domCollection = new DOMParser().parseFromString(htmlContent, 'text/html');
     //Get the title text