npm - @modusoperandi/licit-import-utils - Versions diffs - 0.1.0 - Mend

@modusoperandi/licit-import-utils 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/transform.zip.js ADDED Viewed

@@ -0,0 +1,296 @@
+/**
+ * @license MIT
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
+ */
+import { openZip } from './zip.utils';
+import { updateImageSrc } from './transform.utils';
+export async function parseFrameMakerHTM5Zip(file, updateSrc) {
+    if (!file) {
+        throw new Error('No file provided for parsing.');
+    }
+    return loopHTMLFiles(await extractFiles(file), updateSrc);
+}
+//A method for extracting zip file and for getting the correct order of files from the toc file if any
+async function extractFiles(file) {
+    let tocFiles = [];
+    const zip = await openZip(file);
+    //Checking if toc.js is present
+    let tocFile = zip.file(/toc\.js$/gm);
+    if (zip.files && tocFile.length === 1) {
+        tocFiles = await getTocArray(tocFile[0], zip);
+    }
+    else {
+        //Checking if toc.htm is present
+        tocFile = zip.file(/toc\.htm$/gim);
+        if (tocFile.length == 1) {
+            tocFiles = await getTocHtmArray(tocFile[0]);
+        }
+    }
+    const fileNames = Object.keys(zip.files);
+    const content = filterFiles(zip.files, fileNames, tocFiles);
+    if (!content?.files?.length) {
+        throw new Error('No HTM files found in the ZIP archive.');
+    }
+    return content;
+}
+function filterFiles(zip, fileNames, tocFiles) {
+    let htmlArray = [];
+    const imageFiles = [];
+    const regex = /\.(jpeg|jpg|gif|png)$/;
+    for (const fileName of fileNames) {
+        const match = regex.exec(fileName);
+        if (match) {
+            const blobp = zip[fileName].async('blob');
+            imageFiles.push({
+                name: fileName,
+                fallback: blobp.then((b) => blobToBase64(b)),
+                file: blobp.then((blob) => new File([blob], fileName, { type: 'Image/' + match[1] })),
+            });
+        }
+        if (fileName.endsWith('.htm')) {
+            htmlArray.push(zip[fileName]);
+        }
+    }
+    //If tocFile is available then filter and sort the html array as per the toc else send it as such
+    if (tocFiles.length !== 0) {
+        //Replacing htmlArray with the filtered and sorted array
+        htmlArray = tocFiles
+            .map((fileName) => htmlArray.find((htmlFile) => htmlFile.name.endsWith(fileName.split('/').pop() ?? '.')))
+            .filter((x) => !!x);
+    }
+    return {
+        files: htmlArray.map((file) => ({
+            name: file.name,
+            content: () => file.async('string'),
+        })),
+        imageFiles: imageFiles,
+    };
+}
+function blobToBase64(blob) {
+    return new Promise((resolve, reject) => {
+        const reader = new FileReader();
+        reader.onloadend = () => resolve(reader.result);
+        reader.onerror = reject;
+        reader.readAsDataURL(blob);
+    });
+}
+async function getTocArray(tocFile, zipFiles) {
+    const htmUrlsWithOffsets = [];
+    const tocContent = await tocFile.async('string');
+    const startIndx = tocContent.indexOf('<?xml');
+    const endIndx = tocContent.length - 2;
+    const xmlStr = tocContent.substring(startIndx, endIndx);
+    const xmlString = xmlStr.split('\\').join('');
+    let currentIndex = 0;
+    while (currentIndex < xmlString.length) {
+        const urlIndex = xmlString.indexOf('url="', currentIndex);
+        if (urlIndex === -1) {
+            break; // No more "url=" attributes
+        }
+        const startOfUrl = urlIndex + 5; // Length of 'url="'
+        const endOfUrl = xmlString.indexOf('"', startOfUrl);
+        if (endOfUrl === -1) {
+            break;
+        }
+        const url = xmlString.substring(startOfUrl, endOfUrl);
+        //Check if the URL is an .htm file
+        if (url.includes('.htm')) {
+            htmUrlsWithOffsets.push(url);
+        }
+        currentIndex = endOfUrl + 1;
+    }
+    const htmUrlsWithoutOffsets = htmUrlsWithOffsets.map((url) => {
+        return url.split('#')[0];
+    });
+    let uniqueHtmUrls = [];
+    let previousUrl = '';
+    for (const url of htmUrlsWithoutOffsets) {
+        if (url !== previousUrl) {
+            uniqueHtmUrls.push(url);
+            previousUrl = url;
+        }
+    }
+    if (uniqueHtmUrls.length > 0) {
+        const firstUrlSegment = uniqueHtmUrls[0].split('/')[0];
+        const parentDirectory = Object.keys(zipFiles.files)[0].split('/')[0];
+        if (firstUrlSegment != parentDirectory) {
+            uniqueHtmUrls = uniqueHtmUrls.map((url) => parentDirectory + '/' + url);
+        }
+    }
+    return uniqueHtmUrls;
+}
+async function getTocHtmArray(tocHtmFile) {
+    const htmlContent = await tocHtmFile.async('string');
+    const domCollection = new DOMParser().parseFromString(htmlContent, 'text/html');
+    const chTextTOCElements = Array.from(domCollection.getElementsByClassName('chTextTOC'));
+    const attTextTOCElements = Array.from(domCollection.getElementsByClassName('attTextTOC'));
+    const tocNodeList = [...chTextTOCElements, ...attTextTOCElements];
+    return getHrefValues(tocNodeList);
+}
+function getHrefValues(tocNodes) {
+    const hrefArray = [];
+    for (const element of tocNodes) {
+        const anchorTags = element.querySelectorAll('a');
+        for (const anchorTag of Array.from(anchorTags)) {
+            const href = anchorTag.href;
+            if (href) {
+                const lastSlashIndex = href.lastIndexOf('/');
+                let extractedHref = decodeURIComponent(href.slice(lastSlashIndex + 1));
+                const hashIndex = extractedHref.indexOf('#');
+                if (hashIndex !== -1) {
+                    extractedHref = extractedHref.slice(0, hashIndex);
+                }
+                // Remove '_NEWC' from the file name
+                if (extractedHref.endsWith('_NEWC.htm')) {
+                    extractedHref = extractedHref.replace('_NEWC.htm', '.htm');
+                }
+                hrefArray.push(extractedHref);
+            }
+        }
+    }
+    return hrefArray;
+}
+async function loopHTMLFiles(htmlFiles, updateSrc) {
+    const processedHtmlContents = (await Promise.all(htmlFiles.files
+        .filter((htmlFile) => !!htmlFile)
+        .map((f) => processFile(f, htmlFiles.imageFiles, updateSrc)))).filter((x) => x?.length);
+    return sortedNodeList(processedHtmlContents);
+}
+async function processFile(file, imageFiles, updateSrc) {
+    const htmlContent = await file.content();
+    const htmlFileName = file.name ?? 'Unknown file';
+    // Get content before <head> (first 1000 chars should be enough)
+    const beforeHead = htmlContent.substring(0, 1000);
+    // Check 1: Reject old DOCTYPE declarations
+    if (beforeHead.includes('<!DOCTYPE HTML PUBLIC')) {
+        throw new Error(`Incorrect file format: ${htmlFileName}`);
+    }
+    // Check 2: Reject XML declarations (XHTML format)
+    if (beforeHead.trimStart().startsWith('<?xml')) {
+        throw new Error(`Incorrect file format: ${htmlFileName}`);
+    }
+    // Check 3: Must have <html lang="...">
+    // Option A: Exact match for en-US
+    if (!beforeHead.includes('<html lang="en-US">')) {
+        throw new Error(`Incorrect file format: ${htmlFileName}`);
+    }
+    const domCollection = new DOMParser().parseFromString(htmlContent, 'text/html');
+    //Get the title text
+    const titleElement = domCollection.querySelector('title');
+    const titleText = titleElement?.textContent?.trim();
+    //Get the chapterTitle element and text
+    const chapterTitleElement = domCollection.querySelector('.chapterTitle');
+    const chapterText = chapterTitleElement?.textContent;
+    // If title exists and chapterTitle is empty
+    if (titleText &&
+        chapterTitleElement &&
+        !chapterText?.replaceAll('&nbsp;', '').trim()) {
+        chapterTitleElement.textContent = titleText;
+    }
+    const imgTags = Array.from(domCollection.querySelectorAll('img'));
+    await processImages(imgTags, imageFiles, updateSrc);
+    const nodes = domCollection.querySelectorAll('body > *');
+    const nodeArray = Array.from(nodes).filter((node) => !(node instanceof HTMLScriptElement));
+    return nodeArray;
+}
+// Fix for file order
+function sortedNodeList(processedHtmlContents) {
+    let nodeListArray = [];
+    processedHtmlContents ??= [];
+    for (const element of processedHtmlContents) {
+        if (element) {
+            nodeListArray = nodeListArray.concat(element);
+        }
+    }
+    return nodeListArray;
+}
+async function processImages(imgArray, imageFiles, updateSrc) {
+    for (const img of imgArray) {
+        const imgUrl = img.getAttribute('src');
+        const parts = imgUrl?.split('/');
+        if (!parts?.length) {
+            continue;
+        }
+        const targetFileName = parts.at(-1);
+        const file = imageFiles.find((f) => extractFileName(f.name) === targetFileName);
+        if (file) {
+            try {
+                const f = await file.file; // Wait for file resolution
+                await updateImageSize(f, targetFileName, img);
+                await updateImageSrc(f, img, updateSrc, file.fallback);
+            }
+            catch (error) {
+                console.error(`Error processing ${targetFileName}:`, error);
+            }
+        }
+        else {
+            const errorMessage = `${targetFileName} missing from doc`;
+            console.warn(errorMessage);
+            img.src = '';
+            img.alt = `WARNING! File ${targetFileName} was missing during import!`;
+        }
+    }
+}
+function extractFileName(fullPath) {
+    return fullPath.split('/').pop();
+}
+async function updateImageSize(f, targetFileName, img) {
+    let aspectRatio;
+    try {
+        aspectRatio = await getImageAspectRatio(f);
+    }
+    catch (e) {
+        console.warn(`Could not determine aspect ratio for ${targetFileName}`, e);
+    }
+    let width;
+    let height;
+    // Prefer explicit attributes if present
+    const widthAttr = img.style.getPropertyValue('width');
+    const heightAttr = img.style.getPropertyValue('height');
+    if (widthAttr) {
+        width = Number.parseFloat(widthAttr);
+    }
+    else if (img.width) {
+        width = img.width;
+    }
+    if (heightAttr) {
+        height = Number.parseFloat(heightAttr);
+    }
+    else if (img.height) {
+        height = img.height;
+    }
+    if (aspectRatio && width && !height) {
+        height = width / aspectRatio;
+    }
+    else if (aspectRatio && height && !width) {
+        width = height * aspectRatio;
+    }
+    if (width) {
+        width = Math.round(width);
+        img.width = width;
+        img.style.setProperty('width', String(width));
+    }
+    if (height) {
+        height = Math.round(height);
+        img.height = height;
+        img.style.setProperty('height', String(height));
+    }
+}
+async function getImageAspectRatio(file) {
+    return new Promise((resolve, reject) => {
+        const url = URL.createObjectURL(file);
+        const img = new Image();
+        img.onload = () => {
+            const aspectRatio = img.width / img.height;
+            URL.revokeObjectURL(url);
+            resolve(aspectRatio);
+        };
+        img.onerror = (err) => {
+            URL.revokeObjectURL(url);
+            reject(new Error('Failed to load image for aspect ratio calculation', {
+                cause: err,
+            }));
+        };
+        img.src = url;
+    });
+}

package/types.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * @license MIT
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
+ */
+/**
+ * A message described by two string fields.
+ *
+ * NOTE(review): presumably `type` is a category/severity tag and `message`
+ * the human-readable text — confirm against the code that produces these.
+ */
+export interface Message {
+    /** Kind of message; any string is accepted by this declaration. */
+    type: string;
+    /** Text of the message. */
+    message: string;
+}
+/**
+ * Callback that receives a `type` and a `message` string and returns nothing.
+ *
+ * NOTE(review): the parameters mirror the fields of `Message`; presumably
+ * this is how import code reports messages to its host — confirm at call
+ * sites.
+ */
+export type MessageSink = (type: string, message: string) => void;

package/types.js ADDED Viewed

@@ -0,0 +1,5 @@
+/**
+ * @license MIT
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
+ */
+export {};

package/zip.utils.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+/**
+ * @license MIT
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
+ */
+import JSZip from 'jszip';
+/**
+ * Opens a ZIP archive supplied as a `File`.
+ *
+ * @param file - The archive to open.
+ * @returns A promise resolving to the loaded `JSZip` instance.
+ */
+export declare function openZip(file: File): Promise<JSZip>;

package/zip.utils.js ADDED Viewed

@@ -0,0 +1,23 @@
+/**
+ * @license MIT
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
+ */
+import JSZip from 'jszip';
+export async function openZip(file) {
+    const MAX_FILES = 10000;
+    const MAX_SIZE = 1073741824; // 1 GB
+    if (file.size > MAX_SIZE &&
+        !confirm(`zip is ${file.size / MAX_SIZE} GB. continue?`)) {
+        throw new Error('Size of the file is more than the limit 1GB');
+    }
+    const zip = await JSZip.loadAsync(file); //NOSONAR zip checked before parsing
+    const fileCount = Object.keys(zip.files).length;
+    if (fileCount > MAX_FILES &&
+        !confirm(`zip contains an excessive ${fileCount} files. continue?`)) {
+        throw new Error('Total number of files exceeded the limit 10000');
+    }
+    if (fileCount === 0) {
+        throw new Error('No files found in the zip');
+    }
+    return zip;
+}