@modusoperandi/licit-import-utils 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,296 @@
1
+ /**
2
+ * @license MIT
3
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
+ */
5
+ import { openZip } from './zip.utils';
6
+ import { updateImageSrc } from './transform.utils';
7
/**
 * Entry point for importing a FrameMaker HTML5 export packaged as a ZIP.
 *
 * The archive is unpacked by `extractFiles` and the resulting pages are
 * handed to `loopHTMLFiles`, whose result is returned as-is.
 *
 * @param file - Archive to import; forwarded to `extractFiles`.
 * @param updateSrc - Passed through unchanged to `loopHTMLFiles`.
 * @returns Promise resolving to the value produced by `loopHTMLFiles`.
 * @throws Error 'No file provided for parsing.' when `file` is falsy.
 */
export async function parseFrameMakerHTM5Zip(file, updateSrc) {
  if (file) {
    const extracted = await extractFiles(file);
    return loopHTMLFiles(extracted, updateSrc);
  }
  throw new Error('No file provided for parsing.');
}
13
+ //A method for extracting zip file and for getting the correct order of files from the toc file if any
14
/**
 * Opens the archive and collects its `.htm` pages and images.
 *
 * Page order is taken from a table of contents when exactly one is found:
 * `toc.js` is tried first, then `toc.htm` (case-insensitive). Without a
 * usable TOC the pages keep the order of the archive's entry list.
 *
 * @param file - Archive forwarded to `openZip`.
 * @returns The `{ files, imageFiles }` object built by `filterFiles`.
 * @throws Error 'No HTM files found in the ZIP archive.' when no page remains.
 */
async function extractFiles(file) {
  const zip = await openZip(file);
  let tocFiles = [];
  // The patterns deliberately have no `g` flag: a global RegExp remembers
  // `lastIndex` between `test()` calls, so applying one pattern object to a
  // series of entry names can skip a match and miscount the TOC candidates.
  const tocJsEntries = zip.file(/toc\.js$/m);
  if (tocJsEntries.length === 1) {
    tocFiles = await getTocArray(tocJsEntries[0], zip);
  } else {
    // Fall back to the HTML table of contents.
    const tocHtmEntries = zip.file(/toc\.htm$/im);
    if (tocHtmEntries.length === 1) {
      tocFiles = await getTocHtmArray(tocHtmEntries[0]);
    }
  }
  const fileNames = Object.keys(zip.files);
  const content = filterFiles(zip.files, fileNames, tocFiles);
  if (!content?.files?.length) {
    throw new Error('No HTM files found in the ZIP archive.');
  }
  return content;
}
36
/**
 * Splits the archive entries into HTML pages and image files.
 *
 * @param zip - Map from entry name to entry object; each entry is expected to
 *   expose `name` and `async(type)`.
 * @param fileNames - Entry names to inspect, in archive order.
 * @param tocFiles - Page paths in table-of-contents order; when non-empty the
 *   pages are restricted to, and ordered by, these entries.
 * @returns `{ files, imageFiles }` where each file has `name` and a lazy
 *   `content()` reader, and each image has `name`, a `file` promise and a
 *   base64 `fallback` promise.
 */
function filterFiles(zip, fileNames, tocFiles) {
  const baseName = (path) => path.split('/').pop() ?? '';
  // Case-insensitive so that e.g. `FIG1.PNG` is still recognised as an image.
  const imagePattern = /\.(jpeg|jpg|gif|png)$/i;
  let htmlArray = [];
  const imageFiles = [];
  for (const fileName of fileNames) {
    const match = imagePattern.exec(fileName);
    if (match) {
      const extension = match[1].toLowerCase();
      // `image/jpg` is not a registered media type; JPEG files use `image/jpeg`.
      const mimeType = 'image/' + (extension === 'jpg' ? 'jpeg' : extension);
      const blobp = zip[fileName].async('blob');
      imageFiles.push({
        name: fileName,
        fallback: blobp.then((b) => blobToBase64(b)),
        file: blobp.then((blob) => new File([blob], fileName, { type: mimeType })),
      });
    }
    if (fileName.endsWith('.htm')) {
      htmlArray.push(zip[fileName]);
    }
  }
  if (tocFiles.length !== 0) {
    // Index pages by exact base name (first entry wins). A suffix comparison
    // would let `ch1.htm` select `xch1.htm`, and an empty TOC entry would
    // select whichever page happens to come first.
    const pagesByBaseName = new Map();
    for (const htmlFile of htmlArray) {
      const key = baseName(htmlFile.name);
      if (!pagesByBaseName.has(key)) {
        pagesByBaseName.set(key, htmlFile);
      }
    }
    htmlArray = tocFiles
      .map((tocEntry) => pagesByBaseName.get(baseName(tocEntry)))
      .filter((x) => !!x);
  }
  return {
    files: htmlArray.map((file) => ({
      name: file.name,
      content: () => file.async('string'),
    })),
    imageFiles: imageFiles,
  };
}
69
/**
 * Reads a blob through `FileReader.readAsDataURL`.
 *
 * @param blob - Blob to read.
 * @returns Promise resolved with `reader.result` once reading has ended, or
 *   rejected with the reader's error event.
 */
function blobToBase64(blob) {
  const readAsDataUrl = (resolve, reject) => {
    const fileReader = new FileReader();
    fileReader.onerror = reject;
    fileReader.onloadend = () => {
      resolve(fileReader.result);
    };
    fileReader.readAsDataURL(blob);
  };
  return new Promise(readAsDataUrl);
}
77
/**
 * Extracts the ordered list of `.htm` pages referenced by a `toc.js` file.
 *
 * The text from the first `<?xml` up to the last two characters is taken as
 * the embedded XML, backslashes are removed, and every `url="..."` value that
 * contains `.htm` is collected with its `#fragment` stripped. Each page is
 * listed once, at the position of its first reference, so a TOC that returns
 * to an earlier page does not make that page appear twice.
 *
 * @param tocFile - Archive entry exposing `async('string')`.
 * @param zipFiles - Archive object; only the keys of `zipFiles.files` are read.
 * @returns Page paths; when the first path does not already start with the
 *   first path segment of the archive's first entry, every path is prefixed
 *   with that segment.
 */
async function getTocArray(tocFile, zipFiles) {
  const tocContent = await tocFile.async('string');
  const xmlString = tocContent
    .substring(tocContent.indexOf('<?xml'), tocContent.length - 2)
    .split('\\')
    .join('');
  const urlAttribute = 'url="';
  const seen = new Set();
  let uniqueHtmUrls = [];
  let cursor = 0;
  while (cursor < xmlString.length) {
    const attributeIndex = xmlString.indexOf(urlAttribute, cursor);
    if (attributeIndex === -1) {
      break; // no more url attributes
    }
    const startOfUrl = attributeIndex + urlAttribute.length;
    const endOfUrl = xmlString.indexOf('"', startOfUrl);
    if (endOfUrl === -1) {
      break; // unterminated attribute value
    }
    const url = xmlString.substring(startOfUrl, endOfUrl);
    if (url.includes('.htm')) {
      const page = url.split('#')[0];
      // Skip fragment-only links and pages that were already listed.
      if (page && !seen.has(page)) {
        seen.add(page);
        uniqueHtmUrls.push(page);
      }
    }
    cursor = endOfUrl + 1;
  }
  const firstEntry = Object.keys(zipFiles.files)[0];
  if (uniqueHtmUrls.length > 0 && firstEntry !== undefined) {
    const parentDirectory = firstEntry.split('/')[0];
    if (uniqueHtmUrls[0].split('/')[0] !== parentDirectory) {
      uniqueHtmUrls = uniqueHtmUrls.map((url) => parentDirectory + '/' + url);
    }
  }
  return uniqueHtmUrls;
}
122
/**
 * Reads the page order from a `toc.htm` file.
 *
 * The file is parsed as HTML and every element with class `chTextTOC`,
 * followed by every element with class `attTextTOC`, is passed to
 * `getHrefValues`.
 *
 * @param tocHtmFile - Archive entry exposing `async('string')`.
 * @returns The list produced by `getHrefValues`.
 */
async function getTocHtmArray(tocHtmFile) {
  const markup = await tocHtmFile.async('string');
  const tocDocument = new DOMParser().parseFromString(markup, 'text/html');
  const tocNodes = ['chTextTOC', 'attTextTOC'].flatMap((className) =>
    Array.from(tocDocument.getElementsByClassName(className)),
  );
  return getHrefValues(tocNodes);
}
130
/**
 * Collects the target file names of all anchors inside the given nodes.
 *
 * For each anchor the `#fragment` is removed first, then the last path
 * segment is taken and percent-decoded, and a trailing `_NEWC.htm` is
 * rewritten to `.htm`. Anchors without an `href`, or whose file name ends up
 * empty, are skipped.
 *
 * @param tocNodes - Iterable of nodes exposing `querySelectorAll('a')`.
 * @returns File names in document order (duplicates are kept).
 */
function getHrefValues(tocNodes) {
  const newcSuffix = '_NEWC.htm';
  const hrefArray = [];
  for (const element of tocNodes) {
    for (const anchorTag of Array.from(element.querySelectorAll('a'))) {
      const href = anchorTag.href;
      if (!href) {
        continue;
      }
      // Drop the fragment before anything else: it may itself contain '/',
      // and a decoded '%23' in the file name must not be mistaken for it.
      const hashIndex = href.indexOf('#');
      const withoutFragment = hashIndex === -1 ? href : href.slice(0, hashIndex);
      const encodedName = withoutFragment.slice(withoutFragment.lastIndexOf('/') + 1);
      let fileName;
      try {
        fileName = decodeURIComponent(encodedName);
      } catch {
        // Malformed escape (e.g. a bare '%'): keep the name as written.
        fileName = encodedName;
      }
      if (fileName.endsWith(newcSuffix)) {
        fileName = fileName.slice(0, -newcSuffix.length) + '.htm';
      }
      if (fileName) {
        hrefArray.push(fileName);
      }
    }
  }
  return hrefArray;
}
153
/**
 * Processes every page of the extracted archive and merges the results.
 *
 * All truthy entries of `htmlFiles.files` are started through `processFile`
 * and awaited together; results without a non-zero `length` are dropped and
 * the rest are combined by `sortedNodeList` in the original page order.
 *
 * @param htmlFiles - Object with `files` and `imageFiles`.
 * @param updateSrc - Passed through unchanged to `processFile`.
 * @returns The value produced by `sortedNodeList`.
 */
async function loopHTMLFiles(htmlFiles, updateSrc) {
  const { files, imageFiles } = htmlFiles;
  const pending = [];
  for (const htmlFile of files) {
    if (htmlFile) {
      pending.push(processFile(htmlFile, imageFiles, updateSrc));
    }
  }
  const results = await Promise.all(pending);
  const nonEmpty = results.filter((nodes) => nodes?.length);
  return sortedNodeList(nonEmpty);
}
159
/**
 * Validates and parses one HTML page and returns its top-level body nodes.
 *
 * Only the first 1000 characters are inspected for the format checks. The
 * page is rejected when that prefix contains `<!DOCTYPE HTML PUBLIC`, starts
 * (after leading whitespace) with `<?xml`, or lacks `<html lang="en-US">`.
 * When the page has a non-empty `<title>` and a `.chapterTitle` element whose
 * text is blank, the title text is copied into that element. Images are then
 * handled by `processImages`.
 *
 * @param file - Object with an optional `name` and an async `content()` reader.
 * @param imageFiles - Image descriptors forwarded to `processImages`.
 * @param updateSrc - Passed through unchanged to `processImages`.
 * @returns Array of the direct children of `<body>`, excluding script elements.
 * @throws Error `Incorrect file format: <name>` when a format check fails.
 */
async function processFile(file, imageFiles, updateSrc) {
  const htmlContent = await file.content();
  const htmlFileName = file.name ?? 'Unknown file';
  const prologue = htmlContent.substring(0, 1000);

  const hasLegacyDoctype = prologue.includes('<!DOCTYPE HTML PUBLIC');
  const hasXmlDeclaration = prologue.trimStart().startsWith('<?xml');
  const hasExpectedLang = prologue.includes('<html lang="en-US">');
  if (hasLegacyDoctype || hasXmlDeclaration || !hasExpectedLang) {
    throw new Error(`Incorrect file format: ${htmlFileName}`);
  }

  const pageDocument = new DOMParser().parseFromString(htmlContent, 'text/html');

  // Fill an empty chapter heading from the document title.
  const titleText = pageDocument.querySelector('title')?.textContent?.trim();
  const chapterTitleElement = pageDocument.querySelector('.chapterTitle');
  if (titleText && chapterTitleElement) {
    const chapterText = chapterTitleElement.textContent;
    const isBlank = !chapterText?.replaceAll('&nbsp;', '').trim();
    if (isBlank) {
      chapterTitleElement.textContent = titleText;
    }
  }

  const imgTags = Array.from(pageDocument.querySelectorAll('img'));
  await processImages(imgTags, imageFiles, updateSrc);

  const bodyChildren = Array.from(pageDocument.querySelectorAll('body > *'));
  return bodyChildren.filter((node) => !(node instanceof HTMLScriptElement));
}
196
+ // Fix for file order
197
/**
 * Concatenates the per-page node arrays into one flat array.
 *
 * Despite the name no sorting happens here: entries are appended in the
 * order given and falsy entries are skipped.
 *
 * @param processedHtmlContents - Per-page results; `null`/`undefined` is
 *   treated as an empty list.
 * @returns A new array with all entries concatenated.
 */
function sortedNodeList(processedHtmlContents) {
  const chunks = Array.from(processedHtmlContents ?? []);
  return chunks.reduce(
    (merged, chunk) => (chunk ? merged.concat(chunk) : merged),
    [],
  );
}
207
/**
 * Resolves each `<img>` against the images found in the archive.
 *
 * The last path segment of the `src` attribute is compared with the base
 * name of every archive image. When the name as written has no match, its
 * percent-decoded form is tried as well, so `my%20fig.png` finds the entry
 * `my fig.png`. Matching images get their size and source updated; for
 * images without a match the source is cleared and a warning is placed in
 * the `alt` text.
 *
 * @param imgArray - Image elements to process.
 * @param imageFiles - Descriptors with `name`, a `file` promise and `fallback`.
 * @param updateSrc - Passed through unchanged to `updateImageSrc`.
 * @returns Promise that settles once every image has been handled; errors
 *   for an individual image are logged and do not reject it.
 */
async function processImages(imgArray, imageFiles, updateSrc) {
  const findByName = (name) =>
    imageFiles.find((f) => extractFileName(f.name) === name);
  for (const img of imgArray) {
    const imgUrl = img.getAttribute('src');
    const parts = imgUrl?.split('/');
    if (!parts?.length) {
      continue;
    }
    const targetFileName = parts.at(-1);
    let file = findByName(targetFileName);
    if (!file) {
      // The name as written wins; only fall back to the decoded form.
      let decodedName = targetFileName;
      try {
        decodedName = decodeURIComponent(targetFileName);
      } catch {
        // Malformed escape sequence: nothing further to try.
      }
      if (decodedName !== targetFileName) {
        file = findByName(decodedName);
      }
    }
    if (file) {
      try {
        const f = await file.file; // wait for the File to be materialised
        await updateImageSize(f, targetFileName, img);
        await updateImageSrc(f, img, updateSrc, file.fallback);
      } catch (error) {
        console.error(`Error processing ${targetFileName}:`, error);
      }
    } else {
      const errorMessage = `${targetFileName} missing from doc`;
      console.warn(errorMessage);
      img.src = '';
      img.alt = `WARNING! File ${targetFileName} was missing during import!`;
    }
  }
}
234
/**
 * Returns the part of a `/`-separated path after its last slash.
 *
 * @param fullPath - Path string.
 * @returns The final segment; the whole string when it contains no slash.
 */
function extractFileName(fullPath) {
  const lastSlash = fullPath.lastIndexOf('/');
  return fullPath.slice(lastSlash + 1);
}
237
/**
 * Normalises the width and height of an image element to whole pixels.
 *
 * Inline style values are preferred over `img.width` / `img.height`. Style
 * lengths in absolute CSS units (px, in, cm, mm, pt, pc) are converted to
 * pixels; a bare number is read as pixels. Lengths that cannot be resolved
 * without layout (%, em, ...) are not interpreted, and the corresponding
 * style property is left as it is. When only one dimension is known, the
 * other is derived from the image's aspect ratio if that can be determined.
 *
 * @param f - Image file forwarded to `getImageAspectRatio`.
 * @param targetFileName - Name used in the warning when the ratio is unknown.
 * @param img - Image element whose attributes and inline style are updated.
 * @returns Promise that resolves once the element has been updated.
 */
async function updateImageSize(f, targetFileName, img) {
  // CSS reference pixels per absolute unit.
  const pixelsPerUnit = new Map([
    ['px', 1],
    ['in', 96],
    ['cm', 96 / 2.54],
    ['mm', 96 / 25.4],
    ['pt', 96 / 72],
    ['pc', 16],
  ]);
  const toPixels = (cssValue) => {
    const parsed = /^\s*(\d*\.?\d+)\s*([a-z%]*)\s*$/i.exec(cssValue);
    if (!parsed) {
      return undefined;
    }
    const factor = pixelsPerUnit.get((parsed[2] || 'px').toLowerCase());
    return factor === undefined ? undefined : Number.parseFloat(parsed[1]) * factor;
  };

  let aspectRatio;
  try {
    aspectRatio = await getImageAspectRatio(f);
  } catch (e) {
    console.warn(`Could not determine aspect ratio for ${targetFileName}`, e);
  }

  const styleWidth = img.style.getPropertyValue('width');
  const styleHeight = img.style.getPropertyValue('height');
  const styleWidthPx = styleWidth ? toPixels(styleWidth) : undefined;
  const styleHeightPx = styleHeight ? toPixels(styleHeight) : undefined;
  let width = styleWidthPx ?? (img.width || undefined);
  let height = styleHeightPx ?? (img.height || undefined);

  if (aspectRatio && width && !height) {
    height = width / aspectRatio;
  } else if (aspectRatio && height && !width) {
    width = height * aspectRatio;
  }

  if (width) {
    width = Math.round(width);
    img.width = width;
    // A unitless non-zero length is invalid CSS and would be ignored, so the
    // unit is written explicitly. A relative style value is left untouched.
    if (!styleWidth || styleWidthPx !== undefined) {
      img.style.setProperty('width', `${width}px`);
    }
  }
  if (height) {
    height = Math.round(height);
    img.height = height;
    if (!styleHeight || styleHeightPx !== undefined) {
      img.style.setProperty('height', `${height}px`);
    }
  }
}
279
/**
 * Loads an image file into a detached `Image` to measure its aspect ratio.
 *
 * A temporary object URL is created for the file and revoked again as soon
 * as loading has either succeeded or failed.
 *
 * @param file - Image file to measure.
 * @returns Promise resolving to `width / height` of the loaded image.
 * @throws Error 'Failed to load image for aspect ratio calculation' (with the
 *   load error event as `cause`) when the image cannot be loaded.
 */
async function getImageAspectRatio(file) {
  return new Promise((resolve, reject) => {
    const objectUrl = URL.createObjectURL(file);
    const release = () => URL.revokeObjectURL(objectUrl);
    const probe = new Image();
    probe.onerror = (err) => {
      release();
      reject(
        new Error('Failed to load image for aspect ratio calculation', {
          cause: err,
        }),
      );
    };
    probe.onload = () => {
      const ratio = probe.width / probe.height;
      release();
      resolve(ratio);
    };
    probe.src = objectUrl;
  });
}
package/types.d.ts ADDED
@@ -0,0 +1,9 @@
1
+ /**
2
+ * @license MIT
3
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
+ */
5
/**
 * A message made of a type label and a text.
 */
export interface Message {
  /** Label classifying the message; the allowed values are not constrained by this type. */
  type: string;
  /** Text of the message. */
  message: string;
}
9
/**
 * Callback that receives a `type` string and a `message` string and returns nothing.
 *
 * NOTE(review): the two parameters mirror the fields of `Message`; presumably
 * this is how import messages are reported to the host — confirm against callers.
 */
export type MessageSink = (type: string, message: string) => void;
package/types.js ADDED
@@ -0,0 +1,5 @@
1
+ /**
2
+ * @license MIT
3
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
+ */
5
+ export {};
package/zip.utils.d.ts ADDED
@@ -0,0 +1,6 @@
1
+ /**
2
+ * @license MIT
3
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
+ */
5
+ import JSZip from 'jszip';
6
/**
 * Loads a ZIP `File` and resolves with the resulting `JSZip` instance.
 *
 * @param file - The archive to open.
 * @returns Promise of the loaded archive.
 */
export declare function openZip(file: File): Promise<JSZip>;
package/zip.utils.js ADDED
@@ -0,0 +1,23 @@
1
+ /**
2
+ * @license MIT
3
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
+ */
5
+ import JSZip from 'jszip';
6
/**
 * Loads a ZIP file with JSZip after applying size and entry-count limits.
 *
 * Archives larger than 1 GB, or containing more than 10000 entries, are only
 * accepted when the user confirms through the global `confirm` dialog. Where
 * no `confirm` function exists (for example outside a browser) such archives
 * are treated as declined instead of failing with a `ReferenceError`.
 *
 * @param file - Archive to open; `file.size` is read before loading.
 * @returns Promise of the loaded `JSZip` instance.
 * @throws Error when a limit is exceeded and not confirmed, or when the
 *   archive contains no entries.
 */
export async function openZip(file) {
  const MAX_FILES = 10000;
  const MAX_SIZE = 1073741824; // 1 GB
  // `confirm` is a browser global; its absence counts as "not confirmed".
  const userAccepts = (question) =>
    typeof confirm === 'function' && confirm(question);
  if (file.size > MAX_SIZE &&
    !userAccepts(`zip is ${file.size / MAX_SIZE} GB. continue?`)) {
    throw new Error('Size of the file is more than the limit 1GB');
  }
  const zip = await JSZip.loadAsync(file); //NOSONAR zip checked before parsing
  // The entry count is only known once the archive has been loaded.
  const fileCount = Object.keys(zip.files).length;
  if (fileCount > MAX_FILES &&
    !userAccepts(`zip contains an excessive ${fileCount} files. continue?`)) {
    throw new Error('Total number of files exceeded the limit 10000');
  }
  if (fileCount === 0) {
    throw new Error('No files found in the zip');
  }
  return zip;
}