@modusoperandi/licit-import-utils 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,21 +1,21 @@
1
- MIT License
2
-
3
- Copyright (c) 2026 Modus Operandi Inc.
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Modus Operandi Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # licit-import-utils
2
+ This is a utility package for importing files like json or docx into Licit compatible documents.
package/index.d.ts CHANGED
@@ -4,5 +4,8 @@
4
4
  */
5
5
  export * from './types';
6
6
  export * from './licit-transform';
7
+ export * from './preprocess.utils';
7
8
  export * from './transform.docx';
9
+ export * from './transform.utils';
8
10
  export * from './transform.zip';
11
+ export { LicitDocumentJSON, LicitElementJSON } from './licit-elements';
package/index.js CHANGED
@@ -4,5 +4,7 @@
4
4
  */
5
5
  export * from './types';
6
6
  export * from './licit-transform';
7
+ export * from './preprocess.utils';
7
8
  export * from './transform.docx';
9
+ export * from './transform.utils';
8
10
  export * from './transform.zip';
@@ -3,8 +3,6 @@
3
3
  * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
4
  */
5
5
  import type { LicitDocumentJSON } from './licit-elements';
6
- import { LicitBulletListElement, LicitDocumentElement, LicitEnhancedImageElement, LicitTableRowElement } from './licit-elements';
7
- import type { UpdatedCapco } from './capco.util';
8
6
  import type { MessageSink } from './types';
9
7
  export interface ParserElement {
10
8
  node: Element;
@@ -13,12 +11,6 @@ export interface ParserElement {
13
11
  level: number;
14
12
  subText: string;
15
13
  }
16
- interface ImageInfo {
17
- src: string;
18
- alt: string;
19
- width: number;
20
- height: number;
21
- }
22
14
  declare enum ParserElementType {
23
15
  ChapterTitle = 0,
24
16
  ChapterSubtitle = 1,
@@ -88,73 +80,63 @@ export interface AddCellOptions {
88
80
  }
89
81
  export declare class LicitConverter {
90
82
  private readonly config;
91
- elementsParsedMap: Map<string, boolean>;
92
- elements: ParserElement[];
83
+ private readonly elementsParsedMap;
84
+ private elements;
93
85
  constructor(config: TransformConfig);
94
86
  parseHTML(html: Document, isDoctorine: boolean, moDocType?: string): LicitDocumentJSON;
95
87
  parseFrameMakerHTML5(html: Element[]): LicitDocumentJSON;
96
- render_FrameMakerHTML5_zip(nodes: NodeList, infoIconData?: HTMLOListElement[], _moDocType?: string, renderedContentList?: Node[]): LicitDocumentJSON;
97
- render_FrameMakerHTML5_zip_SwitchHelper(e: ParserElement, infoIconData: HTMLOListElement[], renderedContentList: Node[], isNumberReseted: boolean, licitDocument: LicitDocumentElement): boolean;
88
+ private render_FrameMakerHTML5_zip;
89
+ private render_FrameMakerHTML5_zip_SwitchHelper;
98
90
  private handleNodes;
99
- fetchRenderedContent(nodes: NodeList): Node[];
100
- /**
101
- * Returns a map elements which were parsed.
102
- *
103
- * @returns Map of elements
104
- */
105
- getElementsParsedMap(): Map<string, boolean>;
106
- getCustomStyle(styleName: string): StyleInfo | undefined;
107
- handleOrderedListItem(e: ParserElement, licitDocument: LicitDocumentElement): void;
91
+ private fetchRenderedContent;
92
+ private getCustomStyle;
93
+ private handleOrderedListItem;
108
94
  /**
109
95
  * Renders the HTML as a Licit JSON structure
110
96
  *
111
97
  * @returns The document as an `LicitDocumentJSON` object
112
98
  */
113
- render(nodes: NodeListOf<Element>): LicitDocumentJSON;
114
- renderSwitchHelper(e: ParserElement, licitDocument: LicitDocumentElement): void;
99
+ private render;
100
+ private renderSwitchHelper;
115
101
  private renderTable;
116
102
  private renderParagraph;
117
103
  private renderHeader;
118
104
  private buildElements;
119
- checkChildNode(node: HTMLElement | Element, nextNode: HTMLElement | Element): number;
120
- render_doc(nodes: NodeListOf<Element>, infoIconData: HTMLOListElement[] | undefined, moDocType: string): LicitDocumentJSON;
121
- render_docSwitchHelper(e: ParserElement, licitDocument: LicitDocumentElement, tocRemoved: boolean, infoIconData: HTMLOListElement[], moDocType: string): boolean;
122
- renderTypeParagraph(e: ParserElement, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
123
- handle_UrlText(text: string, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
124
- text_WithoutUrl(n: Node, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
105
+ private checkChildNode;
106
+ private render_doc;
107
+ private render_docSwitchHelper;
108
+ private renderTypeParagraph;
109
+ private handle_UrlText;
110
+ private text_WithoutUrl;
125
111
  private handleNode;
126
- mergeSpans(node: Element, nextNode: Element): number;
127
- updateChildCapcoContent(e: ParserElement): void;
128
- updateChildCapcoContentLoopHelper(childNodes: ChildNode[], res: UpdatedCapco): void;
129
- processChildNodesCapco(childNodes: NodeListOf<ChildNode>): void;
130
- updateCapcoToParagraph(child: ChildNode, res: UpdatedCapco): void;
131
- processTableCapco(tableNode: HTMLTableElement): void;
132
- figureTitleCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
133
- handleImageChild(child: Element, licitDocument: LicitDocumentElement): void;
134
- renderNewFigureTitle(e: ParserElement, licitDocument: LicitDocumentElement): void;
135
- figureParagraphCase(e: ParserElement, licitDocument: LicitDocumentElement, infoIconData: HTMLOListElement[] | undefined, renderedContentList: Node[]): void;
136
- figureNoteCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
137
- figureTableTitleCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
112
+ private mergeSpans;
113
+ private updateChildCapcoContent;
114
+ private updateChildCapcoContentLoopHelper;
115
+ private processChildNodesCapco;
116
+ private updateCapcoToParagraph;
117
+ private processTableCapco;
118
+ private figureTitleCase;
119
+ private handleImageChild;
120
+ private renderNewFigureTitle;
121
+ private figureParagraphCase;
122
+ private figureNoteCase;
123
+ private figureTableTitleCase;
138
124
  private renderDocVignet;
139
125
  private parseUntypedDocVignet;
140
126
  private parseTypedDocVignet;
141
- parseTypedDocVignetHelper(val: string, bgColor: string, borderColor: string, boxWidth: number): {
142
- bgColor: string;
143
- borderColor: string;
144
- boxWidth: number;
145
- };
127
+ private parseTypedDocVignetHelper;
146
128
  private renderDocTable;
147
129
  private renderEnhancedTable;
148
130
  private getLicitTable;
149
- renderNewLicitImage(imageElement: HTMLImageElement, capco: string | null): LicitEnhancedImageElement;
150
- renderDocBulletItems(e: ParserElement, licitDocument: LicitDocumentElement): void;
151
- processBulletNodes(childNodes: Node[], bulletList: LicitBulletListElement, licitDocument: any, indent: number, e: any): void;
152
- addElementLicit(licitDocument: any, bulletList: LicitBulletListElement): void;
153
- removeEmptyATags(node: Node): void;
131
+ private renderNewLicitImage;
132
+ private renderDocBulletItems;
133
+ private processBulletNodes;
134
+ private addElementLicit;
135
+ private removeEmptyATags;
154
136
  private handleULNode;
155
137
  private renderDocFigure;
156
- renderImage(imgElement: HTMLImageElement, licitDocument: LicitDocumentElement): void;
157
- parseOL(e: ParserElement, licitDocument: LicitDocumentElement): void;
138
+ private renderImage;
139
+ private parseOL;
158
140
  /**
159
141
  * To parse table data
160
142
  * @param e - element
@@ -166,114 +148,109 @@ export declare class LicitConverter {
166
148
  * @param isTransparent - flag to distinguish preface table
167
149
  * @returns void
168
150
  */
169
- parseTableContent(_e: any, tableTag: any, querySel: any, isChapterHeader: any, licitTable: any, widthArray: number[], isTransparent: boolean): void;
170
- parseTableContentInnerLoopHelper(cells: any, _cellIndex: number, isChapterHeader: boolean, licitRow: LicitTableRowElement, widthArray: number[], isTransparent: boolean): void;
151
+ private parseTableContent;
152
+ private parseTableContentInnerLoopHelper;
171
153
  private addCell;
172
- checkCellStyle(style: string | null): string | null;
154
+ private checkCellStyle;
173
155
  private addTableImageCell;
174
- ParseNestedList(_listType: string, node: ChildNode, licitDocument: LicitDocumentElement, indent: number): void;
156
+ private ParseNestedList;
175
157
  /**
176
158
  * Returns the level of an element as described by the number at the end of its classname
177
159
  *
178
160
  * @param className - The className of the element
179
161
  * @returns The level as a number or zero if the level cannot be determined
180
162
  */
181
- extractLevel(className: string): number;
163
+ private extractLevel;
182
164
  /**
183
165
  * Determines if an element is a table or image then calls the appropriate parse method
184
166
  */
185
- parseTableFigure(element: Element): void;
167
+ private parseTableFigure;
186
168
  /**
187
169
  * Parse a table element
188
170
  */
189
- parseTable(element: Element, useEnhancedTables: boolean): void;
171
+ private parseTable;
190
172
  /**
191
173
  * Parse a table element
192
174
  */
193
- parseVignet(element: Element): void;
175
+ private parseVignet;
194
176
  /**
195
177
  * Parse a figure (image) element
196
178
  */
197
- parseFigure(element: Element): void;
179
+ private parseFigure;
198
180
  /**
199
181
  * Parse a note element
200
182
  */
201
- parseNote(element: Element): void;
183
+ private parseNote;
202
184
  /**
203
185
  * Parse a hr element
204
186
  */
205
- parseHR(element: Element): void;
187
+ private parseHR;
206
188
  /**
207
189
  * Parse a chapter title element
208
190
  */
209
- parseChapterTitle(element: Element): void;
191
+ private parseChapterTitle;
210
192
  /**
211
193
  * Parse a chapter subtitle element
212
194
  */
213
- parseChapterSubtitle(element: Element): void;
195
+ private parseChapterSubtitle;
214
196
  /**
215
197
  * Parse a header element
216
198
  */
217
- parseHeader(element: Element, nextElement: Element): void;
199
+ private parseHeader;
218
200
  /**
219
201
  * Parse a bullet point item element
220
202
  */
221
- parseBullet(element: Element): void;
203
+ private parseBullet;
222
204
  /**
223
205
  * Parse a ordered list point item element
224
206
  */
225
- parseOrdered(element: Element): void;
207
+ private parseOrdered;
226
208
  /**
227
209
  * Parse a paragraph element
228
210
  */
229
- parseParagraph(element: Element): void;
230
- parseDynamicHeader(element: Element): void;
211
+ private parseParagraph;
212
+ private parseDynamicHeader;
231
213
  /** Sanitize the text content by removing specific characters */
232
- sanitizeText(element: Element): void;
214
+ private sanitizeText;
233
215
  /**
234
216
  * Parse a figure (image) title element
235
217
  */
236
- parseFigureTitle(element: Element): void;
218
+ private parseFigureTitle;
237
219
  /**
238
220
  * Parse a ChangeBarPara element
239
221
  */
240
- parseChangeBarPara(element: Element): void;
222
+ private parseChangeBarPara;
241
223
  /**
242
224
  * Parse a table title element
243
225
  */
244
- parseTableTitle(element: Element): void;
226
+ private parseTableTitle;
245
227
  /**
246
228
  * Parse an unknown element. Currently does nothing besides printing a warning to the console.
247
229
  */
248
- parseUnknownElement(element: Element, message: string): void;
230
+ private parseUnknownElement;
249
231
  /**
250
232
  * Parse a section title element
251
233
  */
252
- parseSectionTitle(element: Element): void;
234
+ private parseSectionTitle;
253
235
  /**
254
236
  * Parses an `Element` as determined by its `className`
255
237
  *
256
238
  * @param element - The `Element` to be parsed
257
239
  */
258
- parseElement(element: Element, nextElement: Element): void;
259
- parseElement_doc(element: Element, nextElement: Element): void;
240
+ private parseElement;
241
+ private parseElement_doc;
260
242
  /**
261
243
  * Cleans up the HTML by calling certain helper methods
262
244
  */
263
- sanitizeHTML(html: string): string;
264
- /**
265
- * Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
266
- */
267
- replaceUnwantedChars(html: string): string;
245
+ private sanitizeHTML;
268
246
  /**
269
247
  * Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
270
248
  */
271
- replaceKeywordsWithLinks(html: string): string;
272
- matchClassToExcludeNumber(className: string): boolean;
273
- sanitizeElement(element: Element): void;
274
- removeLastNumber(inputString: string): string;
275
- getScaledWidth(width: number): string;
276
- isTransparentTable(element: Element): boolean;
249
+ private replaceKeywordsWithLinks;
250
+ private matchClassToExcludeNumber;
251
+ private sanitizeElement;
252
+ private getScaledWidth;
253
+ private isTransparentTable;
277
254
  /**
278
255
  * Extracts and calculates the column widths from a given HTML table element.
279
256
  *
@@ -286,24 +263,24 @@ export declare class LicitConverter {
286
263
  * @param {HTMLTableElement} table - The HTML table element from which column widths are to be extracted.
287
264
  * @returns {number[] | undefined} An array of column widths in pixels, or `undefined` if the widths are invalid or missing.
288
265
  */
289
- getColWidthArray(table: HTMLTableElement): number[] | undefined;
290
- setCellWidth(colSpan: number, cellIndex: number, colWidthArray: number[]): number[];
291
- scaleWidthArray(rawWidthArray: number[]): number[];
292
- getSumOfArray(array: number[]): number;
266
+ private getColWidthArray;
267
+ private setCellWidth;
268
+ private scaleWidthArray;
269
+ private getSumOfArray;
293
270
  /**
294
271
  * Determines the orientation (portrait or landscape) based on the total width.
295
272
  *
296
273
  * @param {number} totalWidth - The total width (in pixels) used to determine orientation.
297
274
  * @returns {'portrait' | 'landscape'} Returns 'portrait' if the width is less than 700 pixels; otherwise, returns 'landscape'.
298
275
  */
299
- findOrientation(totalWidth: number): 'portrait' | 'landscape';
276
+ private findOrientation;
300
277
  /**
301
278
  * Extracts image information from an HTMLImageElement.
302
279
  *
303
280
  * @param {HTMLImageElement} img - The image element to extract information from.
304
281
  * @returns {{ src: string; alt: string; width: number; height: number }} An object containing the image's source URL, alt text, width, and height.
305
282
  */
306
- extractImageInfo(img: HTMLImageElement): ImageInfo;
283
+ private extractImageInfo;
307
284
  /**
308
285
  * Extracts note paragraphs from the last row of an HTML table if that row
309
286
  * contains a note header such as "OVERALL NOTE:" or "NOTES:".
@@ -345,7 +322,7 @@ export declare class LicitConverter {
345
322
  * @param {Element} node - The DOM element to check.
346
323
  * @returns {boolean} `true` if the element qualifies as a table figure, otherwise `false`.
347
324
  */
348
- isTableFigureNode(node: Element): boolean;
325
+ private isTableFigureNode;
349
326
  /**
350
327
  * Determines whether the provided class name corresponds to a note-related node.
351
328
  *
@@ -260,14 +260,6 @@ export class LicitConverter {
260
260
  }
261
261
  return renderedArr;
262
262
  }
263
- /**
264
- * Returns a map elements which were parsed.
265
- *
266
- * @returns Map of elements
267
- */
268
- getElementsParsedMap() {
269
- return this.elementsParsedMap;
270
- }
271
263
  getCustomStyle(styleName) {
272
264
  return this.config.customStyles?.find((s) => s.styleName === styleName);
273
265
  }
@@ -467,7 +459,7 @@ export class LicitConverter {
467
459
  }
468
460
  const childNode = children[j];
469
461
  let nextChildNode = children[j + 1];
470
- // KNITE-1013: Handling paragraph combining logic for the case where
462
+ // Handling paragraph combining logic for the case where
471
463
  // heading is inside <OL>/<UL> and content is outside
472
464
  if (!nextChildNode &&
473
465
  (node.tagName === 'OL' || node.tagName === 'UL') &&
@@ -1894,16 +1886,6 @@ export class LicitConverter {
1894
1886
  sanitizeHTML(html) {
1895
1887
  return this.replaceKeywordsWithLinks(html);
1896
1888
  }
1897
- /**
1898
- * Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
1899
- */
1900
- replaceUnwantedChars(html) {
1901
- const chars = this.config.replacementChars;
1902
- for (const char of chars) {
1903
- html = html.replace(char.find, char.replace);
1904
- }
1905
- return html;
1906
- }
1907
1889
  /**
1908
1890
  * Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
1909
1891
  */
@@ -1916,7 +1898,7 @@ export class LicitConverter {
1916
1898
  }
1917
1899
  return html;
1918
1900
  }
1919
- //FS : For skipping triming inside table, add more classes to the class list for future use
1901
+ // For skipping triming inside table, add more classes to the class list for future use
1920
1902
  matchClassToExcludeNumber(className) {
1921
1903
  let trimmedClassName = className.trim();
1922
1904
  trimmedClassName = trimmedClassName.toLowerCase();
@@ -1950,14 +1932,6 @@ export class LicitConverter {
1950
1932
  };
1951
1933
  stripTextContent(element);
1952
1934
  }
1953
- removeLastNumber(inputString) {
1954
- let lastNonDigitIndex = inputString.length - 1;
1955
- while (lastNonDigitIndex >= 0 &&
1956
- !Number.isNaN(Number.parseInt(inputString[lastNonDigitIndex]))) {
1957
- lastNonDigitIndex--;
1958
- }
1959
- return inputString.slice(0, lastNonDigitIndex + 1);
1960
- }
1961
1935
  getScaledWidth(width) {
1962
1936
  if (width <= 200) {
1963
1937
  return width.toString();
package/package.json CHANGED
@@ -1,52 +1,52 @@
1
- {
2
- "name": "@modusoperandi/licit-import-utils",
3
- "version": "0.1.0",
4
- "license": "MIT",
5
- "type": "module",
6
- "subversion": "1",
7
- "description": "A utility package for importing files like json or docx into Licit compatible documents",
8
- "main": "index.js",
9
- "types": "index.d.ts",
10
- "repository": {
11
- "type": "git",
12
- "url": "git+https://github.com/MO-Movia/licit-import-utils.git"
13
- },
14
- "scripts": {
15
- "test": "jest",
16
- "test:unit": "jest",
17
- "test:coverage": "jest --env=jsdom --coverage",
18
- "build:clean": "rm -rf dist/ && rm -f modusoperandi-*.*.*.tgz",
19
- "lint": "eslint src",
20
- "ci:build": "tsc -b tsconfig.prod.json --clean && tsc -b tsconfig.prod.json && npx copyfiles@2.4.1 package.json LICENSE dist",
21
- "ci:bom": "npx @cyclonedx/cyclonedx-npm --ignore-npm-errors --short-PURLs --output-format XML --output-file dist/bom.xml",
22
- "verify": "npm run lint -- --fix && npm run ci:build && npm run test:coverage && echo 'All Tests Passed!'"
23
- },
24
- "peerDependencies": {
25
- "@modusoperandi/mammoth": "^1.7.0-6",
26
- "jszip": "^3.10.1"
27
- },
28
- "peerDependenciesMeta": {
29
- "@modusoperandi/mammoth": {
30
- "optional": true
31
- },
32
- "jszip": {
33
- "optional": true
34
- }
35
- },
36
- "dependencies": {
37
- "uuid": "^13.0.0"
38
- },
39
- "devDependencies": {
40
- "@modusoperandi/mammoth": "^1.7.0-6",
41
- "@modusoperandi/eslint-config": "^3.0.3",
42
- "@types/jest": "^30.0.0",
43
- "jszip": "^3.10.1",
44
- "eslint": "^9.39.2",
45
- "jest": "^30.2.0",
46
- "jest-environment-jsdom": "^30.2.0",
47
- "jest-junit": "^16.0.0",
48
- "ts-jest": "^29.4.6",
49
- "ts-node": "^10.9.2",
50
- "typescript": "^5.9.3"
51
- }
52
- }
1
+ {
2
+ "name": "@modusoperandi/licit-import-utils",
3
+ "version": "0.1.1",
4
+ "license": "MIT",
5
+ "type": "module",
6
+ "subversion": "1",
7
+ "description": "A utility package for importing files like json or docx into Licit compatible documents",
8
+ "main": "index.js",
9
+ "types": "index.d.ts",
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "git+https://github.com/MO-Movia/licit-import-utils.git"
13
+ },
14
+ "scripts": {
15
+ "test": "jest",
16
+ "test:unit": "jest",
17
+ "test:coverage": "jest --env=jsdom --coverage",
18
+ "build:clean": "rm -rf dist/ && rm -f modusoperandi-*.*.*.tgz",
19
+ "lint": "eslint src",
20
+ "ci:build": "tsc -b tsconfig.prod.json --clean && tsc -b tsconfig.prod.json && npx copyfiles@2.4.1 package.json LICENSE README.md dist",
21
+ "ci:bom": "npx @cyclonedx/cyclonedx-npm --ignore-npm-errors --short-PURLs --output-format XML --output-file dist/bom.xml",
22
+ "verify": "npm run lint -- --fix && npm run ci:build && npm run test:coverage && echo 'All Tests Passed!'"
23
+ },
24
+ "peerDependencies": {
25
+ "@modusoperandi/mammoth": "^1.7.0-6",
26
+ "jszip": "^3.10.1"
27
+ },
28
+ "peerDependenciesMeta": {
29
+ "@modusoperandi/mammoth": {
30
+ "optional": true
31
+ },
32
+ "jszip": {
33
+ "optional": true
34
+ }
35
+ },
36
+ "dependencies": {
37
+ "uuid": "^13.0.0"
38
+ },
39
+ "devDependencies": {
40
+ "@modusoperandi/mammoth": "^1.7.0-6",
41
+ "@modusoperandi/eslint-config": "^3.0.3",
42
+ "@types/jest": "^30.0.0",
43
+ "jszip": "^3.10.1",
44
+ "eslint": "^9.39.2",
45
+ "jest": "^30.2.0",
46
+ "jest-environment-jsdom": "^30.2.0",
47
+ "jest-junit": "^16.0.0",
48
+ "ts-jest": "^29.4.6",
49
+ "ts-node": "^10.9.2",
50
+ "typescript": "^5.9.3"
51
+ }
52
+ }
@@ -0,0 +1,22 @@
1
+ /**
2
+ * @license MIT
3
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
+ */
5
+ import type { Message } from './types';
6
+ export declare function extractStylesForDoc(arrayBuffer: ArrayBuffer, docType: string): Promise<{
7
+ styles: string[];
8
+ }>;
9
+ export declare function extractUniqueStyleIds(data: Message[]): string[];
10
+ export declare function extractStylesForJSON(arrayBuffer: ArrayBuffer): Promise<{
11
+ content: string;
12
+ styles: string[];
13
+ }>;
14
+ export declare function collectStyles(obj: unknown, styles?: string[]): string[];
15
+ export declare function processHTML(arrayBuffer: ArrayBuffer): Promise<{
16
+ styles: string[];
17
+ }>;
18
+ export declare function extractStylesFromZip(zipFile: File): Promise<{
19
+ styles: string[];
20
+ }>;
21
+ export declare function arrayBufferToString(arrayBuffer: ArrayBuffer): string;
22
+ export declare function extractStyleNamesFromHTML(doc: Document): string[];
@@ -0,0 +1,105 @@
1
+ /**
2
+ * @license MIT
3
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
+ */
5
+ import JSZip from 'jszip';
6
+ import { DocxTransformer } from './transform.docx';
7
+ export async function extractStylesForDoc(arrayBuffer, docType) {
8
+ const messages = [];
9
+ // Convert the ArrayBuffer to HTML using Mammoth.js
10
+ await new DocxTransformer(docType, (type, message) => messages.push({ type, message })).transform(arrayBuffer);
11
+ // Extract styles from the HTML (adapt as needed for your styling approach)
12
+ const styles = extractUniqueStyleIds(messages);
13
+ return { styles };
14
+ }
15
+ export function extractUniqueStyleIds(data) {
16
+ const styleIds = [];
17
+ data ??= [];
18
+ for (const item of data) {
19
+ const match = new RegExp(/Style ID: (.{0,100}?)(?=\))/).exec(item.message);
20
+ const styleId = match?.[1];
21
+ if (styleId && !styleIds.includes(styleId)) {
22
+ styleIds.push(styleId);
23
+ }
24
+ }
25
+ return styleIds;
26
+ }
27
+ export function extractStylesForJSON(arrayBuffer) {
28
+ const decoder = new TextDecoder('utf-8');
29
+ const content = decoder.decode(arrayBuffer);
30
+ const jsonObject = JSON.parse(content);
31
+ if (typeof jsonObject !== 'object' || jsonObject === null) {
32
+ throw new Error('Invalid JSON document');
33
+ }
34
+ const styles = [];
35
+ collectStyles(jsonObject, styles);
36
+ return Promise.resolve({ content, styles });
37
+ }
38
+ // Preprocessor to handle the JSON formatted documents
39
+ export function collectStyles(obj, styles = []) {
40
+ if (typeof obj !== 'object' || obj === null) {
41
+ return styles;
42
+ }
43
+ for (const [key, value] of Object.entries(obj)) {
44
+ if (typeof value === 'object' && value !== null) {
45
+ // Recursively traverse nested objects
46
+ collectStyles(value, styles);
47
+ }
48
+ else if (key === 'styleName' &&
49
+ typeof value === 'string' &&
50
+ !styles.includes(value)) {
51
+ // Add the style name to the list if it's not already included
52
+ styles.push(value);
53
+ }
54
+ }
55
+ return styles;
56
+ }
57
+ export function processHTML(arrayBuffer) {
58
+ return new Promise((resolve) => {
59
+ const content = arrayBufferToString(arrayBuffer);
60
+ // Use DOMParser to parse HTML content
61
+ const parser = new DOMParser();
62
+ const doc = parser.parseFromString(content, 'text/html');
63
+ // Extract style names using regular expressions
64
+ const styleNames = extractStyleNamesFromHTML(doc);
65
+ resolve({ styles: styleNames });
66
+ });
67
+ }
68
+ export async function extractStylesFromZip(zipFile) {
69
+ const MAX_FILES = 10000;
70
+ const MAX_SIZE = 1073741824; // 1 GB
71
+ if (zipFile.size > MAX_SIZE &&
72
+ !confirm(`zip is ${zipFile.size / MAX_SIZE} GB. continue?`)) {
73
+ throw new Error('Size of the file is more than the limit 25 mb');
74
+ }
75
+ const loadedZip = await JSZip.loadAsync(zipFile); //NOSONAR size validated. Safe to extract.
76
+ // Check if the total number of files exceeds the limit
77
+ const totalFiles = Object.keys(loadedZip.files).length;
78
+ if (totalFiles > MAX_FILES &&
79
+ !confirm(`zip contains an excessive ${totalFiles} files. continue?`)) {
80
+ throw new Error(`Number of files in the zip (${totalFiles}) exceeds the limit (${MAX_FILES})`);
81
+ }
82
+ const htmlFiles = Object.keys(loadedZip.files).filter((fileName) => fileName.endsWith('.htm'));
83
+ let combinedStyles = [];
84
+ for (const fileName of htmlFiles) {
85
+ const arrayBuffer = await loadedZip.files[fileName].async('arraybuffer');
86
+ const { styles } = await processHTML(arrayBuffer);
87
+ // Combine styles
88
+ combinedStyles = [...new Set([...combinedStyles, ...styles])];
89
+ }
90
+ return { styles: combinedStyles };
91
+ }
92
+ export function arrayBufferToString(arrayBuffer) {
93
+ return new TextDecoder().decode(new Uint8Array(arrayBuffer));
94
+ }
95
+ export function extractStyleNamesFromHTML(doc) {
96
+ const styleNames = [];
97
+ // Extract class names from HTML elements and add to style names
98
+ const elementsWithClass = doc.querySelectorAll('[class]');
99
+ for (const element of Array.from(elementsWithClass)) {
100
+ const classes = element.className.split(/\s{1,100}/); // Split by whitespace
101
+ styleNames.push(...classes);
102
+ }
103
+ // Return unique style names
104
+ return [...new Set(styleNames)];
105
+ }
package/transform.zip.js CHANGED
@@ -154,25 +154,32 @@ async function loopHTMLFiles(htmlFiles, updateSrc) {
154
154
  const processedHtmlContents = (await Promise.all(htmlFiles.files
155
155
  .filter((htmlFile) => !!htmlFile)
156
156
  .map((f) => processFile(f, htmlFiles.imageFiles, updateSrc)))).filter((x) => x?.length);
157
+ if (processedHtmlContents.length === 0 && htmlFiles.files.length > 0) {
158
+ throw new Error(`File contents are empty`);
159
+ }
157
160
  return sortedNodeList(processedHtmlContents);
158
161
  }
159
162
  async function processFile(file, imageFiles, updateSrc) {
160
163
  const htmlContent = await file.content();
161
164
  const htmlFileName = file.name ?? 'Unknown file';
165
+ // Reject files with zero bytes
166
+ if (!htmlContent?.length) {
167
+ throw new Error(`File ${htmlFileName} has zero bytes`);
168
+ }
162
169
  // Get content before <head> (first 1000 chars should be enough)
163
170
  const beforeHead = htmlContent.substring(0, 1000);
164
- // Check 1: Reject old DOCTYPE declarations
171
+ // Reject old DOCTYPE declarations
165
172
  if (beforeHead.includes('<!DOCTYPE HTML PUBLIC')) {
166
- throw new Error(`Incorrect file format: ${htmlFileName}`);
173
+ throw new Error(`Incorrect file format (was "!DOCTYPE HTML PUBLIC"): ${htmlFileName}`);
167
174
  }
168
- // Check 2: Reject XML declarations (XHTML format)
175
+ // Reject XML declarations (XHTML format)
169
176
  if (beforeHead.trimStart().startsWith('<?xml')) {
170
- throw new Error(`Incorrect file format: ${htmlFileName}`);
177
+ throw new Error(`Incorrect file format (was "XHTML"): ${htmlFileName}`);
171
178
  }
172
- // Check 3: Must have <html lang="...">
179
+ // Must have <html lang="...">
173
180
  // Option A: Exact match for en-US
174
181
  if (!beforeHead.includes('<html lang="en-US">')) {
175
- throw new Error(`Incorrect file format: ${htmlFileName}`);
182
+ throw new Error(`Incorrect file format (missing "<html lang=..."): ${htmlFileName}`);
176
183
  }
177
184
  const domCollection = new DOMParser().parseFromString(htmlContent, 'text/html');
178
185
  //Get the title text