@modusoperandi/licit-import-utils 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,21 +1,21 @@
1
- MIT License
2
-
3
- Copyright (c) 2026 Modus Operandi Inc.
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Modus Operandi Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # licit-import-utils
2
+ This is a utility package for importing files such as JSON or DOCX into Licit-compatible documents.
package/index.d.ts CHANGED
@@ -4,5 +4,8 @@
4
4
  */
5
5
  export * from './types';
6
6
  export * from './licit-transform';
7
+ export * from './preprocess.utils';
7
8
  export * from './transform.docx';
9
+ export * from './transform.utils';
8
10
  export * from './transform.zip';
11
+ export { LicitDocumentJSON, LicitElementJSON } from './licit-elements';
package/index.js CHANGED
@@ -4,5 +4,7 @@
4
4
  */
5
5
  export * from './types';
6
6
  export * from './licit-transform';
7
+ export * from './preprocess.utils';
7
8
  export * from './transform.docx';
9
+ export * from './transform.utils';
8
10
  export * from './transform.zip';
@@ -108,6 +108,9 @@ interface LicitTableAttrsJSON extends LicitElementAttrsJSON {
108
108
  marginLeft: null;
109
109
  vignette: boolean;
110
110
  }
111
+ interface LicitTableRowAttrsJSON extends LicitElementAttrsJSON {
112
+ rowHeight: string;
113
+ }
111
114
  interface LicitTableCellAttrsJSON extends LicitElementAttrsJSON {
112
115
  colspan: number;
113
116
  rowspan: number;
@@ -134,6 +137,7 @@ interface LicitTableCellImageJSON extends LicitElementJSON {
134
137
  }
135
138
  interface LicitTableRowJSON extends LicitElementJSON {
136
139
  type: 'table_row';
140
+ attrs: LicitTableRowAttrsJSON;
137
141
  content: LicitTableCellJSON[];
138
142
  }
139
143
  export interface LicitTableJSON extends LicitElementJSON {
@@ -159,6 +163,15 @@ interface LicitOrderedListJSON extends LicitElementJSON {
159
163
  attrs: LicitBulletListAttrsJSON;
160
164
  content: LicitBulletListItemJSON[];
161
165
  }
166
+ interface CellStyleInfo {
167
+ className?: string;
168
+ id?: string;
169
+ marginTop?: string;
170
+ marginBottom?: string;
171
+ fontSize?: string;
172
+ letterSpacing?: string;
173
+ cellWidth?: string;
174
+ }
162
175
  export interface LicitDocumentJSON extends LicitElementJSON {
163
176
  type: 'doc';
164
177
  attrs: LicitDocumentAttrsJSON;
@@ -627,7 +640,8 @@ export declare class LicitTableCellParagraph extends LicitElement {
627
640
  colWidth: [number];
628
641
  content: any[];
629
642
  vAlign: string;
630
- constructor(node: HTMLElement, bgColor?: string, colwidth?: [number], vericalAlignment?: string);
643
+ cellStyleInfo?: CellStyleInfo;
644
+ constructor(node: HTMLElement, bgColor?: string, colwidth?: [number], vericalAlignment?: string, cellStyleInfo?: CellStyleInfo);
631
645
  render(): {
632
646
  type: string;
633
647
  attrs: {
@@ -684,7 +698,8 @@ export declare class LicitTableCellImageElement extends LicitElement {
684
698
  alt: string;
685
699
  fillImg: number;
686
700
  fitToParent: number;
687
- constructor(src: string, fillImg: number, fitToParent: number, bgColor?: string, imgHeight?: string, colWidth?: [number], alt?: string);
701
+ cellStyleInfo?: CellStyleInfo;
702
+ constructor(src: string, fillImg: number, fitToParent: number, bgColor?: string, imgHeight?: string, colWidth?: [number], alt?: string, cellStyleInfo?: CellStyleInfo);
688
703
  render(): LicitTableCellImageJSON;
689
704
  }
690
705
  export declare class LicitVignetteElement extends LicitElement {
@@ -747,7 +762,8 @@ export declare class LicitTableCellParaElement extends LicitElement {
747
762
  vAlign: string;
748
763
  isTableHeader: boolean;
749
764
  isTransparentTable: boolean;
750
- constructor(node: HTMLElement, bgColor?: string, colwidth?: [number], vericalAlignment?: string, isTableHeader?: boolean, isTransparentTable?: boolean);
765
+ cellStyleInfo?: CellStyleInfo;
766
+ constructor(node: HTMLElement, bgColor?: string, colwidth?: [number], vericalAlignment?: string, isTableHeader?: boolean, isTransparentTable?: boolean, cellStyleInfo?: CellStyleInfo);
751
767
  ConvertElements(node: HTMLElement): void;
752
768
  processChildNode(childNode: ChildNode): void;
753
769
  processChildOL(childNode: ChildNode): void;
@@ -759,6 +775,8 @@ export declare class LicitTableCellParaElement extends LicitElement {
759
775
  render(): LicitTableCellJSON;
760
776
  }
761
777
  export declare class LicitTableRowElement extends LicitElement {
778
+ height?: string;
779
+ rowHeight?: string;
762
780
  getBaseElement(): LicitTableRowJSON;
763
781
  cells: LicitTableCellElement[];
764
782
  addCell(cell: LicitTableCellElement): void;
@@ -769,6 +787,8 @@ export declare class LicitTableElement extends LicitElement {
769
787
  rows: LicitTableRowElement[];
770
788
  isVignette: boolean;
771
789
  capco?: string;
790
+ noOfColumns?: number;
791
+ tableHeight?: string;
772
792
  constructor(isVignette?: boolean, capco?: string);
773
793
  addRow(row: LicitTableRowElement): void;
774
794
  render(): LicitTableJSON;
package/licit-elements.js CHANGED
@@ -1876,11 +1876,13 @@ export class LicitTableCellParagraph extends LicitElement {
1876
1876
  colWidth;
1877
1877
  content = [];
1878
1878
  vAlign;
1879
- constructor(node, bgColor, colwidth, vericalAlignment) {
1879
+ cellStyleInfo;
1880
+ constructor(node, bgColor, colwidth, vericalAlignment, cellStyleInfo) {
1880
1881
  super();
1881
1882
  this.bgColor = bgColor;
1882
1883
  this.colWidth = colwidth;
1883
1884
  this.vAlign = vericalAlignment;
1885
+ this.cellStyleInfo = cellStyleInfo;
1884
1886
  const paragraph = new NewLicitParagraphElement(node, null);
1885
1887
  if (paragraph) {
1886
1888
  this.content.push(paragraph.render());
@@ -1996,7 +1998,8 @@ export class LicitTableCellImageElement extends LicitElement {
1996
1998
  alt;
1997
1999
  fillImg;
1998
2000
  fitToParent;
1999
- constructor(src, fillImg, fitToParent, bgColor, imgHeight, colWidth, alt) {
2001
+ cellStyleInfo;
2002
+ constructor(src, fillImg, fitToParent, bgColor, imgHeight, colWidth, alt, cellStyleInfo) {
2000
2003
  super();
2001
2004
  this.src = src;
2002
2005
  this.bgColor = bgColor;
@@ -2005,6 +2008,7 @@ export class LicitTableCellImageElement extends LicitElement {
2005
2008
  this.alt = alt;
2006
2009
  this.fillImg = fillImg;
2007
2010
  this.fitToParent = fitToParent;
2011
+ this.cellStyleInfo = cellStyleInfo;
2008
2012
  }
2009
2013
  render() {
2010
2014
  const element = this.getBaseElement();
@@ -2165,6 +2169,12 @@ export class LicitTableCellParaElement extends LicitElement {
2165
2169
  colwidth: this.colWidth || defaultColWidth,
2166
2170
  background: this.bgColor || defaultBgColor,
2167
2171
  vAlign: this.vAlign || 'middle',
2172
+ cellWidth: this.cellStyleInfo?.cellWidth ?? null,
2173
+ cellStyle: this.cellStyleInfo?.className ?? null,
2174
+ fontSize: this.cellStyleInfo?.fontSize ?? null,
2175
+ letterSpacing: this.cellStyleInfo?.letterSpacing ?? null,
2176
+ marginTop: this.cellStyleInfo?.marginTop ?? null,
2177
+ marginBottom: this.cellStyleInfo?.marginBottom ?? null,
2168
2178
  },
2169
2179
  content: [],
2170
2180
  };
@@ -2178,13 +2188,15 @@ export class LicitTableCellParaElement extends LicitElement {
2178
2188
  vAlign;
2179
2189
  isTableHeader;
2180
2190
  isTransparentTable;
2181
- constructor(node, bgColor, colwidth, vericalAlignment, isTableHeader, isTransparentTable) {
2191
+ cellStyleInfo;
2192
+ constructor(node, bgColor, colwidth, vericalAlignment, isTableHeader, isTransparentTable, cellStyleInfo) {
2182
2193
  super();
2183
2194
  this.bgColor = bgColor;
2184
2195
  this.colWidth = colwidth;
2185
2196
  this.vAlign = vericalAlignment;
2186
2197
  this.isTableHeader = isTableHeader;
2187
2198
  this.isTransparentTable = isTransparentTable;
2199
+ this.cellStyleInfo = cellStyleInfo;
2188
2200
  this.ConvertElements(node);
2189
2201
  }
2190
2202
  ConvertElements(node) {
@@ -2299,9 +2311,14 @@ export class LicitTableCellParaElement extends LicitElement {
2299
2311
  }
2300
2312
  }
2301
2313
  export class LicitTableRowElement extends LicitElement {
2314
+ height;
2315
+ rowHeight;
2302
2316
  getBaseElement() {
2303
2317
  return {
2304
2318
  type: 'table_row',
2319
+ attrs: {
2320
+ rowHeight: this.rowHeight,
2321
+ },
2305
2322
  content: [],
2306
2323
  };
2307
2324
  }
@@ -2325,6 +2342,8 @@ export class LicitTableElement extends LicitElement {
2325
2342
  marginLeft: null,
2326
2343
  vignette: this.isVignette,
2327
2344
  capco: this.capco,
2345
+ noOfColumns: this.noOfColumns ?? null,
2346
+ tableHeight: this.tableHeight ?? null,
2328
2347
  },
2329
2348
  content: [],
2330
2349
  };
@@ -2332,6 +2351,8 @@ export class LicitTableElement extends LicitElement {
2332
2351
  rows = [];
2333
2352
  isVignette = false;
2334
2353
  capco;
2354
+ noOfColumns;
2355
+ tableHeight;
2335
2356
  constructor(isVignette, capco) {
2336
2357
  super();
2337
2358
  this.isVignette = isVignette;
@@ -3,8 +3,6 @@
3
3
  * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
4
  */
5
5
  import type { LicitDocumentJSON } from './licit-elements';
6
- import { LicitBulletListElement, LicitDocumentElement, LicitEnhancedImageElement, LicitTableRowElement } from './licit-elements';
7
- import type { UpdatedCapco } from './capco.util';
8
6
  import type { MessageSink } from './types';
9
7
  export interface ParserElement {
10
8
  node: Element;
@@ -13,12 +11,6 @@ export interface ParserElement {
13
11
  level: number;
14
12
  subText: string;
15
13
  }
16
- interface ImageInfo {
17
- src: string;
18
- alt: string;
19
- width: number;
20
- height: number;
21
- }
22
14
  declare enum ParserElementType {
23
15
  ChapterTitle = 0,
24
16
  ChapterSubtitle = 1,
@@ -88,73 +80,63 @@ export interface AddCellOptions {
88
80
  }
89
81
  export declare class LicitConverter {
90
82
  private readonly config;
91
- elementsParsedMap: Map<string, boolean>;
92
- elements: ParserElement[];
83
+ private readonly elementsParsedMap;
84
+ private elements;
93
85
  constructor(config: TransformConfig);
94
86
  parseHTML(html: Document, isDoctorine: boolean, moDocType?: string): LicitDocumentJSON;
95
87
  parseFrameMakerHTML5(html: Element[]): LicitDocumentJSON;
96
- render_FrameMakerHTML5_zip(nodes: NodeList, infoIconData?: HTMLOListElement[], _moDocType?: string, renderedContentList?: Node[]): LicitDocumentJSON;
97
- render_FrameMakerHTML5_zip_SwitchHelper(e: ParserElement, infoIconData: HTMLOListElement[], renderedContentList: Node[], isNumberReseted: boolean, licitDocument: LicitDocumentElement): boolean;
88
+ private render_FrameMakerHTML5_zip;
89
+ private render_FrameMakerHTML5_zip_SwitchHelper;
98
90
  private handleNodes;
99
- fetchRenderedContent(nodes: NodeList): Node[];
100
- /**
101
- * Returns a map elements which were parsed.
102
- *
103
- * @returns Map of elements
104
- */
105
- getElementsParsedMap(): Map<string, boolean>;
106
- getCustomStyle(styleName: string): StyleInfo | undefined;
107
- handleOrderedListItem(e: ParserElement, licitDocument: LicitDocumentElement): void;
91
+ private fetchRenderedContent;
92
+ private getCustomStyle;
93
+ private handleOrderedListItem;
108
94
  /**
109
95
  * Renders the HTML as a Licit JSON structure
110
96
  *
111
97
  * @returns The document as an `LicitDocumentJSON` object
112
98
  */
113
- render(nodes: NodeListOf<Element>): LicitDocumentJSON;
114
- renderSwitchHelper(e: ParserElement, licitDocument: LicitDocumentElement): void;
99
+ private render;
100
+ private renderSwitchHelper;
115
101
  private renderTable;
116
102
  private renderParagraph;
117
103
  private renderHeader;
118
104
  private buildElements;
119
- checkChildNode(node: HTMLElement | Element, nextNode: HTMLElement | Element): number;
120
- render_doc(nodes: NodeListOf<Element>, infoIconData: HTMLOListElement[] | undefined, moDocType: string): LicitDocumentJSON;
121
- render_docSwitchHelper(e: ParserElement, licitDocument: LicitDocumentElement, tocRemoved: boolean, infoIconData: HTMLOListElement[], moDocType: string): boolean;
122
- renderTypeParagraph(e: ParserElement, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
123
- handle_UrlText(text: string, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
124
- text_WithoutUrl(n: Node, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
105
+ private checkChildNode;
106
+ private render_doc;
107
+ private render_docSwitchHelper;
108
+ private renderTypeParagraph;
109
+ private handle_UrlText;
110
+ private text_WithoutUrl;
125
111
  private handleNode;
126
- mergeSpans(node: Element, nextNode: Element): number;
127
- updateChildCapcoContent(e: ParserElement): void;
128
- updateChildCapcoContentLoopHelper(childNodes: ChildNode[], res: UpdatedCapco): void;
129
- processChildNodesCapco(childNodes: NodeListOf<ChildNode>): void;
130
- updateCapcoToParagraph(child: ChildNode, res: UpdatedCapco): void;
131
- processTableCapco(tableNode: HTMLTableElement): void;
132
- figureTitleCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
133
- handleImageChild(child: Element, licitDocument: LicitDocumentElement): void;
134
- renderNewFigureTitle(e: ParserElement, licitDocument: LicitDocumentElement): void;
135
- figureParagraphCase(e: ParserElement, licitDocument: LicitDocumentElement, infoIconData: HTMLOListElement[] | undefined, renderedContentList: Node[]): void;
136
- figureNoteCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
137
- figureTableTitleCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
112
+ private mergeSpans;
113
+ private updateChildCapcoContent;
114
+ private updateChildCapcoContentLoopHelper;
115
+ private processChildNodesCapco;
116
+ private updateCapcoToParagraph;
117
+ private processTableCapco;
118
+ private figureTitleCase;
119
+ private handleImageChild;
120
+ private renderNewFigureTitle;
121
+ private figureParagraphCase;
122
+ private figureNoteCase;
123
+ private figureTableTitleCase;
138
124
  private renderDocVignet;
139
125
  private parseUntypedDocVignet;
140
126
  private parseTypedDocVignet;
141
- parseTypedDocVignetHelper(val: string, bgColor: string, borderColor: string, boxWidth: number): {
142
- bgColor: string;
143
- borderColor: string;
144
- boxWidth: number;
145
- };
127
+ private parseTypedDocVignetHelper;
146
128
  private renderDocTable;
147
129
  private renderEnhancedTable;
148
130
  private getLicitTable;
149
- renderNewLicitImage(imageElement: HTMLImageElement, capco: string | null): LicitEnhancedImageElement;
150
- renderDocBulletItems(e: ParserElement, licitDocument: LicitDocumentElement): void;
151
- processBulletNodes(childNodes: Node[], bulletList: LicitBulletListElement, licitDocument: any, indent: number, e: any): void;
152
- addElementLicit(licitDocument: any, bulletList: LicitBulletListElement): void;
153
- removeEmptyATags(node: Node): void;
131
+ private renderNewLicitImage;
132
+ private renderDocBulletItems;
133
+ private processBulletNodes;
134
+ private addElementLicit;
135
+ private removeEmptyATags;
154
136
  private handleULNode;
155
137
  private renderDocFigure;
156
- renderImage(imgElement: HTMLImageElement, licitDocument: LicitDocumentElement): void;
157
- parseOL(e: ParserElement, licitDocument: LicitDocumentElement): void;
138
+ private renderImage;
139
+ private parseOL;
158
140
  /**
159
141
  * To parse table data
160
142
  * @param e - element
@@ -166,114 +148,131 @@ export declare class LicitConverter {
166
148
  * @param isTransparent - flag to distinguish preface table
167
149
  * @returns void
168
150
  */
169
- parseTableContent(_e: any, tableTag: any, querySel: any, isChapterHeader: any, licitTable: any, widthArray: number[], isTransparent: boolean): void;
170
- parseTableContentInnerLoopHelper(cells: any, _cellIndex: number, isChapterHeader: boolean, licitRow: LicitTableRowElement, widthArray: number[], isTransparent: boolean): void;
151
+ private parseTableContent;
152
+ private parseTableContentInnerLoopHelper;
171
153
  private addCell;
154
+ /**
155
+ * Extracts style information from a table cell element per the ingest requirements.
156
+ * Captures: the class name and id of the cell's first paragraph, its margins (top/bottom) and font-size override, and letter-spacing for non-breaking spaces.
157
+ *
158
+ * @param cell - The HTMLTableCellElement to extract styles from
159
+ * @returns Object containing extracted style information
160
+ */
161
+ private extractCellStyles;
162
+ /**
163
+ * Extracts margin and font-size properties from a style string.
164
+ *
165
+ * @param style - The style attribute string
166
+ * @param styleInfo - The style info object to populate
167
+ */
168
+ private extractParagraphStyles;
169
+ /**
170
+ * Extracts the first letter-spacing value from spans containing non-breaking spaces.
171
+ *
172
+ * @param spans - NodeList of span elements with letter-spacing styles
173
+ * @param styleInfo - The style info object to populate
174
+ */
175
+ private extractLetterSpacing;
172
176
  checkCellStyle(style: string | null): string | null;
173
177
  private addTableImageCell;
174
- ParseNestedList(_listType: string, node: ChildNode, licitDocument: LicitDocumentElement, indent: number): void;
178
+ private ParseNestedList;
175
179
  /**
176
180
  * Returns the level of an element as described by the number at the end of its classname
177
181
  *
178
182
  * @param className - The className of the element
179
183
  * @returns The level as a number or zero if the level cannot be determined
180
184
  */
181
- extractLevel(className: string): number;
185
+ private extractLevel;
182
186
  /**
183
187
  * Determines if an element is a table or image then calls the appropriate parse method
184
188
  */
185
- parseTableFigure(element: Element): void;
189
+ private parseTableFigure;
186
190
  /**
187
191
  * Parse a table element
188
192
  */
189
- parseTable(element: Element, useEnhancedTables: boolean): void;
193
+ private parseTable;
190
194
  /**
191
195
  * Parse a vignette element
192
196
  */
193
- parseVignet(element: Element): void;
197
+ private parseVignet;
194
198
  /**
195
199
  * Parse a figure (image) element
196
200
  */
197
- parseFigure(element: Element): void;
201
+ private parseFigure;
198
202
  /**
199
203
  * Parse a note element
200
204
  */
201
- parseNote(element: Element): void;
205
+ private parseNote;
202
206
  /**
203
207
  * Parse a hr element
204
208
  */
205
- parseHR(element: Element): void;
209
+ private parseHR;
206
210
  /**
207
211
  * Parse a chapter title element
208
212
  */
209
- parseChapterTitle(element: Element): void;
213
+ private parseChapterTitle;
210
214
  /**
211
215
  * Parse a chapter subtitle element
212
216
  */
213
- parseChapterSubtitle(element: Element): void;
217
+ private parseChapterSubtitle;
214
218
  /**
215
219
  * Parse a header element
216
220
  */
217
- parseHeader(element: Element, nextElement: Element): void;
221
+ private parseHeader;
218
222
  /**
219
223
  * Parse a bullet point item element
220
224
  */
221
- parseBullet(element: Element): void;
225
+ private parseBullet;
222
226
  /**
223
227
  * Parse an ordered list item element
224
228
  */
225
- parseOrdered(element: Element): void;
229
+ private parseOrdered;
226
230
  /**
227
231
  * Parse a paragraph element
228
232
  */
229
- parseParagraph(element: Element): void;
230
- parseDynamicHeader(element: Element): void;
233
+ private parseParagraph;
234
+ private parseDynamicHeader;
231
235
  /** Sanitize the text content by removing specific characters */
232
- sanitizeText(element: Element): void;
236
+ private sanitizeText;
233
237
  /**
234
238
  * Parse a figure (image) title element
235
239
  */
236
- parseFigureTitle(element: Element): void;
240
+ private parseFigureTitle;
237
241
  /**
238
242
  * Parse a ChangeBarPara element
239
243
  */
240
- parseChangeBarPara(element: Element): void;
244
+ private parseChangeBarPara;
241
245
  /**
242
246
  * Parse a table title element
243
247
  */
244
- parseTableTitle(element: Element): void;
248
+ private parseTableTitle;
245
249
  /**
246
250
  * Parse an unknown element. Currently does nothing besides printing a warning to the console.
247
251
  */
248
- parseUnknownElement(element: Element, message: string): void;
252
+ private parseUnknownElement;
249
253
  /**
250
254
  * Parse a section title element
251
255
  */
252
- parseSectionTitle(element: Element): void;
256
+ private parseSectionTitle;
253
257
  /**
254
258
  * Parses an `Element` as determined by its `className`
255
259
  *
256
260
  * @param element - The `Element` to be parsed
257
261
  */
258
- parseElement(element: Element, nextElement: Element): void;
259
- parseElement_doc(element: Element, nextElement: Element): void;
262
+ private parseElement;
263
+ private parseElement_doc;
260
264
  /**
261
265
  * Cleans up the HTML by calling certain helper methods
262
266
  */
263
- sanitizeHTML(html: string): string;
264
- /**
265
- * Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
266
- */
267
- replaceUnwantedChars(html: string): string;
267
+ private sanitizeHTML;
268
268
  /**
269
269
  * Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
270
270
  */
271
- replaceKeywordsWithLinks(html: string): string;
272
- matchClassToExcludeNumber(className: string): boolean;
273
- sanitizeElement(element: Element): void;
274
- removeLastNumber(inputString: string): string;
275
- getScaledWidth(width: number): string;
276
- isTransparentTable(element: Element): boolean;
271
+ private replaceKeywordsWithLinks;
272
+ private matchClassToExcludeNumber;
273
+ private sanitizeElement;
274
+ private getScaledWidth;
275
+ private isTransparentTable;
277
276
  /**
278
277
  * Extracts and calculates the column widths from a given HTML table element.
279
278
  *
@@ -286,24 +285,24 @@ export declare class LicitConverter {
286
285
  * @param {HTMLTableElement} table - The HTML table element from which column widths are to be extracted.
287
286
  * @returns {number[] | undefined} An array of column widths in pixels, or `undefined` if the widths are invalid or missing.
288
287
  */
289
- getColWidthArray(table: HTMLTableElement): number[] | undefined;
290
- setCellWidth(colSpan: number, cellIndex: number, colWidthArray: number[]): number[];
291
- scaleWidthArray(rawWidthArray: number[]): number[];
292
- getSumOfArray(array: number[]): number;
288
+ private getColWidthArray;
289
+ private setCellWidth;
290
+ private scaleWidthArray;
291
+ private getSumOfArray;
293
292
  /**
294
293
  * Determines the orientation (portrait or landscape) based on the total width.
295
294
  *
296
295
  * @param {number} totalWidth - The total width (in pixels) used to determine orientation.
297
296
  * @returns {'portrait' | 'landscape'} Returns 'portrait' if the width is less than 700 pixels; otherwise, returns 'landscape'.
298
297
  */
299
- findOrientation(totalWidth: number): 'portrait' | 'landscape';
298
+ private findOrientation;
300
299
  /**
301
300
  * Extracts image information from an HTMLImageElement.
302
301
  *
303
302
  * @param {HTMLImageElement} img - The image element to extract information from.
304
303
  * @returns {{ src: string; alt: string; width: number; height: number }} An object containing the image's source URL, alt text, width, and height.
305
304
  */
306
- extractImageInfo(img: HTMLImageElement): ImageInfo;
305
+ private extractImageInfo;
307
306
  /**
308
307
  * Extracts note paragraphs from the last row of an HTML table if that row
309
308
  * contains a note header such as "OVERALL NOTE:" or "NOTES:".
@@ -345,7 +344,7 @@ export declare class LicitConverter {
345
344
  * @param {Element} node - The DOM element to check.
346
345
  * @returns {boolean} `true` if the element qualifies as a table figure, otherwise `false`.
347
346
  */
348
- isTableFigureNode(node: Element): boolean;
347
+ private isTableFigureNode;
349
348
  /**
350
349
  * Determines whether the provided class name corresponds to a note-related node.
351
350
  *
@@ -260,14 +260,6 @@ export class LicitConverter {
260
260
  }
261
261
  return renderedArr;
262
262
  }
263
- /**
264
- * Returns a map elements which were parsed.
265
- *
266
- * @returns Map of elements
267
- */
268
- getElementsParsedMap() {
269
- return this.elementsParsedMap;
270
- }
271
263
  getCustomStyle(styleName) {
272
264
  return this.config.customStyles?.find((s) => s.styleName === styleName);
273
265
  }
@@ -467,7 +459,7 @@ export class LicitConverter {
467
459
  }
468
460
  const childNode = children[j];
469
461
  let nextChildNode = children[j + 1];
470
- // KNITE-1013: Handling paragraph combining logic for the case where
462
+ // Handling paragraph combining logic for the case where
471
463
  // heading is inside <OL>/<UL> and content is outside
472
464
  if (!nextChildNode &&
473
465
  (node.tagName === 'OL' || node.tagName === 'UL') &&
@@ -972,6 +964,7 @@ export class LicitConverter {
972
964
  renderDocTable(e, licitDocument) {
973
965
  const licitTable = new LicitTableElement();
974
966
  const colWidthsArray = this.getColWidthArray(e.node);
967
+ licitTable.noOfColumns = colWidthsArray?.length ?? 0;
975
968
  const tableHead = e.node.querySelector('thead');
976
969
  const table = e.node.querySelector('tbody');
977
970
  licitTable.capco = getCapcoFromNode(table);
@@ -1183,6 +1176,7 @@ export class LicitConverter {
1183
1176
  */
1184
1177
  parseTableContent(_e, tableTag, querySel, isChapterHeader, licitTable, widthArray, isTransparent) {
1185
1178
  const rows = tableTag.querySelectorAll('tr');
1179
+ let totalTableHeight = 0;
1186
1180
  for (let i = 0; i < rows.length; i++) {
1187
1181
  if (!isTransparent &&
1188
1182
  i == 0 &&
@@ -1191,11 +1185,21 @@ export class LicitConverter {
1191
1185
  isChapterHeader = true;
1192
1186
  }
1193
1187
  const licitRow = new LicitTableRowElement();
1188
+ // Capture the row height from the row's `height` attribute
1189
+ const rowHeight = rows[i].getAttribute('height');
1190
+ if (rowHeight) {
1191
+ licitRow.height = rowHeight;
1192
+ licitRow.rowHeight = rowHeight;
1193
+ totalTableHeight += parseFloat(rowHeight);
1194
+ }
1194
1195
  const cells = rows[i].querySelectorAll(querySel);
1195
1196
  this.parseTableContentInnerLoopHelper(cells, i, isChapterHeader, licitRow, widthArray, isTransparent);
1196
1197
  licitTable.addRow(licitRow);
1197
1198
  isChapterHeader = false;
1198
1199
  }
1200
+ if (totalTableHeight > 0) {
1201
+ licitTable.tableHeight = `${totalTableHeight}px`;
1202
+ }
1199
1203
  }
1200
1204
  parseTableContentInnerLoopHelper(cells, _cellIndex, isChapterHeader, licitRow, widthArray, isTransparent) {
1201
1205
  for (let j = 0; j < cells.length; j++) {
@@ -1241,13 +1245,19 @@ export class LicitConverter {
1241
1245
  let colWidth;
1242
1246
  let licitCell = null;
1243
1247
  const text = cell.textContent ?? '';
1248
+ // Extract cell-level style information
1249
+ const cellStyleInfo = this.extractCellStyles(cell);
1250
+ if (widthArray?.length > 0) {
1251
+ const computedWidth = this.setCellWidth(colspan, cellIndex, widthArray);
1252
+ cellStyleInfo.cellWidth = computedWidth?.join(',');
1253
+ }
1244
1254
  if (cell.childNodes?.length <= 0) {
1245
1255
  //condition
1246
- licitCell = new LicitTableCellParaElement(cell, bgColor, null, verAlign, isChapterHeader, isTransparent);
1256
+ licitCell = new LicitTableCellParaElement(cell, bgColor, null, verAlign, isChapterHeader, isTransparent, cellStyleInfo);
1247
1257
  }
1248
1258
  else if ('' === text &&
1249
1259
  cell.childNodes[0].querySelector('img')) {
1250
- ({ licitCell } = this.addTableImageCell(cell, bgColor, isChapterHeader, licitCell, verAlign));
1260
+ ({ licitCell } = this.addTableImageCell(cell, bgColor, isChapterHeader, licitCell, verAlign, cellStyleInfo));
1251
1261
  }
1252
1262
  else {
1253
1263
  if (isChapterHeader) {
@@ -1255,7 +1265,7 @@ export class LicitConverter {
1255
1265
  cell.align = 'center'; // NOSONAR used by Licit parser (depricated)
1256
1266
  cell.setAttribute('classname', 'LC-Center');
1257
1267
  }
1258
- licitCell = new LicitTableCellParaElement(cell, bgColor, colWidth, verAlign, isChapterHeader, isTransparent);
1268
+ licitCell = new LicitTableCellParaElement(cell, bgColor, colWidth, verAlign, isChapterHeader, isTransparent, cellStyleInfo);
1259
1269
  }
1260
1270
  licitCell.rowspan = rowspan;
1261
1271
  licitCell.colspan = colspan;
@@ -1264,6 +1274,80 @@ export class LicitConverter {
1264
1274
  }
1265
1275
  licitRow.addCell(licitCell);
1266
1276
  }
1277
+ /**
1278
+ * Extracts style information from a table cell element per the ingest requirements.
1279
+ * Captures: margins (top/bottom), font-size overrides, and letter-spacing for non-breaking spaces.
1280
+ *
1281
+ * @param cell - The HTMLTableCellElement to extract styles from
1282
+ * @returns Object containing extracted style information
1283
+ */
1284
+ extractCellStyles(cell) {
1285
+ const styleInfo = {};
1286
+ // Capture class and ID from the paragraph inside the cell
1287
+ const paragraph = cell.querySelector('p');
1288
+ if (paragraph) {
1289
+ if (paragraph.className) {
1290
+ styleInfo.className = paragraph.className;
1291
+ }
1292
+ if (paragraph.id) {
1293
+ styleInfo.id = paragraph.id;
1294
+ }
1295
+ // Extract style attributes from the paragraph's style attribute
1296
+ const style = paragraph.getAttribute('style');
1297
+ if (style) {
1298
+ this.extractParagraphStyles(style, styleInfo);
1299
+ }
1300
+ // Extract letter-spacing for non-breaking spaces
1301
+ const spans = paragraph.querySelectorAll('span[style*="letter-spacing"]');
1302
+ this.extractLetterSpacing(spans, styleInfo);
1303
+ }
1304
+ return styleInfo;
1305
+ }
1306
+ /**
1307
+ * Extracts margin and font-size properties from a style string.
1308
+ *
1309
+ * @param style - The style attribute string
1310
+ * @param styleInfo - The style info object to populate
1311
+ */
1312
+ extractParagraphStyles(style, styleInfo) {
1313
+ const styleProps = style.split(';');
1314
+ for (const prop of styleProps) {
1315
+ const trimmedProp = prop.trim();
1316
+ if (trimmedProp.startsWith('margin-top')) {
1317
+ styleInfo.marginTop = trimmedProp.split(':')[1]?.trim();
1318
+ }
1319
+ else if (trimmedProp.startsWith('margin-bottom')) {
1320
+ styleInfo.marginBottom = trimmedProp.split(':')[1]?.trim();
1321
+ }
1322
+ else if (trimmedProp.startsWith('font-size')) {
1323
+ styleInfo.fontSize = trimmedProp.split(':')[1]?.trim();
1324
+ }
1325
+ }
1326
+ }
1327
+ /**
1328
+ * Extracts the first letter-spacing value from spans containing non-breaking spaces.
1329
+ *
1330
+ * @param spans - NodeList of span elements with letter-spacing styles
1331
+ * @param styleInfo - The style info object to populate
1332
+ */
1333
+ extractLetterSpacing(spans, styleInfo) {
1334
+ const letterSpacingRegex = /letter-spacing\s{0,1000}:\s{0,1000}([^;]{1,1000})/;
1335
+ for (const span of Array.from(spans)) {
1336
+ // Check if this span contains a non-breaking space
1337
+ const content = span.innerHTML;
1338
+ if (content.includes('&#160;') || content.includes('&nbsp;')) {
1339
+ const spanStyle = span.getAttribute('style');
1340
+ if (spanStyle) {
1341
+ const match = letterSpacingRegex.exec(spanStyle);
1342
+ if (match) {
1343
+ // Store the first letter-spacing value found
1344
+ styleInfo.letterSpacing = match[1].trim();
1345
+ break;
1346
+ }
1347
+ }
1348
+ }
1349
+ }
1350
+ }
1267
1351
  checkCellStyle(style) {
1268
1352
  let borderColor = null;
1269
1353
  if (style != null) {
@@ -1282,7 +1366,7 @@ export class LicitConverter {
1282
1366
  }
1283
1367
  return borderColor;
1284
1368
  }
1285
- addTableImageCell(cell, bgColor, isChapterHeader, licitCell, verAlign) {
1369
+ addTableImageCell(cell, bgColor, isChapterHeader, licitCell, verAlign, cellStyleInfo) {
1286
1370
  const image = cell.childNodes[0].querySelector('img');
1287
1371
  let altText = null;
1288
1372
  let imgHeight = null;
@@ -1303,10 +1387,10 @@ export class LicitConverter {
1303
1387
  const source = image?.getAttribute('srcRelative') ?? image?.src;
1304
1388
  if (source) {
1305
1389
  // seybi excluded image
1306
- licitCell = new LicitTableCellImageElement(source, fillImg, fitoParent, bgColor, imgHeight, colWidth, altText);
1390
+ licitCell = new LicitTableCellImageElement(source, fillImg, fitoParent, bgColor, imgHeight, colWidth, altText, cellStyleInfo);
1307
1391
  }
1308
1392
  else {
1309
- licitCell = new LicitTableCellParagraph(cell, bgColor, colWidth, verAlign);
1393
+ licitCell = new LicitTableCellParagraph(cell, bgColor, colWidth, verAlign, cellStyleInfo);
1310
1394
  }
1311
1395
  return { bgColor, isChapterHeader, licitCell };
1312
1396
  }
@@ -1894,16 +1978,6 @@ export class LicitConverter {
1894
1978
  sanitizeHTML(html) {
1895
1979
  return this.replaceKeywordsWithLinks(html);
1896
1980
  }
1897
- /**
1898
- * Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
1899
- */
1900
- replaceUnwantedChars(html) {
1901
- const chars = this.config.replacementChars;
1902
- for (const char of chars) {
1903
- html = html.replace(char.find, char.replace);
1904
- }
1905
- return html;
1906
- }
1907
1981
  /**
1908
1982
  * Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
1909
1983
  */
@@ -1916,7 +1990,7 @@ export class LicitConverter {
1916
1990
  }
1917
1991
  return html;
1918
1992
  }
1919
- //FS : For skipping triming inside table, add more classes to the class list for future use
1993
+ // For skipping trimming inside tables; add more classes to the class list for future use
1920
1994
  matchClassToExcludeNumber(className) {
1921
1995
  let trimmedClassName = className.trim();
1922
1996
  trimmedClassName = trimmedClassName.toLowerCase();
@@ -1950,14 +2024,6 @@ export class LicitConverter {
1950
2024
  };
1951
2025
  stripTextContent(element);
1952
2026
  }
1953
- removeLastNumber(inputString) {
1954
- let lastNonDigitIndex = inputString.length - 1;
1955
- while (lastNonDigitIndex >= 0 &&
1956
- !Number.isNaN(Number.parseInt(inputString[lastNonDigitIndex]))) {
1957
- lastNonDigitIndex--;
1958
- }
1959
- return inputString.slice(0, lastNonDigitIndex + 1);
1960
- }
1961
2027
  getScaledWidth(width) {
1962
2028
  if (width <= 200) {
1963
2029
  return width.toString();
package/package.json CHANGED
@@ -1,52 +1,52 @@
1
- {
2
- "name": "@modusoperandi/licit-import-utils",
3
- "version": "0.1.0",
4
- "license": "MIT",
5
- "type": "module",
6
- "subversion": "1",
7
- "description": "A utility package for importing files like json or docx into Licit compatible documents",
8
- "main": "index.js",
9
- "types": "index.d.ts",
10
- "repository": {
11
- "type": "git",
12
- "url": "git+https://github.com/MO-Movia/licit-import-utils.git"
13
- },
14
- "scripts": {
15
- "test": "jest",
16
- "test:unit": "jest",
17
- "test:coverage": "jest --env=jsdom --coverage",
18
- "build:clean": "rm -rf dist/ && rm -f modusoperandi-*.*.*.tgz",
19
- "lint": "eslint src",
20
- "ci:build": "tsc -b tsconfig.prod.json --clean && tsc -b tsconfig.prod.json && npx copyfiles@2.4.1 package.json LICENSE dist",
21
- "ci:bom": "npx @cyclonedx/cyclonedx-npm --ignore-npm-errors --short-PURLs --output-format XML --output-file dist/bom.xml",
22
- "verify": "npm run lint -- --fix && npm run ci:build && npm run test:coverage && echo 'All Tests Passed!'"
23
- },
24
- "peerDependencies": {
25
- "@modusoperandi/mammoth": "^1.7.0-6",
26
- "jszip": "^3.10.1"
27
- },
28
- "peerDependenciesMeta": {
29
- "@modusoperandi/mammoth": {
30
- "optional": true
31
- },
32
- "jszip": {
33
- "optional": true
34
- }
35
- },
36
- "dependencies": {
37
- "uuid": "^13.0.0"
38
- },
39
- "devDependencies": {
40
- "@modusoperandi/mammoth": "^1.7.0-6",
41
- "@modusoperandi/eslint-config": "^3.0.3",
42
- "@types/jest": "^30.0.0",
43
- "jszip": "^3.10.1",
44
- "eslint": "^9.39.2",
45
- "jest": "^30.2.0",
46
- "jest-environment-jsdom": "^30.2.0",
47
- "jest-junit": "^16.0.0",
48
- "ts-jest": "^29.4.6",
49
- "ts-node": "^10.9.2",
50
- "typescript": "^5.9.3"
51
- }
52
- }
1
+ {
2
+ "name": "@modusoperandi/licit-import-utils",
3
+ "version": "0.1.2",
4
+ "license": "MIT",
5
+ "type": "module",
6
+ "subversion": "1",
7
+ "description": "A utility package for importing files like json or docx into Licit compatible documents",
8
+ "main": "index.js",
9
+ "types": "index.d.ts",
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "git+https://github.com/MO-Movia/licit-import-utils.git"
13
+ },
14
+ "scripts": {
15
+ "test": "jest",
16
+ "test:unit": "jest",
17
+ "test:coverage": "jest --env=jsdom --coverage",
18
+ "build:clean": "rm -rf dist/ && rm -f modusoperandi-*.*.*.tgz",
19
+ "lint": "eslint src",
20
+ "ci:build": "tsc -b tsconfig.prod.json --clean && tsc -b tsconfig.prod.json && npx copyfiles@2.4.1 package.json LICENSE README.md dist",
21
+ "ci:bom": "npx @cyclonedx/cyclonedx-npm --ignore-npm-errors --short-PURLs --output-format XML --output-file dist/bom.xml",
22
+ "verify": "npm run lint -- --fix && npm run ci:build && npm run test:coverage && echo 'All Tests Passed!'"
23
+ },
24
+ "peerDependencies": {
25
+ "@modusoperandi/mammoth": "^1.7.0-6",
26
+ "jszip": "^3.10.1"
27
+ },
28
+ "peerDependenciesMeta": {
29
+ "@modusoperandi/mammoth": {
30
+ "optional": true
31
+ },
32
+ "jszip": {
33
+ "optional": true
34
+ }
35
+ },
36
+ "dependencies": {
37
+ "uuid": "^13.0.0"
38
+ },
39
+ "devDependencies": {
40
+ "@modusoperandi/mammoth": "^1.7.0-8",
41
+ "@modusoperandi/eslint-config": "^3.0.3",
42
+ "@types/jest": "^30.0.0",
43
+ "jszip": "^3.10.1",
44
+ "eslint": "^9.39.3",
45
+ "jest": "^30.2.0",
46
+ "jest-environment-jsdom": "^30.2.0",
47
+ "jest-junit": "^16.0.0",
48
+ "ts-jest": "^29.4.6",
49
+ "ts-node": "^10.9.2",
50
+ "typescript": "^5.9.3"
51
+ }
52
+ }
@@ -0,0 +1,22 @@
1
+ /**
2
+ * @license MIT
3
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
+ */
5
+ import type { Message } from './types';
6
/**
 * Runs a document buffer through the DOCX transformer and returns the unique
 * style IDs named in the messages the transformer reports.
 *
 * @param arrayBuffer - Raw bytes of the document
 * @param docType - Document type forwarded to the transformer
 * @returns The style IDs, in first-seen order
 */
export declare function extractStylesForDoc(arrayBuffer: ArrayBuffer, docType: string): Promise<{
    styles: string[];
}>;
/**
 * Pulls the text that follows `Style ID: ` (up to the closing parenthesis)
 * out of each message and returns the distinct values in first-seen order.
 *
 * @param data - Messages to scan; a nullish value is treated as an empty list
 */
export declare function extractUniqueStyleIds(data: Message[]): string[];
/**
 * Decodes the buffer as UTF-8 JSON and gathers every string-valued
 * `styleName` found anywhere in the parsed structure.
 *
 * @param arrayBuffer - Raw bytes of the JSON document
 * @returns The decoded text as `content` plus the distinct style names
 * @throws When the text is not valid JSON or does not parse to an object or array
 */
export declare function extractStylesForJSON(arrayBuffer: ArrayBuffer): Promise<{
    content: string;
    styles: string[];
}>;
/**
 * Recursively walks `obj` and appends each not-yet-present string value stored
 * under a `styleName` key to `styles`.
 *
 * @param obj - Value to traverse; non-objects are ignored
 * @param styles - Accumulator that is appended to and returned (defaults to a new array)
 */
export declare function collectStyles(obj: unknown, styles?: string[]): string[];
/**
 * Parses the buffer as HTML and returns the distinct class names used by its elements.
 *
 * @param arrayBuffer - Raw bytes of the HTML document
 */
export declare function processHTML(arrayBuffer: ArrayBuffer): Promise<{
    styles: string[];
}>;
/**
 * Collects the distinct class names used across every entry of the zip whose
 * name ends with `.htm`.
 *
 * Asks the user for confirmation when the archive is larger than 1 GB or holds
 * more than 10000 entries.
 *
 * @param zipFile - The zip archive to read
 * @throws When the user declines either confirmation
 */
export declare function extractStylesFromZip(zipFile: File): Promise<{
    styles: string[];
}>;
/**
 * Decodes the bytes of the buffer to a string with a default `TextDecoder`.
 */
export declare function arrayBufferToString(arrayBuffer: ArrayBuffer): string;
/**
 * Returns the distinct class names found on elements of `doc` that carry a
 * `class` attribute.
 */
export declare function extractStyleNamesFromHTML(doc: Document): string[];
@@ -0,0 +1,105 @@
1
+ /**
2
+ * @license MIT
3
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
+ */
5
+ import JSZip from 'jszip';
6
+ import { DocxTransformer } from './transform.docx';
7
/**
 * Feeds a document buffer to `DocxTransformer` and reports the style IDs named
 * in the messages the transformer emits while working.
 *
 * The transformer's own result is discarded; only the `(type, message)` pairs
 * passed to the callback are inspected.
 *
 * @param arrayBuffer - Raw bytes of the document
 * @param docType - Document type forwarded to `DocxTransformer`
 * @returns Object whose `styles` lists the unique style IDs found
 */
export async function extractStylesForDoc(arrayBuffer, docType) {
    const collectedMessages = [];
    const recordMessage = (type, message) => collectedMessages.push({ type, message });
    const transformer = new DocxTransformer(docType, recordMessage);
    await transformer.transform(arrayBuffer);
    return { styles: extractUniqueStyleIds(collectedMessages) };
}
15
/**
 * Extracts the distinct style IDs mentioned in a list of messages.
 *
 * A style ID is the (at most 100 character) text that follows `Style ID: ` up
 * to the next closing parenthesis. Messages without such text, or with an
 * empty ID, contribute nothing.
 *
 * @param data - Messages to scan; `null`/`undefined` is treated as an empty list
 * @returns The style IDs in first-seen order, without duplicates
 */
export function extractUniqueStyleIds(data) {
    // Non-global pattern: exec() keeps no state, so one instance serves every message.
    const styleIdPattern = /Style ID: (.{0,100}?)(?=\))/;
    // A Set keeps insertion order and makes the duplicate check O(1).
    const uniqueIds = new Set();
    for (const item of data ?? []) {
        const styleId = styleIdPattern.exec(item.message)?.[1];
        if (styleId) {
            uniqueIds.add(styleId);
        }
    }
    return [...uniqueIds];
}
27
/**
 * Decodes a UTF-8 JSON document and gathers the style names it references.
 *
 * Declared `async` so that every failure (malformed JSON, or JSON that does
 * not parse to an object/array) surfaces as a rejected promise instead of a
 * synchronous throw from a promise-returning function.
 *
 * @param arrayBuffer - Raw bytes of the JSON document
 * @returns The decoded text as `content` and the distinct `styleName` values as `styles`
 * @throws {SyntaxError} (as a rejection) when the text is not valid JSON
 * @throws {Error} (as a rejection) when the parsed value is not an object or array
 */
export async function extractStylesForJSON(arrayBuffer) {
    const content = new TextDecoder('utf-8').decode(arrayBuffer);
    const parsed = JSON.parse(content);
    if (typeof parsed !== 'object' || parsed === null) {
        throw new Error('Invalid JSON document');
    }
    const styles = [];
    collectStyles(parsed, styles);
    return { content, styles };
}
38
/**
 * Walks a parsed JSON value depth-first and appends every string stored under
 * a `styleName` key to `styles`, skipping names that are already present.
 *
 * Arrays are traversed like objects. Non-object inputs are ignored.
 *
 * @param obj - Value to traverse
 * @param styles - Accumulator; it is appended to in place and returned
 * @returns The same `styles` array that was passed in (or a new one by default)
 */
export function collectStyles(obj, styles = []) {
    // Mirror of `styles` for O(1) duplicate checks; seeded with names already collected.
    const known = new Set(styles);
    const visit = (node) => {
        if (typeof node !== 'object' || node === null) {
            return;
        }
        for (const [key, value] of Object.entries(node)) {
            if (typeof value === 'object' && value !== null) {
                // Nested object or array: keep descending.
                visit(value);
            }
            else if (key === 'styleName' && typeof value === 'string' && !known.has(value)) {
                known.add(value);
                styles.push(value);
            }
        }
    };
    visit(obj);
    return styles;
}
57
/**
 * Parses an HTML document held in a buffer and returns the distinct class
 * names its elements use.
 *
 * Any error raised while decoding or parsing is delivered as a rejection.
 *
 * @param arrayBuffer - Raw bytes of the HTML document
 * @returns Object whose `styles` lists the unique class names
 */
export async function processHTML(arrayBuffer) {
    const markup = arrayBufferToString(arrayBuffer);
    // Let the DOM parser build a document so class names can be queried.
    const parsedDoc = new DOMParser().parseFromString(markup, 'text/html');
    return { styles: extractStyleNamesFromHTML(parsedDoc) };
}
68
/**
 * Collects the distinct class names used across the `.htm` entries of a zip
 * archive.
 *
 * The user is asked (via `confirm`) before an archive larger than 1 GB is
 * loaded and before an archive with more than 10000 entries is processed.
 * Entries are read one at a time, in the order the archive lists them.
 *
 * @param zipFile - The zip archive to read
 * @returns Object whose `styles` lists the unique class names in first-seen order
 * @throws {Error} When the archive exceeds a limit and the user declines to continue
 */
export async function extractStylesFromZip(zipFile) {
    const MAX_FILES = 10000;
    const MAX_SIZE = 1073741824; // 1 GB
    if (zipFile.size > MAX_SIZE &&
        !confirm(`zip is ${zipFile.size / MAX_SIZE} GB. continue?`)) {
        throw new Error(`Size of the zip (${zipFile.size} bytes) exceeds the limit (${MAX_SIZE} bytes)`);
    }
    const loadedZip = await JSZip.loadAsync(zipFile); //NOSONAR size validated. Safe to extract.
    const entryNames = Object.keys(loadedZip.files);
    // Check if the total number of files exceeds the limit
    const totalFiles = entryNames.length;
    if (totalFiles > MAX_FILES &&
        !confirm(`zip contains an excessive ${totalFiles} files. continue?`)) {
        throw new Error(`Number of files in the zip (${totalFiles}) exceeds the limit (${MAX_FILES})`);
    }
    // One Set for the whole archive instead of rebuilding the array per file.
    const combinedStyles = new Set();
    for (const fileName of entryNames) {
        if (!fileName.endsWith('.htm')) {
            continue;
        }
        const arrayBuffer = await loadedZip.files[fileName].async('arraybuffer');
        const { styles } = await processHTML(arrayBuffer);
        for (const styleName of styles) {
            combinedStyles.add(styleName);
        }
    }
    return { styles: [...combinedStyles] };
}
92
/**
 * Decodes the bytes of a buffer to a string using a default `TextDecoder`.
 *
 * @param arrayBuffer - Raw bytes to decode
 * @returns The decoded text
 */
export function arrayBufferToString(arrayBuffer) {
    const bytes = new Uint8Array(arrayBuffer);
    const decoder = new TextDecoder();
    return decoder.decode(bytes);
}
95
/**
 * Returns the distinct class names used by the elements of a document.
 *
 * The `class` attribute is read directly (rather than `className`, which is
 * not a string on SVG elements) and empty tokens produced by leading,
 * trailing or very long runs of whitespace are dropped, so the result never
 * contains an empty string.
 *
 * @param doc - Document (or any node exposing `querySelectorAll`) to scan
 * @returns The unique class names in first-seen order
 */
export function extractStyleNamesFromHTML(doc) {
    const uniqueNames = new Set();
    for (const element of Array.from(doc.querySelectorAll('[class]'))) {
        const classAttribute = element.getAttribute('class') ?? '';
        for (const token of classAttribute.split(/\s{1,100}/)) {
            if (token) {
                uniqueNames.add(token);
            }
        }
    }
    return [...uniqueNames];
}
package/transform.zip.js CHANGED
@@ -154,25 +154,32 @@ async function loopHTMLFiles(htmlFiles, updateSrc) {
154
154
  const processedHtmlContents = (await Promise.all(htmlFiles.files
155
155
  .filter((htmlFile) => !!htmlFile)
156
156
  .map((f) => processFile(f, htmlFiles.imageFiles, updateSrc)))).filter((x) => x?.length);
157
+ if (processedHtmlContents.length === 0 && htmlFiles.files.length > 0) {
158
+ throw new Error(`File contents are empty`);
159
+ }
157
160
  return sortedNodeList(processedHtmlContents);
158
161
  }
159
162
  async function processFile(file, imageFiles, updateSrc) {
160
163
  const htmlContent = await file.content();
161
164
  const htmlFileName = file.name ?? 'Unknown file';
165
+ // Reject files with zero bytes
166
+ if (!htmlContent?.length) {
167
+ throw new Error(`File ${htmlFileName} has zero bytes`);
168
+ }
162
169
  // Get content before <head> (first 1000 chars should be enough)
163
170
  const beforeHead = htmlContent.substring(0, 1000);
164
- // Check 1: Reject old DOCTYPE declarations
171
+ // Reject old DOCTYPE declarations
165
172
  if (beforeHead.includes('<!DOCTYPE HTML PUBLIC')) {
166
- throw new Error(`Incorrect file format: ${htmlFileName}`);
173
+ throw new Error(`Incorrect file format (was "!DOCTYPE HTML PUBLIC"): ${htmlFileName}`);
167
174
  }
168
- // Check 2: Reject XML declarations (XHTML format)
175
+ // Reject XML declarations (XHTML format)
169
176
  if (beforeHead.trimStart().startsWith('<?xml')) {
170
- throw new Error(`Incorrect file format: ${htmlFileName}`);
177
+ throw new Error(`Incorrect file format (was "XHTML"): ${htmlFileName}`);
171
178
  }
172
- // Check 3: Must have <html lang="...">
179
+ // Must have <html lang="...">
173
180
  // Option A: Exact match for en-US
174
181
  if (!beforeHead.includes('<html lang="en-US">')) {
175
- throw new Error(`Incorrect file format: ${htmlFileName}`);
182
+ throw new Error(`Incorrect file format (missing "<html lang=..."): ${htmlFileName}`);
176
183
  }
177
184
  const domCollection = new DOMParser().parseFromString(htmlContent, 'text/html');
178
185
  //Get the title text