@modusoperandi/licit-import-utils 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +2 -0
- package/index.d.ts +3 -0
- package/index.js +2 -0
- package/licit-elements.d.ts +23 -3
- package/licit-elements.js +24 -3
- package/licit-transform.d.ts +95 -96
- package/licit-transform.js +100 -34
- package/package.json +52 -52
- package/preprocess.utils.d.ts +22 -0
- package/preprocess.utils.js +105 -0
- package/transform.zip.js +13 -6
package/LICENSE
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2026 Modus Operandi Inc.
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Modus Operandi Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
package/index.d.ts
CHANGED
|
@@ -4,5 +4,8 @@
|
|
|
4
4
|
*/
|
|
5
5
|
export * from './types';
|
|
6
6
|
export * from './licit-transform';
|
|
7
|
+
export * from './preprocess.utils';
|
|
7
8
|
export * from './transform.docx';
|
|
9
|
+
export * from './transform.utils';
|
|
8
10
|
export * from './transform.zip';
|
|
11
|
+
export { LicitDocumentJSON, LicitElementJSON } from './licit-elements';
|
package/index.js
CHANGED
package/licit-elements.d.ts
CHANGED
|
@@ -108,6 +108,9 @@ interface LicitTableAttrsJSON extends LicitElementAttrsJSON {
|
|
|
108
108
|
marginLeft: null;
|
|
109
109
|
vignette: boolean;
|
|
110
110
|
}
|
|
111
|
+
interface LicitTableRowAttrsJSON extends LicitElementAttrsJSON {
|
|
112
|
+
rowHeight: string;
|
|
113
|
+
}
|
|
111
114
|
interface LicitTableCellAttrsJSON extends LicitElementAttrsJSON {
|
|
112
115
|
colspan: number;
|
|
113
116
|
rowspan: number;
|
|
@@ -134,6 +137,7 @@ interface LicitTableCellImageJSON extends LicitElementJSON {
|
|
|
134
137
|
}
|
|
135
138
|
interface LicitTableRowJSON extends LicitElementJSON {
|
|
136
139
|
type: 'table_row';
|
|
140
|
+
attrs: LicitTableRowAttrsJSON;
|
|
137
141
|
content: LicitTableCellJSON[];
|
|
138
142
|
}
|
|
139
143
|
export interface LicitTableJSON extends LicitElementJSON {
|
|
@@ -159,6 +163,15 @@ interface LicitOrderedListJSON extends LicitElementJSON {
|
|
|
159
163
|
attrs: LicitBulletListAttrsJSON;
|
|
160
164
|
content: LicitBulletListItemJSON[];
|
|
161
165
|
}
|
|
166
|
+
interface CellStyleInfo {
|
|
167
|
+
className?: string;
|
|
168
|
+
id?: string;
|
|
169
|
+
marginTop?: string;
|
|
170
|
+
marginBottom?: string;
|
|
171
|
+
fontSize?: string;
|
|
172
|
+
letterSpacing?: string;
|
|
173
|
+
cellWidth?: string;
|
|
174
|
+
}
|
|
162
175
|
export interface LicitDocumentJSON extends LicitElementJSON {
|
|
163
176
|
type: 'doc';
|
|
164
177
|
attrs: LicitDocumentAttrsJSON;
|
|
@@ -627,7 +640,8 @@ export declare class LicitTableCellParagraph extends LicitElement {
|
|
|
627
640
|
colWidth: [number];
|
|
628
641
|
content: any[];
|
|
629
642
|
vAlign: string;
|
|
630
|
-
|
|
643
|
+
cellStyleInfo?: CellStyleInfo;
|
|
644
|
+
constructor(node: HTMLElement, bgColor?: string, colwidth?: [number], vericalAlignment?: string, cellStyleInfo?: CellStyleInfo);
|
|
631
645
|
render(): {
|
|
632
646
|
type: string;
|
|
633
647
|
attrs: {
|
|
@@ -684,7 +698,8 @@ export declare class LicitTableCellImageElement extends LicitElement {
|
|
|
684
698
|
alt: string;
|
|
685
699
|
fillImg: number;
|
|
686
700
|
fitToParent: number;
|
|
687
|
-
|
|
701
|
+
cellStyleInfo?: CellStyleInfo;
|
|
702
|
+
constructor(src: string, fillImg: number, fitToParent: number, bgColor?: string, imgHeight?: string, colWidth?: [number], alt?: string, cellStyleInfo?: CellStyleInfo);
|
|
688
703
|
render(): LicitTableCellImageJSON;
|
|
689
704
|
}
|
|
690
705
|
export declare class LicitVignetteElement extends LicitElement {
|
|
@@ -747,7 +762,8 @@ export declare class LicitTableCellParaElement extends LicitElement {
|
|
|
747
762
|
vAlign: string;
|
|
748
763
|
isTableHeader: boolean;
|
|
749
764
|
isTransparentTable: boolean;
|
|
750
|
-
|
|
765
|
+
cellStyleInfo?: CellStyleInfo;
|
|
766
|
+
constructor(node: HTMLElement, bgColor?: string, colwidth?: [number], vericalAlignment?: string, isTableHeader?: boolean, isTransparentTable?: boolean, cellStyleInfo?: CellStyleInfo);
|
|
751
767
|
ConvertElements(node: HTMLElement): void;
|
|
752
768
|
processChildNode(childNode: ChildNode): void;
|
|
753
769
|
processChildOL(childNode: ChildNode): void;
|
|
@@ -759,6 +775,8 @@ export declare class LicitTableCellParaElement extends LicitElement {
|
|
|
759
775
|
render(): LicitTableCellJSON;
|
|
760
776
|
}
|
|
761
777
|
export declare class LicitTableRowElement extends LicitElement {
|
|
778
|
+
height?: string;
|
|
779
|
+
rowHeight?: string;
|
|
762
780
|
getBaseElement(): LicitTableRowJSON;
|
|
763
781
|
cells: LicitTableCellElement[];
|
|
764
782
|
addCell(cell: LicitTableCellElement): void;
|
|
@@ -769,6 +787,8 @@ export declare class LicitTableElement extends LicitElement {
|
|
|
769
787
|
rows: LicitTableRowElement[];
|
|
770
788
|
isVignette: boolean;
|
|
771
789
|
capco?: string;
|
|
790
|
+
noOfColumns?: number;
|
|
791
|
+
tableHeight?: string;
|
|
772
792
|
constructor(isVignette?: boolean, capco?: string);
|
|
773
793
|
addRow(row: LicitTableRowElement): void;
|
|
774
794
|
render(): LicitTableJSON;
|
package/licit-elements.js
CHANGED
|
@@ -1876,11 +1876,13 @@ export class LicitTableCellParagraph extends LicitElement {
|
|
|
1876
1876
|
colWidth;
|
|
1877
1877
|
content = [];
|
|
1878
1878
|
vAlign;
|
|
1879
|
-
|
|
1879
|
+
cellStyleInfo;
|
|
1880
|
+
constructor(node, bgColor, colwidth, vericalAlignment, cellStyleInfo) {
|
|
1880
1881
|
super();
|
|
1881
1882
|
this.bgColor = bgColor;
|
|
1882
1883
|
this.colWidth = colwidth;
|
|
1883
1884
|
this.vAlign = vericalAlignment;
|
|
1885
|
+
this.cellStyleInfo = cellStyleInfo;
|
|
1884
1886
|
const paragraph = new NewLicitParagraphElement(node, null);
|
|
1885
1887
|
if (paragraph) {
|
|
1886
1888
|
this.content.push(paragraph.render());
|
|
@@ -1996,7 +1998,8 @@ export class LicitTableCellImageElement extends LicitElement {
|
|
|
1996
1998
|
alt;
|
|
1997
1999
|
fillImg;
|
|
1998
2000
|
fitToParent;
|
|
1999
|
-
|
|
2001
|
+
cellStyleInfo;
|
|
2002
|
+
constructor(src, fillImg, fitToParent, bgColor, imgHeight, colWidth, alt, cellStyleInfo) {
|
|
2000
2003
|
super();
|
|
2001
2004
|
this.src = src;
|
|
2002
2005
|
this.bgColor = bgColor;
|
|
@@ -2005,6 +2008,7 @@ export class LicitTableCellImageElement extends LicitElement {
|
|
|
2005
2008
|
this.alt = alt;
|
|
2006
2009
|
this.fillImg = fillImg;
|
|
2007
2010
|
this.fitToParent = fitToParent;
|
|
2011
|
+
this.cellStyleInfo = cellStyleInfo;
|
|
2008
2012
|
}
|
|
2009
2013
|
render() {
|
|
2010
2014
|
const element = this.getBaseElement();
|
|
@@ -2165,6 +2169,12 @@ export class LicitTableCellParaElement extends LicitElement {
|
|
|
2165
2169
|
colwidth: this.colWidth || defaultColWidth,
|
|
2166
2170
|
background: this.bgColor || defaultBgColor,
|
|
2167
2171
|
vAlign: this.vAlign || 'middle',
|
|
2172
|
+
cellWidth: this.cellStyleInfo?.cellWidth ?? null,
|
|
2173
|
+
cellStyle: this.cellStyleInfo?.className ?? null,
|
|
2174
|
+
fontSize: this.cellStyleInfo?.fontSize ?? null,
|
|
2175
|
+
letterSpacing: this.cellStyleInfo?.letterSpacing ?? null,
|
|
2176
|
+
marginTop: this.cellStyleInfo?.marginTop ?? null,
|
|
2177
|
+
marginBottom: this.cellStyleInfo?.marginBottom ?? null,
|
|
2168
2178
|
},
|
|
2169
2179
|
content: [],
|
|
2170
2180
|
};
|
|
@@ -2178,13 +2188,15 @@ export class LicitTableCellParaElement extends LicitElement {
|
|
|
2178
2188
|
vAlign;
|
|
2179
2189
|
isTableHeader;
|
|
2180
2190
|
isTransparentTable;
|
|
2181
|
-
|
|
2191
|
+
cellStyleInfo;
|
|
2192
|
+
constructor(node, bgColor, colwidth, vericalAlignment, isTableHeader, isTransparentTable, cellStyleInfo) {
|
|
2182
2193
|
super();
|
|
2183
2194
|
this.bgColor = bgColor;
|
|
2184
2195
|
this.colWidth = colwidth;
|
|
2185
2196
|
this.vAlign = vericalAlignment;
|
|
2186
2197
|
this.isTableHeader = isTableHeader;
|
|
2187
2198
|
this.isTransparentTable = isTransparentTable;
|
|
2199
|
+
this.cellStyleInfo = cellStyleInfo;
|
|
2188
2200
|
this.ConvertElements(node);
|
|
2189
2201
|
}
|
|
2190
2202
|
ConvertElements(node) {
|
|
@@ -2299,9 +2311,14 @@ export class LicitTableCellParaElement extends LicitElement {
|
|
|
2299
2311
|
}
|
|
2300
2312
|
}
|
|
2301
2313
|
export class LicitTableRowElement extends LicitElement {
|
|
2314
|
+
height;
|
|
2315
|
+
rowHeight;
|
|
2302
2316
|
getBaseElement() {
|
|
2303
2317
|
return {
|
|
2304
2318
|
type: 'table_row',
|
|
2319
|
+
attrs: {
|
|
2320
|
+
rowHeight: this.rowHeight,
|
|
2321
|
+
},
|
|
2305
2322
|
content: [],
|
|
2306
2323
|
};
|
|
2307
2324
|
}
|
|
@@ -2325,6 +2342,8 @@ export class LicitTableElement extends LicitElement {
|
|
|
2325
2342
|
marginLeft: null,
|
|
2326
2343
|
vignette: this.isVignette,
|
|
2327
2344
|
capco: this.capco,
|
|
2345
|
+
noOfColumns: this.noOfColumns ?? null,
|
|
2346
|
+
tableHeight: this.tableHeight ?? null,
|
|
2328
2347
|
},
|
|
2329
2348
|
content: [],
|
|
2330
2349
|
};
|
|
@@ -2332,6 +2351,8 @@ export class LicitTableElement extends LicitElement {
|
|
|
2332
2351
|
rows = [];
|
|
2333
2352
|
isVignette = false;
|
|
2334
2353
|
capco;
|
|
2354
|
+
noOfColumns;
|
|
2355
|
+
tableHeight;
|
|
2335
2356
|
constructor(isVignette, capco) {
|
|
2336
2357
|
super();
|
|
2337
2358
|
this.isVignette = isVignette;
|
package/licit-transform.d.ts
CHANGED
|
@@ -3,8 +3,6 @@
|
|
|
3
3
|
* @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
|
|
4
4
|
*/
|
|
5
5
|
import type { LicitDocumentJSON } from './licit-elements';
|
|
6
|
-
import { LicitBulletListElement, LicitDocumentElement, LicitEnhancedImageElement, LicitTableRowElement } from './licit-elements';
|
|
7
|
-
import type { UpdatedCapco } from './capco.util';
|
|
8
6
|
import type { MessageSink } from './types';
|
|
9
7
|
export interface ParserElement {
|
|
10
8
|
node: Element;
|
|
@@ -13,12 +11,6 @@ export interface ParserElement {
|
|
|
13
11
|
level: number;
|
|
14
12
|
subText: string;
|
|
15
13
|
}
|
|
16
|
-
interface ImageInfo {
|
|
17
|
-
src: string;
|
|
18
|
-
alt: string;
|
|
19
|
-
width: number;
|
|
20
|
-
height: number;
|
|
21
|
-
}
|
|
22
14
|
declare enum ParserElementType {
|
|
23
15
|
ChapterTitle = 0,
|
|
24
16
|
ChapterSubtitle = 1,
|
|
@@ -88,73 +80,63 @@ export interface AddCellOptions {
|
|
|
88
80
|
}
|
|
89
81
|
export declare class LicitConverter {
|
|
90
82
|
private readonly config;
|
|
91
|
-
|
|
92
|
-
elements
|
|
83
|
+
private readonly elementsParsedMap;
|
|
84
|
+
private elements;
|
|
93
85
|
constructor(config: TransformConfig);
|
|
94
86
|
parseHTML(html: Document, isDoctorine: boolean, moDocType?: string): LicitDocumentJSON;
|
|
95
87
|
parseFrameMakerHTML5(html: Element[]): LicitDocumentJSON;
|
|
96
|
-
render_FrameMakerHTML5_zip
|
|
97
|
-
render_FrameMakerHTML5_zip_SwitchHelper
|
|
88
|
+
private render_FrameMakerHTML5_zip;
|
|
89
|
+
private render_FrameMakerHTML5_zip_SwitchHelper;
|
|
98
90
|
private handleNodes;
|
|
99
|
-
fetchRenderedContent
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
*
|
|
103
|
-
* @returns Map of elements
|
|
104
|
-
*/
|
|
105
|
-
getElementsParsedMap(): Map<string, boolean>;
|
|
106
|
-
getCustomStyle(styleName: string): StyleInfo | undefined;
|
|
107
|
-
handleOrderedListItem(e: ParserElement, licitDocument: LicitDocumentElement): void;
|
|
91
|
+
private fetchRenderedContent;
|
|
92
|
+
private getCustomStyle;
|
|
93
|
+
private handleOrderedListItem;
|
|
108
94
|
/**
|
|
109
95
|
* Renders the HTML as a Licit JSON structure
|
|
110
96
|
*
|
|
111
97
|
* @returns The document as an `LicitDocumentJSON` object
|
|
112
98
|
*/
|
|
113
|
-
render
|
|
114
|
-
renderSwitchHelper
|
|
99
|
+
private render;
|
|
100
|
+
private renderSwitchHelper;
|
|
115
101
|
private renderTable;
|
|
116
102
|
private renderParagraph;
|
|
117
103
|
private renderHeader;
|
|
118
104
|
private buildElements;
|
|
119
|
-
checkChildNode
|
|
120
|
-
render_doc
|
|
121
|
-
render_docSwitchHelper
|
|
122
|
-
renderTypeParagraph
|
|
123
|
-
handle_UrlText
|
|
124
|
-
text_WithoutUrl
|
|
105
|
+
private checkChildNode;
|
|
106
|
+
private render_doc;
|
|
107
|
+
private render_docSwitchHelper;
|
|
108
|
+
private renderTypeParagraph;
|
|
109
|
+
private handle_UrlText;
|
|
110
|
+
private text_WithoutUrl;
|
|
125
111
|
private handleNode;
|
|
126
|
-
mergeSpans
|
|
127
|
-
updateChildCapcoContent
|
|
128
|
-
updateChildCapcoContentLoopHelper
|
|
129
|
-
processChildNodesCapco
|
|
130
|
-
updateCapcoToParagraph
|
|
131
|
-
processTableCapco
|
|
132
|
-
figureTitleCase
|
|
133
|
-
handleImageChild
|
|
134
|
-
renderNewFigureTitle
|
|
135
|
-
figureParagraphCase
|
|
136
|
-
figureNoteCase
|
|
137
|
-
figureTableTitleCase
|
|
112
|
+
private mergeSpans;
|
|
113
|
+
private updateChildCapcoContent;
|
|
114
|
+
private updateChildCapcoContentLoopHelper;
|
|
115
|
+
private processChildNodesCapco;
|
|
116
|
+
private updateCapcoToParagraph;
|
|
117
|
+
private processTableCapco;
|
|
118
|
+
private figureTitleCase;
|
|
119
|
+
private handleImageChild;
|
|
120
|
+
private renderNewFigureTitle;
|
|
121
|
+
private figureParagraphCase;
|
|
122
|
+
private figureNoteCase;
|
|
123
|
+
private figureTableTitleCase;
|
|
138
124
|
private renderDocVignet;
|
|
139
125
|
private parseUntypedDocVignet;
|
|
140
126
|
private parseTypedDocVignet;
|
|
141
|
-
parseTypedDocVignetHelper
|
|
142
|
-
bgColor: string;
|
|
143
|
-
borderColor: string;
|
|
144
|
-
boxWidth: number;
|
|
145
|
-
};
|
|
127
|
+
private parseTypedDocVignetHelper;
|
|
146
128
|
private renderDocTable;
|
|
147
129
|
private renderEnhancedTable;
|
|
148
130
|
private getLicitTable;
|
|
149
|
-
renderNewLicitImage
|
|
150
|
-
renderDocBulletItems
|
|
151
|
-
processBulletNodes
|
|
152
|
-
addElementLicit
|
|
153
|
-
removeEmptyATags
|
|
131
|
+
private renderNewLicitImage;
|
|
132
|
+
private renderDocBulletItems;
|
|
133
|
+
private processBulletNodes;
|
|
134
|
+
private addElementLicit;
|
|
135
|
+
private removeEmptyATags;
|
|
154
136
|
private handleULNode;
|
|
155
137
|
private renderDocFigure;
|
|
156
|
-
renderImage
|
|
157
|
-
parseOL
|
|
138
|
+
private renderImage;
|
|
139
|
+
private parseOL;
|
|
158
140
|
/**
|
|
159
141
|
* To parse table data
|
|
160
142
|
* @param e - element
|
|
@@ -166,114 +148,131 @@ export declare class LicitConverter {
|
|
|
166
148
|
* @param isTransparent - flag to distinguish preface table
|
|
167
149
|
* @returns void
|
|
168
150
|
*/
|
|
169
|
-
parseTableContent
|
|
170
|
-
parseTableContentInnerLoopHelper
|
|
151
|
+
private parseTableContent;
|
|
152
|
+
private parseTableContentInnerLoopHelper;
|
|
171
153
|
private addCell;
|
|
154
|
+
/**
|
|
155
|
+
* Extracts style information from a table cell element per the ingest requirements.
|
|
156
|
+
* Captures: margins (top/bottom), font-size overrides, and letter-spacing for non-breaking spaces.
|
|
157
|
+
*
|
|
158
|
+
* @param cell - The HTMLTableCellElement to extract styles from
|
|
159
|
+
* @returns Object containing extracted style information
|
|
160
|
+
*/
|
|
161
|
+
private extractCellStyles;
|
|
162
|
+
/**
|
|
163
|
+
* Extracts margin and font-size properties from a style string.
|
|
164
|
+
*
|
|
165
|
+
* @param style - The style attribute string
|
|
166
|
+
* @param styleInfo - The style info object to populate
|
|
167
|
+
*/
|
|
168
|
+
private extractParagraphStyles;
|
|
169
|
+
/**
|
|
170
|
+
* Extracts the first letter-spacing value from spans containing non-breaking spaces.
|
|
171
|
+
*
|
|
172
|
+
* @param spans - NodeList of span elements with letter-spacing styles
|
|
173
|
+
* @param styleInfo - The style info object to populate
|
|
174
|
+
*/
|
|
175
|
+
private extractLetterSpacing;
|
|
172
176
|
checkCellStyle(style: string | null): string | null;
|
|
173
177
|
private addTableImageCell;
|
|
174
|
-
ParseNestedList
|
|
178
|
+
private ParseNestedList;
|
|
175
179
|
/**
|
|
176
180
|
* Returns the level of an element as described by the number at the end of its classname
|
|
177
181
|
*
|
|
178
182
|
* @param className - The className of the element
|
|
179
183
|
* @returns The level as a number or zero if the level cannot be determined
|
|
180
184
|
*/
|
|
181
|
-
extractLevel
|
|
185
|
+
private extractLevel;
|
|
182
186
|
/**
|
|
183
187
|
* Determines if an element is a table or image then calls the appropriate parse method
|
|
184
188
|
*/
|
|
185
|
-
parseTableFigure
|
|
189
|
+
private parseTableFigure;
|
|
186
190
|
/**
|
|
187
191
|
* Parse a table element
|
|
188
192
|
*/
|
|
189
|
-
parseTable
|
|
193
|
+
private parseTable;
|
|
190
194
|
/**
|
|
191
195
|
* Parse a table element
|
|
192
196
|
*/
|
|
193
|
-
parseVignet
|
|
197
|
+
private parseVignet;
|
|
194
198
|
/**
|
|
195
199
|
* Parse a figure (image) element
|
|
196
200
|
*/
|
|
197
|
-
parseFigure
|
|
201
|
+
private parseFigure;
|
|
198
202
|
/**
|
|
199
203
|
* Parse a note element
|
|
200
204
|
*/
|
|
201
|
-
parseNote
|
|
205
|
+
private parseNote;
|
|
202
206
|
/**
|
|
203
207
|
* Parse a hr element
|
|
204
208
|
*/
|
|
205
|
-
parseHR
|
|
209
|
+
private parseHR;
|
|
206
210
|
/**
|
|
207
211
|
* Parse a chapter title element
|
|
208
212
|
*/
|
|
209
|
-
parseChapterTitle
|
|
213
|
+
private parseChapterTitle;
|
|
210
214
|
/**
|
|
211
215
|
* Parse a chapter subtitle element
|
|
212
216
|
*/
|
|
213
|
-
parseChapterSubtitle
|
|
217
|
+
private parseChapterSubtitle;
|
|
214
218
|
/**
|
|
215
219
|
* Parse a header element
|
|
216
220
|
*/
|
|
217
|
-
parseHeader
|
|
221
|
+
private parseHeader;
|
|
218
222
|
/**
|
|
219
223
|
* Parse a bullet point item element
|
|
220
224
|
*/
|
|
221
|
-
parseBullet
|
|
225
|
+
private parseBullet;
|
|
222
226
|
/**
|
|
223
227
|
* Parse a ordered list point item element
|
|
224
228
|
*/
|
|
225
|
-
parseOrdered
|
|
229
|
+
private parseOrdered;
|
|
226
230
|
/**
|
|
227
231
|
* Parse a paragraph element
|
|
228
232
|
*/
|
|
229
|
-
parseParagraph
|
|
230
|
-
parseDynamicHeader
|
|
233
|
+
private parseParagraph;
|
|
234
|
+
private parseDynamicHeader;
|
|
231
235
|
/** Sanitize the text content by removing specific characters */
|
|
232
|
-
sanitizeText
|
|
236
|
+
private sanitizeText;
|
|
233
237
|
/**
|
|
234
238
|
* Parse a figure (image) title element
|
|
235
239
|
*/
|
|
236
|
-
parseFigureTitle
|
|
240
|
+
private parseFigureTitle;
|
|
237
241
|
/**
|
|
238
242
|
* Parse a ChangeBarPara element
|
|
239
243
|
*/
|
|
240
|
-
parseChangeBarPara
|
|
244
|
+
private parseChangeBarPara;
|
|
241
245
|
/**
|
|
242
246
|
* Parse a table title element
|
|
243
247
|
*/
|
|
244
|
-
parseTableTitle
|
|
248
|
+
private parseTableTitle;
|
|
245
249
|
/**
|
|
246
250
|
* Parse an unknown element. Currently does nothing besides printing a warning to the console.
|
|
247
251
|
*/
|
|
248
|
-
parseUnknownElement
|
|
252
|
+
private parseUnknownElement;
|
|
249
253
|
/**
|
|
250
254
|
* Parse a section title element
|
|
251
255
|
*/
|
|
252
|
-
parseSectionTitle
|
|
256
|
+
private parseSectionTitle;
|
|
253
257
|
/**
|
|
254
258
|
* Parses an `Element` as determined by its `className`
|
|
255
259
|
*
|
|
256
260
|
* @param element - The `Element` to be parsed
|
|
257
261
|
*/
|
|
258
|
-
parseElement
|
|
259
|
-
parseElement_doc
|
|
262
|
+
private parseElement;
|
|
263
|
+
private parseElement_doc;
|
|
260
264
|
/**
|
|
261
265
|
* Cleans up the HTML by calling certain helper methods
|
|
262
266
|
*/
|
|
263
|
-
sanitizeHTML
|
|
264
|
-
/**
|
|
265
|
-
* Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
|
|
266
|
-
*/
|
|
267
|
-
replaceUnwantedChars(html: string): string;
|
|
267
|
+
private sanitizeHTML;
|
|
268
268
|
/**
|
|
269
269
|
* Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
|
|
270
270
|
*/
|
|
271
|
-
replaceKeywordsWithLinks
|
|
272
|
-
matchClassToExcludeNumber
|
|
273
|
-
sanitizeElement
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
isTransparentTable(element: Element): boolean;
|
|
271
|
+
private replaceKeywordsWithLinks;
|
|
272
|
+
private matchClassToExcludeNumber;
|
|
273
|
+
private sanitizeElement;
|
|
274
|
+
private getScaledWidth;
|
|
275
|
+
private isTransparentTable;
|
|
277
276
|
/**
|
|
278
277
|
* Extracts and calculates the column widths from a given HTML table element.
|
|
279
278
|
*
|
|
@@ -286,24 +285,24 @@ export declare class LicitConverter {
|
|
|
286
285
|
* @param {HTMLTableElement} table - The HTML table element from which column widths are to be extracted.
|
|
287
286
|
* @returns {number[] | undefined} An array of column widths in pixels, or `undefined` if the widths are invalid or missing.
|
|
288
287
|
*/
|
|
289
|
-
getColWidthArray
|
|
290
|
-
setCellWidth
|
|
291
|
-
scaleWidthArray
|
|
292
|
-
getSumOfArray
|
|
288
|
+
private getColWidthArray;
|
|
289
|
+
private setCellWidth;
|
|
290
|
+
private scaleWidthArray;
|
|
291
|
+
private getSumOfArray;
|
|
293
292
|
/**
|
|
294
293
|
* Determines the orientation (portrait or landscape) based on the total width.
|
|
295
294
|
*
|
|
296
295
|
* @param {number} totalWidth - The total width (in pixels) used to determine orientation.
|
|
297
296
|
* @returns {'portrait' | 'landscape'} Returns 'portrait' if the width is less than 700 pixels; otherwise, returns 'landscape'.
|
|
298
297
|
*/
|
|
299
|
-
findOrientation
|
|
298
|
+
private findOrientation;
|
|
300
299
|
/**
|
|
301
300
|
* Extracts image information from an HTMLImageElement.
|
|
302
301
|
*
|
|
303
302
|
* @param {HTMLImageElement} img - The image element to extract information from.
|
|
304
303
|
* @returns {{ src: string; alt: string; width: number; height: number }} An object containing the image's source URL, alt text, width, and height.
|
|
305
304
|
*/
|
|
306
|
-
extractImageInfo
|
|
305
|
+
private extractImageInfo;
|
|
307
306
|
/**
|
|
308
307
|
* Extracts note paragraphs from the last row of an HTML table if that row
|
|
309
308
|
* contains a note header such as "OVERALL NOTE:" or "NOTES:".
|
|
@@ -345,7 +344,7 @@ export declare class LicitConverter {
|
|
|
345
344
|
* @param {Element} node - The DOM element to check.
|
|
346
345
|
* @returns {boolean} `true` if the element qualifies as a table figure, otherwise `false`.
|
|
347
346
|
*/
|
|
348
|
-
isTableFigureNode
|
|
347
|
+
private isTableFigureNode;
|
|
349
348
|
/**
|
|
350
349
|
* Determines whether the provided class name corresponds to a note-related node.
|
|
351
350
|
*
|
package/licit-transform.js
CHANGED
|
@@ -260,14 +260,6 @@ export class LicitConverter {
|
|
|
260
260
|
}
|
|
261
261
|
return renderedArr;
|
|
262
262
|
}
|
|
263
|
-
/**
|
|
264
|
-
* Returns a map elements which were parsed.
|
|
265
|
-
*
|
|
266
|
-
* @returns Map of elements
|
|
267
|
-
*/
|
|
268
|
-
getElementsParsedMap() {
|
|
269
|
-
return this.elementsParsedMap;
|
|
270
|
-
}
|
|
271
263
|
getCustomStyle(styleName) {
|
|
272
264
|
return this.config.customStyles?.find((s) => s.styleName === styleName);
|
|
273
265
|
}
|
|
@@ -467,7 +459,7 @@ export class LicitConverter {
|
|
|
467
459
|
}
|
|
468
460
|
const childNode = children[j];
|
|
469
461
|
let nextChildNode = children[j + 1];
|
|
470
|
-
//
|
|
462
|
+
// Handling paragraph combining logic for the case where
|
|
471
463
|
// heading is inside <OL>/<UL> and content is outside
|
|
472
464
|
if (!nextChildNode &&
|
|
473
465
|
(node.tagName === 'OL' || node.tagName === 'UL') &&
|
|
@@ -972,6 +964,7 @@ export class LicitConverter {
|
|
|
972
964
|
renderDocTable(e, licitDocument) {
|
|
973
965
|
const licitTable = new LicitTableElement();
|
|
974
966
|
const colWidthsArray = this.getColWidthArray(e.node);
|
|
967
|
+
licitTable.noOfColumns = colWidthsArray?.length ?? 0;
|
|
975
968
|
const tableHead = e.node.querySelector('thead');
|
|
976
969
|
const table = e.node.querySelector('tbody');
|
|
977
970
|
licitTable.capco = getCapcoFromNode(table);
|
|
@@ -1183,6 +1176,7 @@ export class LicitConverter {
|
|
|
1183
1176
|
*/
|
|
1184
1177
|
parseTableContent(_e, tableTag, querySel, isChapterHeader, licitTable, widthArray, isTransparent) {
|
|
1185
1178
|
const rows = tableTag.querySelectorAll('tr');
|
|
1179
|
+
let totalTableHeight = 0;
|
|
1186
1180
|
for (let i = 0; i < rows.length; i++) {
|
|
1187
1181
|
if (!isTransparent &&
|
|
1188
1182
|
i == 0 &&
|
|
@@ -1191,11 +1185,21 @@ export class LicitConverter {
|
|
|
1191
1185
|
isChapterHeader = true;
|
|
1192
1186
|
}
|
|
1193
1187
|
const licitRow = new LicitTableRowElement();
|
|
1188
|
+
// ** Capture row height **
|
|
1189
|
+
const rowHeight = rows[i].getAttribute('height');
|
|
1190
|
+
if (rowHeight) {
|
|
1191
|
+
licitRow.height = rowHeight;
|
|
1192
|
+
licitRow.rowHeight = rowHeight;
|
|
1193
|
+
totalTableHeight += parseFloat(rowHeight);
|
|
1194
|
+
}
|
|
1194
1195
|
const cells = rows[i].querySelectorAll(querySel);
|
|
1195
1196
|
this.parseTableContentInnerLoopHelper(cells, i, isChapterHeader, licitRow, widthArray, isTransparent);
|
|
1196
1197
|
licitTable.addRow(licitRow);
|
|
1197
1198
|
isChapterHeader = false;
|
|
1198
1199
|
}
|
|
1200
|
+
if (totalTableHeight > 0) {
|
|
1201
|
+
licitTable.tableHeight = `${totalTableHeight}px`;
|
|
1202
|
+
}
|
|
1199
1203
|
}
|
|
1200
1204
|
parseTableContentInnerLoopHelper(cells, _cellIndex, isChapterHeader, licitRow, widthArray, isTransparent) {
|
|
1201
1205
|
for (let j = 0; j < cells.length; j++) {
|
|
@@ -1241,13 +1245,19 @@ export class LicitConverter {
|
|
|
1241
1245
|
let colWidth;
|
|
1242
1246
|
let licitCell = null;
|
|
1243
1247
|
const text = cell.textContent ?? '';
|
|
1248
|
+
// Extract cell-level style information**
|
|
1249
|
+
const cellStyleInfo = this.extractCellStyles(cell);
|
|
1250
|
+
if (widthArray?.length > 0) {
|
|
1251
|
+
const computedWidth = this.setCellWidth(colspan, cellIndex, widthArray);
|
|
1252
|
+
cellStyleInfo.cellWidth = computedWidth?.join(',');
|
|
1253
|
+
}
|
|
1244
1254
|
if (cell.childNodes?.length <= 0) {
|
|
1245
1255
|
//condition
|
|
1246
|
-
licitCell = new LicitTableCellParaElement(cell, bgColor, null, verAlign, isChapterHeader, isTransparent);
|
|
1256
|
+
licitCell = new LicitTableCellParaElement(cell, bgColor, null, verAlign, isChapterHeader, isTransparent, cellStyleInfo);
|
|
1247
1257
|
}
|
|
1248
1258
|
else if ('' === text &&
|
|
1249
1259
|
cell.childNodes[0].querySelector('img')) {
|
|
1250
|
-
({ licitCell } = this.addTableImageCell(cell, bgColor, isChapterHeader, licitCell, verAlign));
|
|
1260
|
+
({ licitCell } = this.addTableImageCell(cell, bgColor, isChapterHeader, licitCell, verAlign, cellStyleInfo));
|
|
1251
1261
|
}
|
|
1252
1262
|
else {
|
|
1253
1263
|
if (isChapterHeader) {
|
|
@@ -1255,7 +1265,7 @@ export class LicitConverter {
|
|
|
1255
1265
|
cell.align = 'center'; // NOSONAR used by Licit parser (depricated)
|
|
1256
1266
|
cell.setAttribute('classname', 'LC-Center');
|
|
1257
1267
|
}
|
|
1258
|
-
licitCell = new LicitTableCellParaElement(cell, bgColor, colWidth, verAlign, isChapterHeader, isTransparent);
|
|
1268
|
+
licitCell = new LicitTableCellParaElement(cell, bgColor, colWidth, verAlign, isChapterHeader, isTransparent, cellStyleInfo);
|
|
1259
1269
|
}
|
|
1260
1270
|
licitCell.rowspan = rowspan;
|
|
1261
1271
|
licitCell.colspan = colspan;
|
|
@@ -1264,6 +1274,80 @@ export class LicitConverter {
|
|
|
1264
1274
|
}
|
|
1265
1275
|
licitRow.addCell(licitCell);
|
|
1266
1276
|
}
|
|
1277
|
+
/**
|
|
1278
|
+
* Extracts style information from a table cell element per the ingest requirements.
|
|
1279
|
+
* Captures: margins (top/bottom), font-size overrides, and letter-spacing for non-breaking spaces.
|
|
1280
|
+
*
|
|
1281
|
+
* @param cell - The HTMLTableCellElement to extract styles from
|
|
1282
|
+
* @returns Object containing extracted style information
|
|
1283
|
+
*/
|
|
1284
|
+
extractCellStyles(cell) {
|
|
1285
|
+
const styleInfo = {};
|
|
1286
|
+
// Capture class and ID from the paragraph inside the cell
|
|
1287
|
+
const paragraph = cell.querySelector('p');
|
|
1288
|
+
if (paragraph) {
|
|
1289
|
+
if (paragraph.className) {
|
|
1290
|
+
styleInfo.className = paragraph.className;
|
|
1291
|
+
}
|
|
1292
|
+
if (paragraph.id) {
|
|
1293
|
+
styleInfo.id = paragraph.id;
|
|
1294
|
+
}
|
|
1295
|
+
// Extract style attributes from the paragraph's style attribute
|
|
1296
|
+
const style = paragraph.getAttribute('style');
|
|
1297
|
+
if (style) {
|
|
1298
|
+
this.extractParagraphStyles(style, styleInfo);
|
|
1299
|
+
}
|
|
1300
|
+
// Extract letter-spacing for non-breaking spaces
|
|
1301
|
+
const spans = paragraph.querySelectorAll('span[style*="letter-spacing"]');
|
|
1302
|
+
this.extractLetterSpacing(spans, styleInfo);
|
|
1303
|
+
}
|
|
1304
|
+
return styleInfo;
|
|
1305
|
+
}
|
|
1306
|
+
/**
|
|
1307
|
+
* Extracts margin and font-size properties from a style string.
|
|
1308
|
+
*
|
|
1309
|
+
* @param style - The style attribute string
|
|
1310
|
+
* @param styleInfo - The style info object to populate
|
|
1311
|
+
*/
|
|
1312
|
+
extractParagraphStyles(style, styleInfo) {
|
|
1313
|
+
const styleProps = style.split(';');
|
|
1314
|
+
for (const prop of styleProps) {
|
|
1315
|
+
const trimmedProp = prop.trim();
|
|
1316
|
+
if (trimmedProp.startsWith('margin-top')) {
|
|
1317
|
+
styleInfo.marginTop = trimmedProp.split(':')[1]?.trim();
|
|
1318
|
+
}
|
|
1319
|
+
else if (trimmedProp.startsWith('margin-bottom')) {
|
|
1320
|
+
styleInfo.marginBottom = trimmedProp.split(':')[1]?.trim();
|
|
1321
|
+
}
|
|
1322
|
+
else if (trimmedProp.startsWith('font-size')) {
|
|
1323
|
+
styleInfo.fontSize = trimmedProp.split(':')[1]?.trim();
|
|
1324
|
+
}
|
|
1325
|
+
}
|
|
1326
|
+
}
|
|
1327
|
+
/**
|
|
1328
|
+
* Extracts the first letter-spacing value from spans containing non-breaking spaces.
* Spans are scanned in order; the scan stops at the first span that passes the content
* check and whose inline `style` yields a `letter-spacing` value. That value is stored,
* trimmed, on `styleInfo.letterSpacing`. Nothing is written when no span qualifies.
|
|
1329
|
+
*
|
|
1330
|
+
* @param spans - NodeList of span elements with letter-spacing styles
|
|
1331
|
+
* @param styleInfo - The style info object to populate
|
|
1332
|
+
*/
|
|
1333
|
+
extractLetterSpacing(spans, styleInfo) {
|
|
1334
|
+
// Captures the declaration value up to the next ';'. The quantifiers are bounded
// rather than open-ended - presumably to satisfy a regex-backtracking lint rule; confirm.
const letterSpacingRegex = /letter-spacing\s{0,1000}:\s{0,1000}([^;]{1,1000})/;
|
|
1335
|
+
for (const span of Array.from(spans)) {
|
|
1336
|
+
// Check if this span contains a non-breaking space
|
|
1337
|
+
const content = span.innerHTML;
|
|
1338
|
+
// NOTE(review): the two includes() arguments render identically in this view;
// presumably one is the nbsp entity text and the other a literal U+00A0 character -
// confirm against the repository source before touching this line.
if (content.includes(' ') || content.includes(' ')) {
|
|
1339
|
+
const spanStyle = span.getAttribute('style');
|
|
1340
|
+
if (spanStyle) {
|
|
1341
|
+
const match = letterSpacingRegex.exec(spanStyle);
|
|
1342
|
+
if (match) {
|
|
1343
|
+
// Store the first letter-spacing value found
|
|
1344
|
+
styleInfo.letterSpacing = match[1].trim();
|
|
1345
|
+
break;
|
|
1346
|
+
}
|
|
1347
|
+
}
|
|
1348
|
+
}
|
|
1349
|
+
}
|
|
1350
|
+
}
|
|
1267
1351
|
checkCellStyle(style) {
|
|
1268
1352
|
let borderColor = null;
|
|
1269
1353
|
if (style != null) {
|
|
@@ -1282,7 +1366,7 @@ export class LicitConverter {
|
|
|
1282
1366
|
}
|
|
1283
1367
|
return borderColor;
|
|
1284
1368
|
}
|
|
1285
|
-
addTableImageCell(cell, bgColor, isChapterHeader, licitCell, verAlign) {
|
|
1369
|
+
addTableImageCell(cell, bgColor, isChapterHeader, licitCell, verAlign, cellStyleInfo) {
|
|
1286
1370
|
const image = cell.childNodes[0].querySelector('img');
|
|
1287
1371
|
let altText = null;
|
|
1288
1372
|
let imgHeight = null;
|
|
@@ -1303,10 +1387,10 @@ export class LicitConverter {
|
|
|
1303
1387
|
const source = image?.getAttribute('srcRelative') ?? image?.src;
|
|
1304
1388
|
if (source) {
|
|
1305
1389
|
// seybi excluded image
|
|
1306
|
-
licitCell = new LicitTableCellImageElement(source, fillImg, fitoParent, bgColor, imgHeight, colWidth, altText);
|
|
1390
|
+
licitCell = new LicitTableCellImageElement(source, fillImg, fitoParent, bgColor, imgHeight, colWidth, altText, cellStyleInfo);
|
|
1307
1391
|
}
|
|
1308
1392
|
else {
|
|
1309
|
-
licitCell = new LicitTableCellParagraph(cell, bgColor, colWidth, verAlign);
|
|
1393
|
+
licitCell = new LicitTableCellParagraph(cell, bgColor, colWidth, verAlign, cellStyleInfo);
|
|
1310
1394
|
}
|
|
1311
1395
|
return { bgColor, isChapterHeader, licitCell };
|
|
1312
1396
|
}
|
|
@@ -1894,16 +1978,6 @@ export class LicitConverter {
|
|
|
1894
1978
|
sanitizeHTML(html) {
|
|
1895
1979
|
return this.replaceKeywordsWithLinks(html);
|
|
1896
1980
|
}
|
|
1897
|
-
/**
|
|
1898
|
-
* Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
|
|
1899
|
-
*/
|
|
1900
|
-
replaceUnwantedChars(html) {
|
|
1901
|
-
const chars = this.config.replacementChars;
|
|
1902
|
-
for (const char of chars) {
|
|
1903
|
-
html = html.replace(char.find, char.replace);
|
|
1904
|
-
}
|
|
1905
|
-
return html;
|
|
1906
|
-
}
|
|
1907
1981
|
/**
|
|
1908
1982
|
* Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
|
|
1909
1983
|
*/
|
|
@@ -1916,7 +1990,7 @@ export class LicitConverter {
|
|
|
1916
1990
|
}
|
|
1917
1991
|
return html;
|
|
1918
1992
|
}
|
|
1919
|
-
//
|
|
1993
|
+
// For skipping trimming inside tables; add more classes to the class list for future use
|
|
1920
1994
|
matchClassToExcludeNumber(className) {
|
|
1921
1995
|
let trimmedClassName = className.trim();
|
|
1922
1996
|
trimmedClassName = trimmedClassName.toLowerCase();
|
|
@@ -1950,14 +2024,6 @@ export class LicitConverter {
|
|
|
1950
2024
|
};
|
|
1951
2025
|
stripTextContent(element);
|
|
1952
2026
|
}
|
|
1953
|
-
removeLastNumber(inputString) {
|
|
1954
|
-
let lastNonDigitIndex = inputString.length - 1;
|
|
1955
|
-
while (lastNonDigitIndex >= 0 &&
|
|
1956
|
-
!Number.isNaN(Number.parseInt(inputString[lastNonDigitIndex]))) {
|
|
1957
|
-
lastNonDigitIndex--;
|
|
1958
|
-
}
|
|
1959
|
-
return inputString.slice(0, lastNonDigitIndex + 1);
|
|
1960
|
-
}
|
|
1961
2027
|
getScaledWidth(width) {
|
|
1962
2028
|
if (width <= 200) {
|
|
1963
2029
|
return width.toString();
|
package/package.json
CHANGED
|
@@ -1,52 +1,52 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@modusoperandi/licit-import-utils",
|
|
3
|
-
"version": "0.1.
|
|
4
|
-
"license": "MIT",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"subversion": "1",
|
|
7
|
-
"description": "A utility package for importing files like json or docx into Licit compatible documents",
|
|
8
|
-
"main": "index.js",
|
|
9
|
-
"types": "index.d.ts",
|
|
10
|
-
"repository": {
|
|
11
|
-
"type": "git",
|
|
12
|
-
"url": "git+https://github.com/MO-Movia/licit-import-utils.git"
|
|
13
|
-
},
|
|
14
|
-
"scripts": {
|
|
15
|
-
"test": "jest",
|
|
16
|
-
"test:unit": "jest",
|
|
17
|
-
"test:coverage": "jest --env=jsdom --coverage",
|
|
18
|
-
"build:clean": "rm -rf dist/ && rm -f modusoperandi-*.*.*.tgz",
|
|
19
|
-
"lint": "eslint src",
|
|
20
|
-
"ci:build": "tsc -b tsconfig.prod.json --clean && tsc -b tsconfig.prod.json && npx copyfiles@2.4.1 package.json LICENSE dist",
|
|
21
|
-
"ci:bom": "npx @cyclonedx/cyclonedx-npm --ignore-npm-errors --short-PURLs --output-format XML --output-file dist/bom.xml",
|
|
22
|
-
"verify": "npm run lint -- --fix && npm run ci:build && npm run test:coverage && echo 'All Tests Passed!'"
|
|
23
|
-
},
|
|
24
|
-
"peerDependencies": {
|
|
25
|
-
"@modusoperandi/mammoth": "^1.7.0-6",
|
|
26
|
-
"jszip": "^3.10.1"
|
|
27
|
-
},
|
|
28
|
-
"peerDependenciesMeta": {
|
|
29
|
-
"@modusoperandi/mammoth": {
|
|
30
|
-
"optional": true
|
|
31
|
-
},
|
|
32
|
-
"jszip": {
|
|
33
|
-
"optional": true
|
|
34
|
-
}
|
|
35
|
-
},
|
|
36
|
-
"dependencies": {
|
|
37
|
-
"uuid": "^13.0.0"
|
|
38
|
-
},
|
|
39
|
-
"devDependencies": {
|
|
40
|
-
"@modusoperandi/mammoth": "^1.7.0-
|
|
41
|
-
"@modusoperandi/eslint-config": "^3.0.3",
|
|
42
|
-
"@types/jest": "^30.0.0",
|
|
43
|
-
"jszip": "^3.10.1",
|
|
44
|
-
"eslint": "^9.39.
|
|
45
|
-
"jest": "^30.2.0",
|
|
46
|
-
"jest-environment-jsdom": "^30.2.0",
|
|
47
|
-
"jest-junit": "^16.0.0",
|
|
48
|
-
"ts-jest": "^29.4.6",
|
|
49
|
-
"ts-node": "^10.9.2",
|
|
50
|
-
"typescript": "^5.9.3"
|
|
51
|
-
}
|
|
52
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "@modusoperandi/licit-import-utils",
|
|
3
|
+
"version": "0.1.2",
|
|
4
|
+
"license": "MIT",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"subversion": "1",
|
|
7
|
+
"description": "A utility package for importing files like json or docx into Licit compatible documents",
|
|
8
|
+
"main": "index.js",
|
|
9
|
+
"types": "index.d.ts",
|
|
10
|
+
"repository": {
|
|
11
|
+
"type": "git",
|
|
12
|
+
"url": "git+https://github.com/MO-Movia/licit-import-utils.git"
|
|
13
|
+
},
|
|
14
|
+
"scripts": {
|
|
15
|
+
"test": "jest",
|
|
16
|
+
"test:unit": "jest",
|
|
17
|
+
"test:coverage": "jest --env=jsdom --coverage",
|
|
18
|
+
"build:clean": "rm -rf dist/ && rm -f modusoperandi-*.*.*.tgz",
|
|
19
|
+
"lint": "eslint src",
|
|
20
|
+
"ci:build": "tsc -b tsconfig.prod.json --clean && tsc -b tsconfig.prod.json && npx copyfiles@2.4.1 package.json LICENSE README.md dist",
|
|
21
|
+
"ci:bom": "npx @cyclonedx/cyclonedx-npm --ignore-npm-errors --short-PURLs --output-format XML --output-file dist/bom.xml",
|
|
22
|
+
"verify": "npm run lint -- --fix && npm run ci:build && npm run test:coverage && echo 'All Tests Passed!'"
|
|
23
|
+
},
|
|
24
|
+
"peerDependencies": {
|
|
25
|
+
"@modusoperandi/mammoth": "^1.7.0-6",
|
|
26
|
+
"jszip": "^3.10.1"
|
|
27
|
+
},
|
|
28
|
+
"peerDependenciesMeta": {
|
|
29
|
+
"@modusoperandi/mammoth": {
|
|
30
|
+
"optional": true
|
|
31
|
+
},
|
|
32
|
+
"jszip": {
|
|
33
|
+
"optional": true
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
"dependencies": {
|
|
37
|
+
"uuid": "^13.0.0"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@modusoperandi/mammoth": "^1.7.0-8",
|
|
41
|
+
"@modusoperandi/eslint-config": "^3.0.3",
|
|
42
|
+
"@types/jest": "^30.0.0",
|
|
43
|
+
"jszip": "^3.10.1",
|
|
44
|
+
"eslint": "^9.39.3",
|
|
45
|
+
"jest": "^30.2.0",
|
|
46
|
+
"jest-environment-jsdom": "^30.2.0",
|
|
47
|
+
"jest-junit": "^16.0.0",
|
|
48
|
+
"ts-jest": "^29.4.6",
|
|
49
|
+
"ts-node": "^10.9.2",
|
|
50
|
+
"typescript": "^5.9.3"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license MIT
|
|
3
|
+
* @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
|
|
4
|
+
*/
|
|
5
|
+
import type { Message } from './types';
|
|
6
|
+
/**
 * Transforms a document with `DocxTransformer` and collects the style IDs mentioned
 * in the messages the transformer reports.
 *
 * @param arrayBuffer - Document bytes passed to the transformer
 * @param docType - Document type passed to the `DocxTransformer` constructor
 * @returns Resolves to `{ styles }`: the distinct style IDs in first-seen order
 */
export declare function extractStylesForDoc(arrayBuffer: ArrayBuffer, docType: string): Promise<{
|
|
7
|
+
styles: string[];
|
|
8
|
+
}>;
|
|
9
|
+
/**
 * Pulls the text that follows `Style ID: ` (up to the next `)`) out of each message
 * and returns the distinct non-empty values in first-seen order.
 *
 * @param data - Messages to scan; a null/undefined value is treated as an empty list
 * @returns The distinct style IDs found
 */
export declare function extractUniqueStyleIds(data: Message[]): string[];
|
|
10
|
+
/**
 * Decodes the buffer as UTF-8, parses it as JSON and gathers every string stored
 * under a `styleName` key anywhere in the parsed value.
 * Fails when the text is not valid JSON or the parsed value is not an object.
 *
 * @param arrayBuffer - Bytes of the JSON document
 * @returns Resolves to the decoded text (`content`) and the distinct style names (`styles`)
 */
export declare function extractStylesForJSON(arrayBuffer: ArrayBuffer): Promise<{
|
|
11
|
+
content: string;
|
|
12
|
+
styles: string[];
|
|
13
|
+
}>;
|
|
14
|
+
/**
 * Recursively walks `obj` and appends each string value stored under a `styleName`
 * key to `styles`, skipping values that are already present.
 *
 * @param obj - Value to walk; anything that is not a non-null object is ignored
 * @param styles - Accumulator, mutated in place (defaults to a new empty array)
 * @returns The same `styles` array
 */
export declare function collectStyles(obj: unknown, styles?: string[]): string[];
|
|
15
|
+
/**
 * Decodes the buffer to text, parses it as `text/html` with `DOMParser` and returns
 * the class names used in the resulting document.
 *
 * @param arrayBuffer - Bytes of the HTML file
 * @returns Resolves to `{ styles }`: the distinct class names found
 */
export declare function processHTML(arrayBuffer: ArrayBuffer): Promise<{
|
|
16
|
+
styles: string[];
|
|
17
|
+
}>;
|
|
18
|
+
/**
 * Loads a zip with JSZip and merges the class names found in every entry whose name
 * ends in `.htm`.
 * Asks for confirmation through `confirm` when the archive is larger than 1 GB or holds
 * more than 10000 entries, and throws when the prompt is declined.
 *
 * @param zipFile - The zip archive to inspect
 * @returns Resolves to `{ styles }`: the distinct class names across all matching entries
 */
export declare function extractStylesFromZip(zipFile: File): Promise<{
|
|
19
|
+
styles: string[];
|
|
20
|
+
}>;
|
|
21
|
+
/**
 * Decodes the bytes of `arrayBuffer` to a string using a default `TextDecoder` (UTF-8).
 *
 * @param arrayBuffer - Bytes to decode
 * @returns The decoded text
 */
export declare function arrayBufferToString(arrayBuffer: ArrayBuffer): string;
|
|
22
|
+
/**
 * Returns the distinct class names found on the elements of `doc` that carry a
 * `class` attribute, in first-seen order.
 *
 * @param doc - Parsed document to scan
 * @returns The distinct class names
 */
export declare function extractStyleNamesFromHTML(doc: Document): string[];
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license MIT
|
|
3
|
+
* @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
|
|
4
|
+
*/
|
|
5
|
+
import JSZip from 'jszip';
|
|
6
|
+
import { DocxTransformer } from './transform.docx';
|
|
7
|
+
/**
 * Runs a document through `DocxTransformer` and reports the style IDs named in the
 * messages the transformer emits along the way.
 *
 * @param arrayBuffer - Document bytes handed to the transformer
 * @param docType - Document type forwarded to the `DocxTransformer` constructor
 * @returns An object whose `styles` lists each reported style ID once
 */
export async function extractStylesForDoc(arrayBuffer, docType) {
    const collected = [];
    const recordMessage = (type, message) => collected.push({ type, message });
    const transformer = new DocxTransformer(docType, recordMessage);
    // The transform result itself is discarded; only the messages raised while it runs are used.
    await transformer.transform(arrayBuffer);
    return { styles: extractUniqueStyleIds(collected) };
}
|
|
15
|
+
/**
 * Extracts the distinct style IDs mentioned in a list of messages.
 *
 * A style ID is the text between `Style ID: ` and the next `)` in `message`
 * (at most 100 characters). Empty IDs and messages without a match are ignored.
 *
 * @param data - Messages to scan; null/undefined is treated as an empty list
 * @returns The distinct style IDs in first-seen order
 */
export function extractUniqueStyleIds(data) {
    const styleIdPattern = /Style ID: (.{0,100}?)(?=\))/;
    // A Set keeps insertion order, so the output order matches first appearance.
    const seen = new Set();
    for (const entry of data ?? []) {
        const styleId = styleIdPattern.exec(entry.message)?.[1];
        if (styleId) {
            seen.add(styleId);
        }
    }
    return [...seen];
}
|
|
27
|
+
/**
 * Decodes a JSON document and gathers the style names it references.
 *
 * Declared `async` so that every failure (malformed JSON, or a parsed value that is
 * not an object) surfaces as a rejected promise instead of a synchronous throw from a
 * function whose contract is to return a promise.
 *
 * @param arrayBuffer - UTF-8 encoded bytes of the JSON document
 * @returns Resolves to the decoded text (`content`) and the distinct `styleName`
 *   values found anywhere in the parsed value (`styles`)
 * @throws (as a rejection) `SyntaxError` for malformed JSON; `Error('Invalid JSON document')`
 *   when the parsed value is a primitive or `null`
 */
export async function extractStylesForJSON(arrayBuffer) {
    const content = new TextDecoder('utf-8').decode(arrayBuffer);
    const jsonObject = JSON.parse(content);
    if (typeof jsonObject !== 'object' || jsonObject === null) {
        throw new Error('Invalid JSON document');
    }
    const styles = collectStyles(jsonObject, []);
    return { content, styles };
}
|
|
38
|
+
// Preprocessor to handle the JSON formatted documents
|
|
39
|
+
/**
 * Walks a parsed JSON value depth-first and records every string stored under a
 * `styleName` key.
 *
 * @param obj - Value to walk; anything that is not a non-null object is ignored
 * @param styles - Accumulator, mutated in place; names already present are not re-added
 * @returns The same `styles` array that was passed in (or the freshly created default)
 */
export function collectStyles(obj, styles = []) {
    const isContainer = (candidate) => typeof candidate === 'object' && candidate !== null;
    if (!isContainer(obj)) {
        return styles;
    }
    Object.entries(obj).forEach(([key, value]) => {
        if (isContainer(value)) {
            // Nested objects and arrays are descended into, never recorded themselves.
            collectStyles(value, styles);
            return;
        }
        const isStyleName = key === 'styleName' && typeof value === 'string';
        if (isStyleName && !styles.includes(value)) {
            styles.push(value);
        }
    });
    return styles;
}
|
|
57
|
+
/**
 * Parses an HTML file and reports the class names used in it.
 *
 * The bytes are decoded with `arrayBufferToString`, parsed as `text/html` by a
 * `DOMParser`, and the resulting document is handed to `extractStyleNamesFromHTML`.
 * Any error raised along the way is delivered as a rejection of the returned promise.
 *
 * @param arrayBuffer - Bytes of the HTML file
 * @returns Resolves to `{ styles }` with the class names found
 */
export async function processHTML(arrayBuffer) {
    const markup = arrayBufferToString(arrayBuffer);
    const parsedDocument = new DOMParser().parseFromString(markup, 'text/html');
    return { styles: extractStyleNamesFromHTML(parsedDocument) };
}
|
|
68
|
+
/**
 * Collects the class names used by the `.htm` entries of a zip archive.
 *
 * Two soft limits guard against oversized archives: more than 1 GB in size, or more
 * than 10000 entries. Exceeding either one asks the user to continue via `confirm`;
 * declining throws.
 * NOTE(review): `confirm` is resolved as a global here, which presumably limits this
 * function to environments that provide it (a browser window) — confirm with callers.
 *
 * @param zipFile - The zip archive (anything JSZip's `loadAsync` accepts that also exposes `size`)
 * @returns Resolves to `{ styles }`: distinct class names across all `.htm` entries,
 *   in first-seen order
 * @throws Error when a limit is exceeded and the confirmation prompt is declined
 */
export async function extractStylesFromZip(zipFile) {
    const MAX_FILES = 10000;
    const MAX_SIZE = 1073741824; // 1 GB
    if (zipFile.size > MAX_SIZE &&
        !confirm(`zip is ${zipFile.size / MAX_SIZE} GB. continue?`)) {
        // The message states the limit that is actually enforced (MAX_SIZE).
        throw new Error('Size of the file is more than the limit 1 GB');
    }
    const loadedZip = await JSZip.loadAsync(zipFile); //NOSONAR size validated. Safe to extract.
    // Check if the total number of files exceeds the limit
    const entryNames = Object.keys(loadedZip.files);
    const totalFiles = entryNames.length;
    if (totalFiles > MAX_FILES &&
        !confirm(`zip contains an excessive ${totalFiles} files. continue?`)) {
        throw new Error(`Number of files in the zip (${totalFiles}) exceeds the limit (${MAX_FILES})`);
    }
    // One Set for the whole archive; insertion order preserves first-seen order.
    const combinedStyles = new Set();
    for (const fileName of entryNames) {
        if (!fileName.endsWith('.htm')) {
            continue;
        }
        const arrayBuffer = await loadedZip.files[fileName].async('arraybuffer');
        const { styles } = await processHTML(arrayBuffer);
        for (const styleName of styles) {
            combinedStyles.add(styleName);
        }
    }
    return { styles: [...combinedStyles] };
}
|
|
92
|
+
/**
 * Decodes raw bytes into text using a default `TextDecoder` (UTF-8).
 *
 * @param arrayBuffer - Bytes to decode
 * @returns The decoded string
 */
export function arrayBufferToString(arrayBuffer) {
    const bytes = new Uint8Array(arrayBuffer);
    const utf8Decoder = new TextDecoder();
    return utf8Decoder.decode(bytes);
}
|
|
95
|
+
export function extractStyleNamesFromHTML(doc) {
|
|
96
|
+
const styleNames = [];
|
|
97
|
+
// Extract class names from HTML elements and add to style names
|
|
98
|
+
const elementsWithClass = doc.querySelectorAll('[class]');
|
|
99
|
+
for (const element of Array.from(elementsWithClass)) {
|
|
100
|
+
const classes = element.className.split(/\s{1,100}/); // Split by whitespace
|
|
101
|
+
styleNames.push(...classes);
|
|
102
|
+
}
|
|
103
|
+
// Return unique style names
|
|
104
|
+
return [...new Set(styleNames)];
|
|
105
|
+
}
|
package/transform.zip.js
CHANGED
|
@@ -154,25 +154,32 @@ async function loopHTMLFiles(htmlFiles, updateSrc) {
|
|
|
154
154
|
const processedHtmlContents = (await Promise.all(htmlFiles.files
|
|
155
155
|
.filter((htmlFile) => !!htmlFile)
|
|
156
156
|
.map((f) => processFile(f, htmlFiles.imageFiles, updateSrc)))).filter((x) => x?.length);
|
|
157
|
+
if (processedHtmlContents.length === 0 && htmlFiles.files.length > 0) {
|
|
158
|
+
throw new Error(`File contents are empty`);
|
|
159
|
+
}
|
|
157
160
|
return sortedNodeList(processedHtmlContents);
|
|
158
161
|
}
|
|
159
162
|
async function processFile(file, imageFiles, updateSrc) {
|
|
160
163
|
const htmlContent = await file.content();
|
|
161
164
|
const htmlFileName = file.name ?? 'Unknown file';
|
|
165
|
+
// Reject files with zero bytes
|
|
166
|
+
if (!htmlContent?.length) {
|
|
167
|
+
throw new Error(`File ${htmlFileName} has zero bytes`);
|
|
168
|
+
}
|
|
162
169
|
// Get content before <head> (first 1000 chars should be enough)
|
|
163
170
|
const beforeHead = htmlContent.substring(0, 1000);
|
|
164
|
-
//
|
|
171
|
+
// Reject old DOCTYPE declarations
|
|
165
172
|
if (beforeHead.includes('<!DOCTYPE HTML PUBLIC')) {
|
|
166
|
-
throw new Error(`Incorrect file format: ${htmlFileName}`);
|
|
173
|
+
throw new Error(`Incorrect file format (was "!DOCTYPE HTML PUBLIC"): ${htmlFileName}`);
|
|
167
174
|
}
|
|
168
|
-
//
|
|
175
|
+
// Reject XML declarations (XHTML format)
|
|
169
176
|
if (beforeHead.trimStart().startsWith('<?xml')) {
|
|
170
|
-
throw new Error(`Incorrect file format: ${htmlFileName}`);
|
|
177
|
+
throw new Error(`Incorrect file format (was "XHTML"): ${htmlFileName}`);
|
|
171
178
|
}
|
|
172
|
-
//
|
|
179
|
+
// Must have <html lang="...">
|
|
173
180
|
// Option A: Exact match for en-US
|
|
174
181
|
if (!beforeHead.includes('<html lang="en-US">')) {
|
|
175
|
-
throw new Error(`Incorrect file format: ${htmlFileName}`);
|
|
182
|
+
throw new Error(`Incorrect file format (missing "<html lang=..."): ${htmlFileName}`);
|
|
176
183
|
}
|
|
177
184
|
const domCollection = new DOMParser().parseFromString(htmlContent, 'text/html');
|
|
178
185
|
//Get the title text
|