@modusoperandi/licit-import-utils 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +2 -0
- package/index.d.ts +3 -0
- package/index.js +2 -0
- package/licit-transform.d.ts +74 -97
- package/licit-transform.js +2 -28
- package/package.json +52 -52
- package/preprocess.utils.d.ts +22 -0
- package/preprocess.utils.js +105 -0
- package/transform.zip.js +13 -6
package/LICENSE
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2026 Modus Operandi Inc.
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Modus Operandi Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
package/index.d.ts
CHANGED
|
@@ -4,5 +4,8 @@
|
|
|
4
4
|
*/
|
|
5
5
|
export * from './types';
|
|
6
6
|
export * from './licit-transform';
|
|
7
|
+
export * from './preprocess.utils';
|
|
7
8
|
export * from './transform.docx';
|
|
9
|
+
export * from './transform.utils';
|
|
8
10
|
export * from './transform.zip';
|
|
11
|
+
export { LicitDocumentJSON, LicitElementJSON } from './licit-elements';
|
package/index.js
CHANGED
package/licit-transform.d.ts
CHANGED
|
@@ -3,8 +3,6 @@
|
|
|
3
3
|
* @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
|
|
4
4
|
*/
|
|
5
5
|
import type { LicitDocumentJSON } from './licit-elements';
|
|
6
|
-
import { LicitBulletListElement, LicitDocumentElement, LicitEnhancedImageElement, LicitTableRowElement } from './licit-elements';
|
|
7
|
-
import type { UpdatedCapco } from './capco.util';
|
|
8
6
|
import type { MessageSink } from './types';
|
|
9
7
|
export interface ParserElement {
|
|
10
8
|
node: Element;
|
|
@@ -13,12 +11,6 @@ export interface ParserElement {
|
|
|
13
11
|
level: number;
|
|
14
12
|
subText: string;
|
|
15
13
|
}
|
|
16
|
-
interface ImageInfo {
|
|
17
|
-
src: string;
|
|
18
|
-
alt: string;
|
|
19
|
-
width: number;
|
|
20
|
-
height: number;
|
|
21
|
-
}
|
|
22
14
|
declare enum ParserElementType {
|
|
23
15
|
ChapterTitle = 0,
|
|
24
16
|
ChapterSubtitle = 1,
|
|
@@ -88,73 +80,63 @@ export interface AddCellOptions {
|
|
|
88
80
|
}
|
|
89
81
|
export declare class LicitConverter {
|
|
90
82
|
private readonly config;
|
|
91
|
-
|
|
92
|
-
elements
|
|
83
|
+
private readonly elementsParsedMap;
|
|
84
|
+
private elements;
|
|
93
85
|
constructor(config: TransformConfig);
|
|
94
86
|
parseHTML(html: Document, isDoctorine: boolean, moDocType?: string): LicitDocumentJSON;
|
|
95
87
|
parseFrameMakerHTML5(html: Element[]): LicitDocumentJSON;
|
|
96
|
-
render_FrameMakerHTML5_zip
|
|
97
|
-
render_FrameMakerHTML5_zip_SwitchHelper
|
|
88
|
+
private render_FrameMakerHTML5_zip;
|
|
89
|
+
private render_FrameMakerHTML5_zip_SwitchHelper;
|
|
98
90
|
private handleNodes;
|
|
99
|
-
fetchRenderedContent
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
*
|
|
103
|
-
* @returns Map of elements
|
|
104
|
-
*/
|
|
105
|
-
getElementsParsedMap(): Map<string, boolean>;
|
|
106
|
-
getCustomStyle(styleName: string): StyleInfo | undefined;
|
|
107
|
-
handleOrderedListItem(e: ParserElement, licitDocument: LicitDocumentElement): void;
|
|
91
|
+
private fetchRenderedContent;
|
|
92
|
+
private getCustomStyle;
|
|
93
|
+
private handleOrderedListItem;
|
|
108
94
|
/**
|
|
109
95
|
* Renders the HTML as a Licit JSON structure
|
|
110
96
|
*
|
|
111
97
|
* @returns The document as an `LicitDocumentJSON` object
|
|
112
98
|
*/
|
|
113
|
-
render
|
|
114
|
-
renderSwitchHelper
|
|
99
|
+
private render;
|
|
100
|
+
private renderSwitchHelper;
|
|
115
101
|
private renderTable;
|
|
116
102
|
private renderParagraph;
|
|
117
103
|
private renderHeader;
|
|
118
104
|
private buildElements;
|
|
119
|
-
checkChildNode
|
|
120
|
-
render_doc
|
|
121
|
-
render_docSwitchHelper
|
|
122
|
-
renderTypeParagraph
|
|
123
|
-
handle_UrlText
|
|
124
|
-
text_WithoutUrl
|
|
105
|
+
private checkChildNode;
|
|
106
|
+
private render_doc;
|
|
107
|
+
private render_docSwitchHelper;
|
|
108
|
+
private renderTypeParagraph;
|
|
109
|
+
private handle_UrlText;
|
|
110
|
+
private text_WithoutUrl;
|
|
125
111
|
private handleNode;
|
|
126
|
-
mergeSpans
|
|
127
|
-
updateChildCapcoContent
|
|
128
|
-
updateChildCapcoContentLoopHelper
|
|
129
|
-
processChildNodesCapco
|
|
130
|
-
updateCapcoToParagraph
|
|
131
|
-
processTableCapco
|
|
132
|
-
figureTitleCase
|
|
133
|
-
handleImageChild
|
|
134
|
-
renderNewFigureTitle
|
|
135
|
-
figureParagraphCase
|
|
136
|
-
figureNoteCase
|
|
137
|
-
figureTableTitleCase
|
|
112
|
+
private mergeSpans;
|
|
113
|
+
private updateChildCapcoContent;
|
|
114
|
+
private updateChildCapcoContentLoopHelper;
|
|
115
|
+
private processChildNodesCapco;
|
|
116
|
+
private updateCapcoToParagraph;
|
|
117
|
+
private processTableCapco;
|
|
118
|
+
private figureTitleCase;
|
|
119
|
+
private handleImageChild;
|
|
120
|
+
private renderNewFigureTitle;
|
|
121
|
+
private figureParagraphCase;
|
|
122
|
+
private figureNoteCase;
|
|
123
|
+
private figureTableTitleCase;
|
|
138
124
|
private renderDocVignet;
|
|
139
125
|
private parseUntypedDocVignet;
|
|
140
126
|
private parseTypedDocVignet;
|
|
141
|
-
parseTypedDocVignetHelper
|
|
142
|
-
bgColor: string;
|
|
143
|
-
borderColor: string;
|
|
144
|
-
boxWidth: number;
|
|
145
|
-
};
|
|
127
|
+
private parseTypedDocVignetHelper;
|
|
146
128
|
private renderDocTable;
|
|
147
129
|
private renderEnhancedTable;
|
|
148
130
|
private getLicitTable;
|
|
149
|
-
renderNewLicitImage
|
|
150
|
-
renderDocBulletItems
|
|
151
|
-
processBulletNodes
|
|
152
|
-
addElementLicit
|
|
153
|
-
removeEmptyATags
|
|
131
|
+
private renderNewLicitImage;
|
|
132
|
+
private renderDocBulletItems;
|
|
133
|
+
private processBulletNodes;
|
|
134
|
+
private addElementLicit;
|
|
135
|
+
private removeEmptyATags;
|
|
154
136
|
private handleULNode;
|
|
155
137
|
private renderDocFigure;
|
|
156
|
-
renderImage
|
|
157
|
-
parseOL
|
|
138
|
+
private renderImage;
|
|
139
|
+
private parseOL;
|
|
158
140
|
/**
|
|
159
141
|
* To parse table data
|
|
160
142
|
* @param e - element
|
|
@@ -166,114 +148,109 @@ export declare class LicitConverter {
|
|
|
166
148
|
* @param isTransparent - flag to distinguish preface table
|
|
167
149
|
* @returns void
|
|
168
150
|
*/
|
|
169
|
-
parseTableContent
|
|
170
|
-
parseTableContentInnerLoopHelper
|
|
151
|
+
private parseTableContent;
|
|
152
|
+
private parseTableContentInnerLoopHelper;
|
|
171
153
|
private addCell;
|
|
172
|
-
checkCellStyle
|
|
154
|
+
private checkCellStyle;
|
|
173
155
|
private addTableImageCell;
|
|
174
|
-
ParseNestedList
|
|
156
|
+
private ParseNestedList;
|
|
175
157
|
/**
|
|
176
158
|
* Returns the level of an element as described by the number at the end of its classname
|
|
177
159
|
*
|
|
178
160
|
* @param className - The className of the element
|
|
179
161
|
* @returns The level as a number or zero if the level cannot be determined
|
|
180
162
|
*/
|
|
181
|
-
extractLevel
|
|
163
|
+
private extractLevel;
|
|
182
164
|
/**
|
|
183
165
|
* Determines if an element is a table or image then calls the appropriate parse method
|
|
184
166
|
*/
|
|
185
|
-
parseTableFigure
|
|
167
|
+
private parseTableFigure;
|
|
186
168
|
/**
|
|
187
169
|
* Parse a table element
|
|
188
170
|
*/
|
|
189
|
-
parseTable
|
|
171
|
+
private parseTable;
|
|
190
172
|
/**
|
|
191
173
|
* Parse a table element
|
|
192
174
|
*/
|
|
193
|
-
parseVignet
|
|
175
|
+
private parseVignet;
|
|
194
176
|
/**
|
|
195
177
|
* Parse a figure (image) element
|
|
196
178
|
*/
|
|
197
|
-
parseFigure
|
|
179
|
+
private parseFigure;
|
|
198
180
|
/**
|
|
199
181
|
* Parse a note element
|
|
200
182
|
*/
|
|
201
|
-
parseNote
|
|
183
|
+
private parseNote;
|
|
202
184
|
/**
|
|
203
185
|
* Parse a hr element
|
|
204
186
|
*/
|
|
205
|
-
parseHR
|
|
187
|
+
private parseHR;
|
|
206
188
|
/**
|
|
207
189
|
* Parse a chapter title element
|
|
208
190
|
*/
|
|
209
|
-
parseChapterTitle
|
|
191
|
+
private parseChapterTitle;
|
|
210
192
|
/**
|
|
211
193
|
* Parse a chapter subtitle element
|
|
212
194
|
*/
|
|
213
|
-
parseChapterSubtitle
|
|
195
|
+
private parseChapterSubtitle;
|
|
214
196
|
/**
|
|
215
197
|
* Parse a header element
|
|
216
198
|
*/
|
|
217
|
-
parseHeader
|
|
199
|
+
private parseHeader;
|
|
218
200
|
/**
|
|
219
201
|
* Parse a bullet point item element
|
|
220
202
|
*/
|
|
221
|
-
parseBullet
|
|
203
|
+
private parseBullet;
|
|
222
204
|
/**
|
|
223
205
|
* Parse a ordered list point item element
|
|
224
206
|
*/
|
|
225
|
-
parseOrdered
|
|
207
|
+
private parseOrdered;
|
|
226
208
|
/**
|
|
227
209
|
* Parse a paragraph element
|
|
228
210
|
*/
|
|
229
|
-
parseParagraph
|
|
230
|
-
parseDynamicHeader
|
|
211
|
+
private parseParagraph;
|
|
212
|
+
private parseDynamicHeader;
|
|
231
213
|
/** Sanitize the text content by removing specific characters */
|
|
232
|
-
sanitizeText
|
|
214
|
+
private sanitizeText;
|
|
233
215
|
/**
|
|
234
216
|
* Parse a figure (image) title element
|
|
235
217
|
*/
|
|
236
|
-
parseFigureTitle
|
|
218
|
+
private parseFigureTitle;
|
|
237
219
|
/**
|
|
238
220
|
* Parse a ChangeBarPara element
|
|
239
221
|
*/
|
|
240
|
-
parseChangeBarPara
|
|
222
|
+
private parseChangeBarPara;
|
|
241
223
|
/**
|
|
242
224
|
* Parse a table title element
|
|
243
225
|
*/
|
|
244
|
-
parseTableTitle
|
|
226
|
+
private parseTableTitle;
|
|
245
227
|
/**
|
|
246
228
|
* Parse an unknown element. Currently does nothing besides printing a warning to the console.
|
|
247
229
|
*/
|
|
248
|
-
parseUnknownElement
|
|
230
|
+
private parseUnknownElement;
|
|
249
231
|
/**
|
|
250
232
|
* Parse a section title element
|
|
251
233
|
*/
|
|
252
|
-
parseSectionTitle
|
|
234
|
+
private parseSectionTitle;
|
|
253
235
|
/**
|
|
254
236
|
* Parses an `Element` as determined by its `className`
|
|
255
237
|
*
|
|
256
238
|
* @param element - The `Element` to be parsed
|
|
257
239
|
*/
|
|
258
|
-
parseElement
|
|
259
|
-
parseElement_doc
|
|
240
|
+
private parseElement;
|
|
241
|
+
private parseElement_doc;
|
|
260
242
|
/**
|
|
261
243
|
* Cleans up the HTML by calling certain helper methods
|
|
262
244
|
*/
|
|
263
|
-
sanitizeHTML
|
|
264
|
-
/**
|
|
265
|
-
* Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
|
|
266
|
-
*/
|
|
267
|
-
replaceUnwantedChars(html: string): string;
|
|
245
|
+
private sanitizeHTML;
|
|
268
246
|
/**
|
|
269
247
|
* Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
|
|
270
248
|
*/
|
|
271
|
-
replaceKeywordsWithLinks
|
|
272
|
-
matchClassToExcludeNumber
|
|
273
|
-
sanitizeElement
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
isTransparentTable(element: Element): boolean;
|
|
249
|
+
private replaceKeywordsWithLinks;
|
|
250
|
+
private matchClassToExcludeNumber;
|
|
251
|
+
private sanitizeElement;
|
|
252
|
+
private getScaledWidth;
|
|
253
|
+
private isTransparentTable;
|
|
277
254
|
/**
|
|
278
255
|
* Extracts and calculates the column widths from a given HTML table element.
|
|
279
256
|
*
|
|
@@ -286,24 +263,24 @@ export declare class LicitConverter {
|
|
|
286
263
|
* @param {HTMLTableElement} table - The HTML table element from which column widths are to be extracted.
|
|
287
264
|
* @returns {number[] | undefined} An array of column widths in pixels, or `undefined` if the widths are invalid or missing.
|
|
288
265
|
*/
|
|
289
|
-
getColWidthArray
|
|
290
|
-
setCellWidth
|
|
291
|
-
scaleWidthArray
|
|
292
|
-
getSumOfArray
|
|
266
|
+
private getColWidthArray;
|
|
267
|
+
private setCellWidth;
|
|
268
|
+
private scaleWidthArray;
|
|
269
|
+
private getSumOfArray;
|
|
293
270
|
/**
|
|
294
271
|
* Determines the orientation (portrait or landscape) based on the total width.
|
|
295
272
|
*
|
|
296
273
|
* @param {number} totalWidth - The total width (in pixels) used to determine orientation.
|
|
297
274
|
* @returns {'portrait' | 'landscape'} Returns 'portrait' if the width is less than 700 pixels; otherwise, returns 'landscape'.
|
|
298
275
|
*/
|
|
299
|
-
findOrientation
|
|
276
|
+
private findOrientation;
|
|
300
277
|
/**
|
|
301
278
|
* Extracts image information from an HTMLImageElement.
|
|
302
279
|
*
|
|
303
280
|
* @param {HTMLImageElement} img - The image element to extract information from.
|
|
304
281
|
* @returns {{ src: string; alt: string; width: number; height: number }} An object containing the image's source URL, alt text, width, and height.
|
|
305
282
|
*/
|
|
306
|
-
extractImageInfo
|
|
283
|
+
private extractImageInfo;
|
|
307
284
|
/**
|
|
308
285
|
* Extracts note paragraphs from the last row of an HTML table if that row
|
|
309
286
|
* contains a note header such as "OVERALL NOTE:" or "NOTES:".
|
|
@@ -345,7 +322,7 @@ export declare class LicitConverter {
|
|
|
345
322
|
* @param {Element} node - The DOM element to check.
|
|
346
323
|
* @returns {boolean} `true` if the element qualifies as a table figure, otherwise `false`.
|
|
347
324
|
*/
|
|
348
|
-
isTableFigureNode
|
|
325
|
+
private isTableFigureNode;
|
|
349
326
|
/**
|
|
350
327
|
* Determines whether the provided class name corresponds to a note-related node.
|
|
351
328
|
*
|
package/licit-transform.js
CHANGED
|
@@ -260,14 +260,6 @@ export class LicitConverter {
|
|
|
260
260
|
}
|
|
261
261
|
return renderedArr;
|
|
262
262
|
}
|
|
263
|
-
/**
|
|
264
|
-
* Returns a map elements which were parsed.
|
|
265
|
-
*
|
|
266
|
-
* @returns Map of elements
|
|
267
|
-
*/
|
|
268
|
-
getElementsParsedMap() {
|
|
269
|
-
return this.elementsParsedMap;
|
|
270
|
-
}
|
|
271
263
|
getCustomStyle(styleName) {
|
|
272
264
|
return this.config.customStyles?.find((s) => s.styleName === styleName);
|
|
273
265
|
}
|
|
@@ -467,7 +459,7 @@ export class LicitConverter {
|
|
|
467
459
|
}
|
|
468
460
|
const childNode = children[j];
|
|
469
461
|
let nextChildNode = children[j + 1];
|
|
470
|
-
//
|
|
462
|
+
// Handling paragraph combining logic for the case where
|
|
471
463
|
// heading is inside <OL>/<UL> and content is outside
|
|
472
464
|
if (!nextChildNode &&
|
|
473
465
|
(node.tagName === 'OL' || node.tagName === 'UL') &&
|
|
@@ -1894,16 +1886,6 @@ export class LicitConverter {
|
|
|
1894
1886
|
sanitizeHTML(html) {
|
|
1895
1887
|
return this.replaceKeywordsWithLinks(html);
|
|
1896
1888
|
}
|
|
1897
|
-
/**
|
|
1898
|
-
* Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
|
|
1899
|
-
*/
|
|
1900
|
-
replaceUnwantedChars(html) {
|
|
1901
|
-
const chars = this.config.replacementChars;
|
|
1902
|
-
for (const char of chars) {
|
|
1903
|
-
html = html.replace(char.find, char.replace);
|
|
1904
|
-
}
|
|
1905
|
-
return html;
|
|
1906
|
-
}
|
|
1907
1889
|
/**
|
|
1908
1890
|
* Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
|
|
1909
1891
|
*/
|
|
@@ -1916,7 +1898,7 @@ export class LicitConverter {
|
|
|
1916
1898
|
}
|
|
1917
1899
|
return html;
|
|
1918
1900
|
}
|
|
1919
|
-
//
|
|
1901
|
+
// For skipping trimming inside tables; add more classes to the class list for future use
|
|
1920
1902
|
matchClassToExcludeNumber(className) {
|
|
1921
1903
|
let trimmedClassName = className.trim();
|
|
1922
1904
|
trimmedClassName = trimmedClassName.toLowerCase();
|
|
@@ -1950,14 +1932,6 @@ export class LicitConverter {
|
|
|
1950
1932
|
};
|
|
1951
1933
|
stripTextContent(element);
|
|
1952
1934
|
}
|
|
1953
|
-
removeLastNumber(inputString) {
|
|
1954
|
-
let lastNonDigitIndex = inputString.length - 1;
|
|
1955
|
-
while (lastNonDigitIndex >= 0 &&
|
|
1956
|
-
!Number.isNaN(Number.parseInt(inputString[lastNonDigitIndex]))) {
|
|
1957
|
-
lastNonDigitIndex--;
|
|
1958
|
-
}
|
|
1959
|
-
return inputString.slice(0, lastNonDigitIndex + 1);
|
|
1960
|
-
}
|
|
1961
1935
|
getScaledWidth(width) {
|
|
1962
1936
|
if (width <= 200) {
|
|
1963
1937
|
return width.toString();
|
package/package.json
CHANGED
|
@@ -1,52 +1,52 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@modusoperandi/licit-import-utils",
|
|
3
|
-
"version": "0.1.
|
|
4
|
-
"license": "MIT",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"subversion": "1",
|
|
7
|
-
"description": "A utility package for importing files like json or docx into Licit compatible documents",
|
|
8
|
-
"main": "index.js",
|
|
9
|
-
"types": "index.d.ts",
|
|
10
|
-
"repository": {
|
|
11
|
-
"type": "git",
|
|
12
|
-
"url": "git+https://github.com/MO-Movia/licit-import-utils.git"
|
|
13
|
-
},
|
|
14
|
-
"scripts": {
|
|
15
|
-
"test": "jest",
|
|
16
|
-
"test:unit": "jest",
|
|
17
|
-
"test:coverage": "jest --env=jsdom --coverage",
|
|
18
|
-
"build:clean": "rm -rf dist/ && rm -f modusoperandi-*.*.*.tgz",
|
|
19
|
-
"lint": "eslint src",
|
|
20
|
-
"ci:build": "tsc -b tsconfig.prod.json --clean && tsc -b tsconfig.prod.json && npx copyfiles@2.4.1 package.json LICENSE dist",
|
|
21
|
-
"ci:bom": "npx @cyclonedx/cyclonedx-npm --ignore-npm-errors --short-PURLs --output-format XML --output-file dist/bom.xml",
|
|
22
|
-
"verify": "npm run lint -- --fix && npm run ci:build && npm run test:coverage && echo 'All Tests Passed!'"
|
|
23
|
-
},
|
|
24
|
-
"peerDependencies": {
|
|
25
|
-
"@modusoperandi/mammoth": "^1.7.0-6",
|
|
26
|
-
"jszip": "^3.10.1"
|
|
27
|
-
},
|
|
28
|
-
"peerDependenciesMeta": {
|
|
29
|
-
"@modusoperandi/mammoth": {
|
|
30
|
-
"optional": true
|
|
31
|
-
},
|
|
32
|
-
"jszip": {
|
|
33
|
-
"optional": true
|
|
34
|
-
}
|
|
35
|
-
},
|
|
36
|
-
"dependencies": {
|
|
37
|
-
"uuid": "^13.0.0"
|
|
38
|
-
},
|
|
39
|
-
"devDependencies": {
|
|
40
|
-
"@modusoperandi/mammoth": "^1.7.0-6",
|
|
41
|
-
"@modusoperandi/eslint-config": "^3.0.3",
|
|
42
|
-
"@types/jest": "^30.0.0",
|
|
43
|
-
"jszip": "^3.10.1",
|
|
44
|
-
"eslint": "^9.39.2",
|
|
45
|
-
"jest": "^30.2.0",
|
|
46
|
-
"jest-environment-jsdom": "^30.2.0",
|
|
47
|
-
"jest-junit": "^16.0.0",
|
|
48
|
-
"ts-jest": "^29.4.6",
|
|
49
|
-
"ts-node": "^10.9.2",
|
|
50
|
-
"typescript": "^5.9.3"
|
|
51
|
-
}
|
|
52
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "@modusoperandi/licit-import-utils",
|
|
3
|
+
"version": "0.1.1",
|
|
4
|
+
"license": "MIT",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"subversion": "1",
|
|
7
|
+
"description": "A utility package for importing files like json or docx into Licit compatible documents",
|
|
8
|
+
"main": "index.js",
|
|
9
|
+
"types": "index.d.ts",
|
|
10
|
+
"repository": {
|
|
11
|
+
"type": "git",
|
|
12
|
+
"url": "git+https://github.com/MO-Movia/licit-import-utils.git"
|
|
13
|
+
},
|
|
14
|
+
"scripts": {
|
|
15
|
+
"test": "jest",
|
|
16
|
+
"test:unit": "jest",
|
|
17
|
+
"test:coverage": "jest --env=jsdom --coverage",
|
|
18
|
+
"build:clean": "rm -rf dist/ && rm -f modusoperandi-*.*.*.tgz",
|
|
19
|
+
"lint": "eslint src",
|
|
20
|
+
"ci:build": "tsc -b tsconfig.prod.json --clean && tsc -b tsconfig.prod.json && npx copyfiles@2.4.1 package.json LICENSE README.md dist",
|
|
21
|
+
"ci:bom": "npx @cyclonedx/cyclonedx-npm --ignore-npm-errors --short-PURLs --output-format XML --output-file dist/bom.xml",
|
|
22
|
+
"verify": "npm run lint -- --fix && npm run ci:build && npm run test:coverage && echo 'All Tests Passed!'"
|
|
23
|
+
},
|
|
24
|
+
"peerDependencies": {
|
|
25
|
+
"@modusoperandi/mammoth": "^1.7.0-6",
|
|
26
|
+
"jszip": "^3.10.1"
|
|
27
|
+
},
|
|
28
|
+
"peerDependenciesMeta": {
|
|
29
|
+
"@modusoperandi/mammoth": {
|
|
30
|
+
"optional": true
|
|
31
|
+
},
|
|
32
|
+
"jszip": {
|
|
33
|
+
"optional": true
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
"dependencies": {
|
|
37
|
+
"uuid": "^13.0.0"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@modusoperandi/mammoth": "^1.7.0-6",
|
|
41
|
+
"@modusoperandi/eslint-config": "^3.0.3",
|
|
42
|
+
"@types/jest": "^30.0.0",
|
|
43
|
+
"jszip": "^3.10.1",
|
|
44
|
+
"eslint": "^9.39.2",
|
|
45
|
+
"jest": "^30.2.0",
|
|
46
|
+
"jest-environment-jsdom": "^30.2.0",
|
|
47
|
+
"jest-junit": "^16.0.0",
|
|
48
|
+
"ts-jest": "^29.4.6",
|
|
49
|
+
"ts-node": "^10.9.2",
|
|
50
|
+
"typescript": "^5.9.3"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license MIT
|
|
3
|
+
* @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
|
|
4
|
+
*/
|
|
5
|
+
import type { Message } from './types';
|
|
6
|
+
export declare function extractStylesForDoc(arrayBuffer: ArrayBuffer, docType: string): Promise<{
|
|
7
|
+
styles: string[];
|
|
8
|
+
}>;
|
|
9
|
+
export declare function extractUniqueStyleIds(data: Message[]): string[];
|
|
10
|
+
export declare function extractStylesForJSON(arrayBuffer: ArrayBuffer): Promise<{
|
|
11
|
+
content: string;
|
|
12
|
+
styles: string[];
|
|
13
|
+
}>;
|
|
14
|
+
export declare function collectStyles(obj: unknown, styles?: string[]): string[];
|
|
15
|
+
export declare function processHTML(arrayBuffer: ArrayBuffer): Promise<{
|
|
16
|
+
styles: string[];
|
|
17
|
+
}>;
|
|
18
|
+
export declare function extractStylesFromZip(zipFile: File): Promise<{
|
|
19
|
+
styles: string[];
|
|
20
|
+
}>;
|
|
21
|
+
export declare function arrayBufferToString(arrayBuffer: ArrayBuffer): string;
|
|
22
|
+
export declare function extractStyleNamesFromHTML(doc: Document): string[];
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license MIT
|
|
3
|
+
* @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
|
|
4
|
+
*/
|
|
5
|
+
import JSZip from 'jszip';
|
|
6
|
+
import { DocxTransformer } from './transform.docx';
|
|
7
|
+
/**
 * Runs a `DocxTransformer` over a document and reports the style IDs that
 * appear in the messages the transformer emits while doing so.
 *
 * The transformer's own result is discarded; only the messages delivered to
 * the sink callback are inspected.
 *
 * @param arrayBuffer - Raw bytes of the document to transform.
 * @param docType - Document type forwarded to the `DocxTransformer` constructor.
 * @returns An object whose `styles` array holds each style ID once.
 */
export async function extractStylesForDoc(arrayBuffer, docType) {
    // Every message the transformer reports is recorded here as { type, message }.
    const collected = [];
    const sink = (type, message) => collected.push({ type, message });
    const transformer = new DocxTransformer(docType, sink);
    await transformer.transform(arrayBuffer);
    // Style IDs are read out of the recorded messages, not out of the transform result.
    return { styles: extractUniqueStyleIds(collected) };
}
|
|
15
|
+
export function extractUniqueStyleIds(data) {
|
|
16
|
+
const styleIds = [];
|
|
17
|
+
data ??= [];
|
|
18
|
+
for (const item of data) {
|
|
19
|
+
const match = new RegExp(/Style ID: (.{0,100}?)(?=\))/).exec(item.message);
|
|
20
|
+
const styleId = match?.[1];
|
|
21
|
+
if (styleId && !styleIds.includes(styleId)) {
|
|
22
|
+
styleIds.push(styleId);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
return styleIds;
|
|
26
|
+
}
|
|
27
|
+
export function extractStylesForJSON(arrayBuffer) {
|
|
28
|
+
const decoder = new TextDecoder('utf-8');
|
|
29
|
+
const content = decoder.decode(arrayBuffer);
|
|
30
|
+
const jsonObject = JSON.parse(content);
|
|
31
|
+
if (typeof jsonObject !== 'object' || jsonObject === null) {
|
|
32
|
+
throw new Error('Invalid JSON document');
|
|
33
|
+
}
|
|
34
|
+
const styles = [];
|
|
35
|
+
collectStyles(jsonObject, styles);
|
|
36
|
+
return Promise.resolve({ content, styles });
|
|
37
|
+
}
|
|
38
|
+
// Preprocessor to handle the JSON formatted documents
|
|
39
|
+
export function collectStyles(obj, styles = []) {
|
|
40
|
+
if (typeof obj !== 'object' || obj === null) {
|
|
41
|
+
return styles;
|
|
42
|
+
}
|
|
43
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
44
|
+
if (typeof value === 'object' && value !== null) {
|
|
45
|
+
// Recursively traverse nested objects
|
|
46
|
+
collectStyles(value, styles);
|
|
47
|
+
}
|
|
48
|
+
else if (key === 'styleName' &&
|
|
49
|
+
typeof value === 'string' &&
|
|
50
|
+
!styles.includes(value)) {
|
|
51
|
+
// Add the style name to the list if it's not already included
|
|
52
|
+
styles.push(value);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
return styles;
|
|
56
|
+
}
|
|
57
|
+
/**
 * Decodes an HTML file and lists the class names used in it.
 *
 * The bytes are converted to text with `arrayBufferToString`, parsed with
 * `DOMParser` as `text/html`, and handed to `extractStyleNamesFromHTML`.
 * All of this work is synchronous; the function is `async` only to keep its
 * promise-returning contract, so any error surfaces as a rejection.
 *
 * @param arrayBuffer - Raw bytes of the HTML file.
 * @returns An object whose `styles` array holds the class names found.
 */
export async function processHTML(arrayBuffer) {
    const content = arrayBufferToString(arrayBuffer);
    const doc = new DOMParser().parseFromString(content, 'text/html');
    return { styles: extractStyleNamesFromHTML(doc) };
}
|
|
68
|
+
/**
 * Collects the class names used by every `.htm` entry of a zip archive.
 *
 * Two soft limits are applied: an archive larger than 1 GB, or one holding
 * more than 10000 entries, is only processed when the user accepts a
 * `confirm` prompt; declining aborts with an error.
 *
 * @param zipFile - The zip archive (a `File`, whose `size` is checked before loading).
 * @returns An object whose `styles` array holds each class name once, in
 *          order of first appearance across the processed files.
 * @throws Error when the user declines to continue with an oversized archive
 *         or with an archive containing too many entries.
 */
export async function extractStylesFromZip(zipFile) {
    const MAX_FILES = 10000;
    const MAX_SIZE = 1073741824; // 1 GB
    if (zipFile.size > MAX_SIZE &&
        !confirm(`zip is ${zipFile.size / MAX_SIZE} GB. continue?`)) {
        // The message states the limit that is actually enforced above.
        throw new Error('Size of the file is more than the limit 1 GB');
    }
    const loadedZip = await JSZip.loadAsync(zipFile); //NOSONAR size validated. Safe to extract.
    // Every key of `files` counts towards the limit.
    const entryNames = Object.keys(loadedZip.files);
    const totalFiles = entryNames.length;
    if (totalFiles > MAX_FILES &&
        !confirm(`zip contains an excessive ${totalFiles} files. continue?`)) {
        throw new Error(`Number of files in the zip (${totalFiles}) exceeds the limit (${MAX_FILES})`);
    }
    const htmlFiles = entryNames.filter((fileName) => fileName.endsWith('.htm'));
    // A single Set accumulates the names; it preserves first-seen order.
    const combinedStyles = new Set();
    // Files are read one at a time so only one decompressed buffer is held at once.
    for (const fileName of htmlFiles) {
        const arrayBuffer = await loadedZip.files[fileName].async('arraybuffer');
        const { styles } = await processHTML(arrayBuffer);
        for (const style of styles) {
            combinedStyles.add(style);
        }
    }
    return { styles: [...combinedStyles] };
}
|
|
92
|
+
export function arrayBufferToString(arrayBuffer) {
|
|
93
|
+
return new TextDecoder().decode(new Uint8Array(arrayBuffer));
|
|
94
|
+
}
|
|
95
|
+
export function extractStyleNamesFromHTML(doc) {
|
|
96
|
+
const styleNames = [];
|
|
97
|
+
// Extract class names from HTML elements and add to style names
|
|
98
|
+
const elementsWithClass = doc.querySelectorAll('[class]');
|
|
99
|
+
for (const element of Array.from(elementsWithClass)) {
|
|
100
|
+
const classes = element.className.split(/\s{1,100}/); // Split by whitespace
|
|
101
|
+
styleNames.push(...classes);
|
|
102
|
+
}
|
|
103
|
+
// Return unique style names
|
|
104
|
+
return [...new Set(styleNames)];
|
|
105
|
+
}
|
package/transform.zip.js
CHANGED
|
@@ -154,25 +154,32 @@ async function loopHTMLFiles(htmlFiles, updateSrc) {
|
|
|
154
154
|
const processedHtmlContents = (await Promise.all(htmlFiles.files
|
|
155
155
|
.filter((htmlFile) => !!htmlFile)
|
|
156
156
|
.map((f) => processFile(f, htmlFiles.imageFiles, updateSrc)))).filter((x) => x?.length);
|
|
157
|
+
if (processedHtmlContents.length === 0 && htmlFiles.files.length > 0) {
|
|
158
|
+
throw new Error(`File contents are empty`);
|
|
159
|
+
}
|
|
157
160
|
return sortedNodeList(processedHtmlContents);
|
|
158
161
|
}
|
|
159
162
|
async function processFile(file, imageFiles, updateSrc) {
|
|
160
163
|
const htmlContent = await file.content();
|
|
161
164
|
const htmlFileName = file.name ?? 'Unknown file';
|
|
165
|
+
// Reject files with zero bytes
|
|
166
|
+
if (!htmlContent?.length) {
|
|
167
|
+
throw new Error(`File ${htmlFileName} has zero bytes`);
|
|
168
|
+
}
|
|
162
169
|
// Get content before <head> (first 1000 chars should be enough)
|
|
163
170
|
const beforeHead = htmlContent.substring(0, 1000);
|
|
164
|
-
//
|
|
171
|
+
// Reject old DOCTYPE declarations
|
|
165
172
|
if (beforeHead.includes('<!DOCTYPE HTML PUBLIC')) {
|
|
166
|
-
throw new Error(`Incorrect file format: ${htmlFileName}`);
|
|
173
|
+
throw new Error(`Incorrect file format (was "!DOCTYPE HTML PUBLIC"): ${htmlFileName}`);
|
|
167
174
|
}
|
|
168
|
-
//
|
|
175
|
+
// Reject XML declarations (XHTML format)
|
|
169
176
|
if (beforeHead.trimStart().startsWith('<?xml')) {
|
|
170
|
-
throw new Error(`Incorrect file format: ${htmlFileName}`);
|
|
177
|
+
throw new Error(`Incorrect file format (was "XHTML"): ${htmlFileName}`);
|
|
171
178
|
}
|
|
172
|
-
//
|
|
179
|
+
// Must have <html lang="...">
|
|
173
180
|
// Option A: Exact match for en-US
|
|
174
181
|
if (!beforeHead.includes('<html lang="en-US">')) {
|
|
175
|
-
throw new Error(`Incorrect file format: ${htmlFileName}`);
|
|
182
|
+
throw new Error(`Incorrect file format (missing "<html lang=..."): ${htmlFileName}`);
|
|
176
183
|
}
|
|
177
184
|
const domCollection = new DOMParser().parseFromString(htmlContent, 'text/html');
|
|
178
185
|
//Get the title text
|