@modusoperandi/licit-import-utils 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/capco.util.d.ts +38 -0
- package/capco.util.js +195 -0
- package/index.d.ts +8 -0
- package/index.js +8 -0
- package/licit-elements.d.ts +878 -0
- package/licit-elements.js +2588 -0
- package/licit-transform.d.ts +360 -0
- package/licit-transform.js +2197 -0
- package/package.json +52 -0
- package/transform.docx.d.ts +16 -0
- package/transform.docx.js +154 -0
- package/transform.utils.d.ts +17 -0
- package/transform.utils.js +155 -0
- package/transform.zip.d.ts +5 -0
- package/transform.zip.js +296 -0
- package/types.d.ts +9 -0
- package/types.js +5 -0
- package/zip.utils.d.ts +6 -0
- package/zip.utils.js +23 -0
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license MIT
|
|
3
|
+
* @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
|
|
4
|
+
*/
|
|
5
|
+
import type { LicitDocumentJSON } from './licit-elements';
|
|
6
|
+
import { LicitBulletListElement, LicitDocumentElement, LicitEnhancedImageElement, LicitTableRowElement } from './licit-elements';
|
|
7
|
+
import type { UpdatedCapco } from './capco.util';
|
|
8
|
+
import type { MessageSink } from './types';
|
|
9
|
+
/**
 * One entry of the `LicitConverter.elements` list: a DOM element paired with
 * the category assigned to it.
 *
 * NOTE(review): only the shape is visible in this declaration file; remarks
 * marked "presumably" are inferred from names and need confirming against the
 * implementation.
 */
export interface ParserElement {
    /** The DOM element this entry refers to. */
    node: Element;
    /** Presumably the element's CSS class name — confirm. */
    class: string;
    /** Category assigned to the element. */
    type: ParserElementType;
    /** Presumably the nesting/heading level of the element — confirm. */
    level: number;
    /** Extra text associated with the element; its exact use is not visible here. */
    subText: string;
}
|
|
16
|
+
/**
 * Image details returned by `LicitConverter.extractImageInfo`: the image's
 * source URL, alt text, width and height.
 *
 * NOTE(review): the unit of `width`/`height` is not stated in this file —
 * presumably pixels; confirm.
 */
interface ImageInfo {
    /** Source URL of the image. */
    src: string;
    /** Alternative text of the image. */
    alt: string;
    /** Image width. */
    width: number;
    /** Image height. */
    height: number;
}
|
|
22
|
+
/**
 * Categories used for the `type` field of a `ParserElement`.
 *
 * This is an ambient, non-exported numeric enum. Every member keeps the
 * explicit value from the original declaration, and member names (including
 * the lower-case `hr`, `vignet` and `infoIcon`) are unchanged because other
 * code may refer to them.
 */
declare enum ParserElementType {
    ChapterTitle = 0,
    ChapterSubtitle = 1,
    ChapterFigureTitle = 2,
    Header = 3,
    Note = 4,
    Paragraph = 5,
    SectionTitle = 6,
    TableTitle = 7,
    FigureTitle = 8,
    BulletListItem = 9,
    OrderedListItem = 10,
    Table = 11,
    EnhancedTable = 12,
    Figure = 13,
    ChangeBarPara = 14,
    hr = 15,
    vignet = 16,
    Uncategorized = 17,
    infoIcon = 18,
    NewFigureTitle = 19
}
|
|
44
|
+
/**
 * A named style entry. It is the element type of
 * `TransformConfig.customStyles` and the result type of
 * `LicitConverter.getCustomStyle`.
 */
export interface StyleInfo {
    /** Name identifying the style. */
    styleName: string;
    /** Optional style details. */
    styles?: {
        /**
         * Optional level of the style, given either as a number or a string.
         * NOTE(review): presumably a heading/outline level — confirm.
         */
        styleLevel?: number | string;
    };
}
|
|
50
|
+
/**
 * Configuration accepted by the `LicitConverter` constructor.
 *
 * NOTE(review): only the shape is visible in this declaration file; remarks
 * marked "presumably" are inferred from names and need confirming against the
 * implementation.
 */
export interface TransformConfig {
    /** Presumably the URL custom styles are loaded from — confirm. */
    customStylesUrl: string;
    /**
     * Find/replace pairs. `LicitConverter.replaceUnwantedChars` is documented
     * as replacing characters in the HTML as defined by this list.
     */
    replacementChars: {
        find: string;
        replace: string;
    }[];
    /** Presumably switches the `replacementChars` substitution on or off — confirm. */
    replaceCharacters: boolean;
    /** Presumably switches removal of section numbers on or off — confirm. */
    stripSectionNumbers: boolean;
    /**
     * Keyword/link pairs. `LicitConverter.replaceKeywordsWithLinks` is
     * documented as replacing keywords in the HTML with links as defined by
     * this list.
     */
    replaceWithLinks: {
        find: string;
        href: string;
    }[];
    /** Optional message sink; how it is used is not visible in this file. */
    messageSink?: MessageSink;
    /** Style entries; presumably what `LicitConverter.getCustomStyle` looks up — confirm. */
    customStyles: StyleInfo[];
}
|
|
65
|
+
/**
 * Ambient declaration of a module-level `TransformConfig` constant.
 *
 * NOTE(review): presumably the baseline configuration that
 * `asTransformConfig` completes partial configs from — confirm against the
 * implementation, which is not visible in this declaration file.
 */
export declare const DEFAULT_Config: TransformConfig;
|
|
66
|
+
/**
 * Produces a complete transform configuration from an optional partial one.
 *
 * The return type was previously an inline object type repeating
 * `TransformConfig` field for field. It now names the interface directly:
 * the two are structurally identical, so callers are unaffected and the
 * shapes can no longer drift apart.
 *
 * NOTE(review): presumably fields missing from `config` are taken from
 * `DEFAULT_Config` — confirm against the implementation.
 *
 * @param config - Optional subset of configuration fields.
 * @returns A configuration object with every required `TransformConfig` field.
 */
export declare function asTransformConfig(config?: Partial<TransformConfig>): TransformConfig;
|
|
81
|
+
/**
 * Options bundle describing a table cell.
 *
 * NOTE(review): presumably the argument of the private
 * `LicitConverter.addCell`; the field remarks below are inferred from names
 * and from the parameter docs of `LicitConverter.parseTableContent` — confirm.
 */
export interface AddCellOptions {
    /** Presumably the cell background colour — confirm. */
    bgColor: string;
    /** Flag to determine a chapter header. */
    isChapterHeader: boolean;
    /** Presumably the vertical alignment of the cell — confirm. */
    verAlign: string;
    /** Index of the cell; whether it is zero-based is not visible here. */
    cellIndex: number;
    /** Widths used to scale the table to specific sizes. */
    widthArray: number[];
    /** Flag used to distinguish a preface (transparent) table. */
    isTransparent: boolean;
}
|
|
89
|
+
/**
 * Converter that parses HTML input and renders it as a Licit document
 * (`LicitDocumentJSON`).
 *
 * This is a declaration only: member signatures are reproduced exactly and
 * the implementation is not visible here. Comments carried over from the
 * original declaration are kept; anything marked NOTE(review) is an
 * assumption to confirm against the implementation.
 */
export declare class LicitConverter {
    private readonly config;
    elementsParsedMap: Map<string, boolean>;
    elements: ParserElement[];
    constructor(config: TransformConfig);
    parseHTML(html: Document, isDoctorine: boolean, moDocType?: string): LicitDocumentJSON;
    parseFrameMakerHTML5(html: Element[]): LicitDocumentJSON;
    render_FrameMakerHTML5_zip(nodes: NodeList, infoIconData?: HTMLOListElement[], _moDocType?: string, renderedContentList?: Node[]): LicitDocumentJSON;
    render_FrameMakerHTML5_zip_SwitchHelper(e: ParserElement, infoIconData: HTMLOListElement[], renderedContentList: Node[], isNumberReseted: boolean, licitDocument: LicitDocumentElement): boolean;
    private handleNodes;
    fetchRenderedContent(nodes: NodeList): Node[];
    /**
     * Returns the map of elements which were parsed.
     *
     * @returns Map of elements
     */
    getElementsParsedMap(): Map<string, boolean>;
    getCustomStyle(styleName: string): StyleInfo | undefined;
    handleOrderedListItem(e: ParserElement, licitDocument: LicitDocumentElement): void;
    /**
     * Renders the HTML as a Licit JSON structure
     *
     * @returns The document as a `LicitDocumentJSON` object
     */
    render(nodes: NodeListOf<Element>): LicitDocumentJSON;
    renderSwitchHelper(e: ParserElement, licitDocument: LicitDocumentElement): void;
    private renderTable;
    private renderParagraph;
    private renderHeader;
    private buildElements;
    checkChildNode(node: HTMLElement | Element, nextNode: HTMLElement | Element): number;
    render_doc(nodes: NodeListOf<Element>, infoIconData: HTMLOListElement[] | undefined, moDocType: string): LicitDocumentJSON;
    render_docSwitchHelper(e: ParserElement, licitDocument: LicitDocumentElement, tocRemoved: boolean, infoIconData: HTMLOListElement[], moDocType: string): boolean;
    renderTypeParagraph(e: ParserElement, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
    handle_UrlText(text: string, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
    text_WithoutUrl(n: Node, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
    private handleNode;
    mergeSpans(node: Element, nextNode: Element): number;
    updateChildCapcoContent(e: ParserElement): void;
    updateChildCapcoContentLoopHelper(childNodes: ChildNode[], res: UpdatedCapco): void;
    processChildNodesCapco(childNodes: NodeListOf<ChildNode>): void;
    updateCapcoToParagraph(child: ChildNode, res: UpdatedCapco): void;
    processTableCapco(tableNode: HTMLTableElement): void;
    figureTitleCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
    handleImageChild(child: Element, licitDocument: LicitDocumentElement): void;
    renderNewFigureTitle(e: ParserElement, licitDocument: LicitDocumentElement): void;
    figureParagraphCase(e: ParserElement, licitDocument: LicitDocumentElement, infoIconData: HTMLOListElement[] | undefined, renderedContentList: Node[]): void;
    figureNoteCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
    figureTableTitleCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
    private renderDocVignet;
    private parseUntypedDocVignet;
    private parseTypedDocVignet;
    parseTypedDocVignetHelper(val: string, bgColor: string, borderColor: string, boxWidth: number): {
        bgColor: string;
        borderColor: string;
        boxWidth: number;
    };
    private renderDocTable;
    private renderEnhancedTable;
    private getLicitTable;
    renderNewLicitImage(imageElement: HTMLImageElement, capco: string | null): LicitEnhancedImageElement;
    renderDocBulletItems(e: ParserElement, licitDocument: LicitDocumentElement): void;
    processBulletNodes(childNodes: Node[], bulletList: LicitBulletListElement, licitDocument: any, indent: number, e: any): void;
    addElementLicit(licitDocument: any, bulletList: LicitBulletListElement): void;
    removeEmptyATags(node: Node): void;
    private handleULNode;
    private renderDocFigure;
    renderImage(imgElement: HTMLImageElement, licitDocument: LicitDocumentElement): void;
    parseOL(e: ParserElement, licitDocument: LicitDocumentElement): void;
    /**
     * To parse table data
     *
     * NOTE(review): several parameters are declared as `any`; the types
     * suggested by the descriptions below are not enforced by the signature.
     *
     * @param _e - element (the leading underscore suggests it is unused — confirm)
     * @param tableTag - The tag name or identifier of the table.
     * @param querySel - Selector for querying from a table row
     * @param isChapterHeader - flag to determine ChapterHeader
     * @param licitTable - Licit Table Element
     * @param widthArray - To scale the table to specific sizes
     * @param isTransparent - flag to distinguish preface table
     */
    parseTableContent(_e: any, tableTag: any, querySel: any, isChapterHeader: any, licitTable: any, widthArray: number[], isTransparent: boolean): void;
    parseTableContentInnerLoopHelper(cells: any, _cellIndex: number, isChapterHeader: boolean, licitRow: LicitTableRowElement, widthArray: number[], isTransparent: boolean): void;
    private addCell;
    checkCellStyle(style: string | null): string | null;
    private addTableImageCell;
    ParseNestedList(_listType: string, node: ChildNode, licitDocument: LicitDocumentElement, indent: number): void;
    /**
     * Returns the level of an element as described by the number at the end of its classname
     *
     * @param className - The className of the element
     * @returns The level as a number or zero if the level cannot be determined
     */
    extractLevel(className: string): number;
    /**
     * Determines if an element is a table or image then calls the appropriate parse method
     */
    parseTableFigure(element: Element): void;
    /**
     * Parse a table element
     */
    parseTable(element: Element, useEnhancedTables: boolean): void;
    /**
     * Parse a vignet element
     *
     * NOTE(review): the original comment here read "Parse a table element",
     * apparently copied from `parseTable`; reworded to match the method name —
     * confirm against the implementation.
     */
    parseVignet(element: Element): void;
    /**
     * Parse a figure (image) element
     */
    parseFigure(element: Element): void;
    /**
     * Parse a note element
     */
    parseNote(element: Element): void;
    /**
     * Parse an hr element
     */
    parseHR(element: Element): void;
    /**
     * Parse a chapter title element
     */
    parseChapterTitle(element: Element): void;
    /**
     * Parse a chapter subtitle element
     */
    parseChapterSubtitle(element: Element): void;
    /**
     * Parse a header element
     */
    parseHeader(element: Element, nextElement: Element): void;
    /**
     * Parse a bullet point item element
     */
    parseBullet(element: Element): void;
    /**
     * Parse an ordered list point item element
     */
    parseOrdered(element: Element): void;
    /**
     * Parse a paragraph element
     */
    parseParagraph(element: Element): void;
    parseDynamicHeader(element: Element): void;
    /** Sanitize the text content by removing specific characters */
    sanitizeText(element: Element): void;
    /**
     * Parse a figure (image) title element
     */
    parseFigureTitle(element: Element): void;
    /**
     * Parse a ChangeBarPara element
     */
    parseChangeBarPara(element: Element): void;
    /**
     * Parse a table title element
     */
    parseTableTitle(element: Element): void;
    /**
     * Parse an unknown element. Currently does nothing besides printing a warning to the console.
     */
    parseUnknownElement(element: Element, message: string): void;
    /**
     * Parse a section title element
     */
    parseSectionTitle(element: Element): void;
    /**
     * Parses an `Element` as determined by its `className`
     *
     * @param element - The `Element` to be parsed
     * @param nextElement - The element passed alongside `element`;
     * NOTE(review): presumably its following sibling — confirm.
     */
    parseElement(element: Element, nextElement: Element): void;
    parseElement_doc(element: Element, nextElement: Element): void;
    /**
     * Cleans up the HTML by calling certain helper methods
     */
    sanitizeHTML(html: string): string;
    /**
     * Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
     */
    replaceUnwantedChars(html: string): string;
    /**
     * Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
     */
    replaceKeywordsWithLinks(html: string): string;
    matchClassToExcludeNumber(className: string): boolean;
    sanitizeElement(element: Element): void;
    removeLastNumber(inputString: string): string;
    getScaledWidth(width: number): string;
    isTransparentTable(element: Element): boolean;
    /**
     * Extracts and calculates the column widths from a given HTML table element.
     *
     * This function reads `<col>` elements within a `<colgroup>` of the table and
     * computes the pixel-based width for each column. It handles widths specified
     * in percentages and pixels. If all widths are in pixels, they are scaled using
     * a separate scaling method. If the computed widths are invalid or incomplete,
     * the function returns `undefined`.
     *
     * @param table - The HTML table element from which column widths are to be extracted.
     * @returns An array of column widths in pixels, or `undefined` if the widths are invalid or missing.
     */
    getColWidthArray(table: HTMLTableElement): number[] | undefined;
    setCellWidth(colSpan: number, cellIndex: number, colWidthArray: number[]): number[];
    scaleWidthArray(rawWidthArray: number[]): number[];
    getSumOfArray(array: number[]): number;
    /**
     * Determines the orientation (portrait or landscape) based on the total width.
     *
     * @param totalWidth - The total width (in pixels) used to determine orientation.
     * @returns 'portrait' if the width is less than 700 pixels; otherwise 'landscape'.
     */
    findOrientation(totalWidth: number): 'portrait' | 'landscape';
    /**
     * Extracts image information from an HTMLImageElement.
     *
     * @param img - The image element to extract information from.
     * @returns An object containing the image's source URL, alt text, width, and height.
     */
    extractImageInfo(img: HTMLImageElement): ImageInfo;
    /**
     * Extracts note paragraphs from the last row of an HTML table if that row
     * contains a note header such as "OVERALL NOTE:" or "NOTES:".
     *
     * This function is designed for tables where the final row may optionally
     * contain a note. If such a note exists, it returns all <p> elements inside
     * the first <td> of that row, excluding the header line itself
     * (e.g., "OVERALL NOTE:" / "NOTES:").
     *
     * The returned <p> elements are kept as HTMLElement nodes so that they can
     * be further converted into structured ProseMirror content
     * (e.g., using NewLicitParagraphElement).
     *
     * If the table doesn't contain a note row, or if the expected structure is missing,
     * the function safely returns null.
     *
     * The member is private, so its signature is erased from this declaration;
     * the types below come from the original comment.
     *
     * @param table - The HTML table section (tbody, an `HTMLTableSectionElement`) to extract the note from.
     * @returns An array of <p> nodes (`HTMLElement[]`) representing the note paragraphs,
     * or null if no note row was found.
     */
    private extractNote;
    /**
     * Determines whether the given DOM element should be treated as a "table figure".
     *
     * Business context:
     * As per mail sent on 07 Aug 2025:
     * > "Can we sense when there is an image and a line or two of text – maybe remove the vignette control."
     *
     * This function implements that detection logic by identifying elements that match
     * either of the following patterns:
     *
     * 1. It is **not** a <DIV> element, and its first child element is an <IMG>.
     * 2. It is a <DIV> element that:
     *    - Contains at least one <IMG> element anywhere inside (at any depth),
     *    - Contains exactly one <P> element anywhere inside,
     *    - That <P> element's trimmed text content is less than 100 characters
     *      (representing "a line or two of text").
     *
     * @param node - The DOM element to check.
     * @returns `true` if the element qualifies as a table figure, otherwise `false`.
     */
    isTableFigureNode(node: Element): boolean;
    /**
     * Determines whether the provided class name corresponds to a note-related node.
     *
     * Checks if the given `className` matches any of the predefined note classes,
     * such as examples, notes, cautions, or warnings.
     *
     * @param className - The CSS class name to check.
     * @returns `true` if the class name is a recognized note node; otherwise, `false`.
     */
    private isNoteNode;
}
|
|
360
|
+
export {};
|