@modusoperandi/licit-import-utils 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,360 @@
1
+ /**
2
+ * @license MIT
3
+ * @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
4
+ */
5
+ import type { LicitDocumentJSON } from './licit-elements';
6
+ import { LicitBulletListElement, LicitDocumentElement, LicitEnhancedImageElement, LicitTableRowElement } from './licit-elements';
7
+ import type { UpdatedCapco } from './capco.util';
8
+ import type { MessageSink } from './types';
9
+ export interface ParserElement {
10
+ node: Element;
11
+ class: string;
12
+ type: ParserElementType;
13
+ level: number;
14
+ subText: string;
15
+ }
16
+ interface ImageInfo {
17
+ src: string;
18
+ alt: string;
19
+ width: number;
20
+ height: number;
21
+ }
22
+ declare enum ParserElementType {
23
+ ChapterTitle = 0,
24
+ ChapterSubtitle = 1,
25
+ ChapterFigureTitle = 2,
26
+ Header = 3,
27
+ Note = 4,
28
+ Paragraph = 5,
29
+ SectionTitle = 6,
30
+ TableTitle = 7,
31
+ FigureTitle = 8,
32
+ BulletListItem = 9,
33
+ OrderedListItem = 10,
34
+ Table = 11,
35
+ EnhancedTable = 12,
36
+ Figure = 13,
37
+ ChangeBarPara = 14,
38
+ hr = 15,
39
+ vignet = 16,
40
+ Uncategorized = 17,
41
+ infoIcon = 18,
42
+ NewFigureTitle = 19
43
+ }
44
+ export interface StyleInfo {
45
+ styleName: string;
46
+ styles?: {
47
+ styleLevel?: number | string;
48
+ };
49
+ }
50
+ export interface TransformConfig {
51
+ customStylesUrl: string;
52
+ replacementChars: {
53
+ find: string;
54
+ replace: string;
55
+ }[];
56
+ replaceCharacters: boolean;
57
+ stripSectionNumbers: boolean;
58
+ replaceWithLinks: {
59
+ find: string;
60
+ href: string;
61
+ }[];
62
+ messageSink?: MessageSink;
63
+ customStyles: StyleInfo[];
64
+ }
65
+ export declare const DEFAULT_Config: TransformConfig;
66
+ export declare function asTransformConfig(config?: Partial<TransformConfig>): {
67
+ customStylesUrl: string;
68
+ replacementChars: {
69
+ find: string;
70
+ replace: string;
71
+ }[];
72
+ replaceCharacters: boolean;
73
+ stripSectionNumbers: boolean;
74
+ replaceWithLinks: {
75
+ find: string;
76
+ href: string;
77
+ }[];
78
+ messageSink?: MessageSink;
79
+ customStyles: StyleInfo[];
80
+ };
81
+ export interface AddCellOptions {
82
+ bgColor: string;
83
+ isChapterHeader: boolean;
84
+ verAlign: string;
85
+ cellIndex: number;
86
+ widthArray: number[];
87
+ isTransparent: boolean;
88
+ }
89
+ export declare class LicitConverter {
90
+ private readonly config;
91
+ elementsParsedMap: Map<string, boolean>;
92
+ elements: ParserElement[];
93
+ constructor(config: TransformConfig);
94
+ parseHTML(html: Document, isDoctorine: boolean, moDocType?: string): LicitDocumentJSON;
95
+ parseFrameMakerHTML5(html: Element[]): LicitDocumentJSON;
96
+ render_FrameMakerHTML5_zip(nodes: NodeList, infoIconData?: HTMLOListElement[], _moDocType?: string, renderedContentList?: Node[]): LicitDocumentJSON;
97
+ render_FrameMakerHTML5_zip_SwitchHelper(e: ParserElement, infoIconData: HTMLOListElement[], renderedContentList: Node[], isNumberReseted: boolean, licitDocument: LicitDocumentElement): boolean;
98
+ private handleNodes;
99
+ fetchRenderedContent(nodes: NodeList): Node[];
100
+ /**
101
+ * Returns a map of the elements which were parsed.
102
+ *
103
+ * @returns Map of elements
104
+ */
105
+ getElementsParsedMap(): Map<string, boolean>;
106
+ getCustomStyle(styleName: string): StyleInfo | undefined;
107
+ handleOrderedListItem(e: ParserElement, licitDocument: LicitDocumentElement): void;
108
+ /**
109
+ * Renders the HTML as a Licit JSON structure
110
+ *
111
+ * @returns The document as a `LicitDocumentJSON` object
112
+ */
113
+ render(nodes: NodeListOf<Element>): LicitDocumentJSON;
114
+ renderSwitchHelper(e: ParserElement, licitDocument: LicitDocumentElement): void;
115
+ private renderTable;
116
+ private renderParagraph;
117
+ private renderHeader;
118
+ private buildElements;
119
+ checkChildNode(node: HTMLElement | Element, nextNode: HTMLElement | Element): number;
120
+ render_doc(nodes: NodeListOf<Element>, infoIconData: HTMLOListElement[] | undefined, moDocType: string): LicitDocumentJSON;
121
+ render_docSwitchHelper(e: ParserElement, licitDocument: LicitDocumentElement, tocRemoved: boolean, infoIconData: HTMLOListElement[], moDocType: string): boolean;
122
+ renderTypeParagraph(e: ParserElement, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
123
+ handle_UrlText(text: string, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
124
+ text_WithoutUrl(n: Node, licitDocument: LicitDocumentElement, infoIconData?: HTMLOListElement[]): void;
125
+ private handleNode;
126
+ mergeSpans(node: Element, nextNode: Element): number;
127
+ updateChildCapcoContent(e: ParserElement): void;
128
+ updateChildCapcoContentLoopHelper(childNodes: ChildNode[], res: UpdatedCapco): void;
129
+ processChildNodesCapco(childNodes: NodeListOf<ChildNode>): void;
130
+ updateCapcoToParagraph(child: ChildNode, res: UpdatedCapco): void;
131
+ processTableCapco(tableNode: HTMLTableElement): void;
132
+ figureTitleCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
133
+ handleImageChild(child: Element, licitDocument: LicitDocumentElement): void;
134
+ renderNewFigureTitle(e: ParserElement, licitDocument: LicitDocumentElement): void;
135
+ figureParagraphCase(e: ParserElement, licitDocument: LicitDocumentElement, infoIconData: HTMLOListElement[] | undefined, renderedContentList: Node[]): void;
136
+ figureNoteCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
137
+ figureTableTitleCase(e: ParserElement, licitDocument: LicitDocumentElement): void;
138
+ private renderDocVignet;
139
+ private parseUntypedDocVignet;
140
+ private parseTypedDocVignet;
141
+ parseTypedDocVignetHelper(val: string, bgColor: string, borderColor: string, boxWidth: number): {
142
+ bgColor: string;
143
+ borderColor: string;
144
+ boxWidth: number;
145
+ };
146
+ private renderDocTable;
147
+ private renderEnhancedTable;
148
+ private getLicitTable;
149
+ renderNewLicitImage(imageElement: HTMLImageElement, capco: string | null): LicitEnhancedImageElement;
150
+ renderDocBulletItems(e: ParserElement, licitDocument: LicitDocumentElement): void;
151
+ processBulletNodes(childNodes: Node[], bulletList: LicitBulletListElement, licitDocument: any, indent: number, e: any): void;
152
+ addElementLicit(licitDocument: any, bulletList: LicitBulletListElement): void;
153
+ removeEmptyATags(node: Node): void;
154
+ private handleULNode;
155
+ private renderDocFigure;
156
+ renderImage(imgElement: HTMLImageElement, licitDocument: LicitDocumentElement): void;
157
+ parseOL(e: ParserElement, licitDocument: LicitDocumentElement): void;
158
+ /**
159
+ * To parse table data
160
+ * @param _e - element
161
+ * @param tableTag - The tag name or identifier of the table.
162
+ * @param querySel - Selector for querying from table row
163
+ * @param isChapterHeader - flag to determine ChapterHeader
164
+ * @param licitTable - Licit Table Element
165
+ * @param widthArray - To scale the table to specific sizes
166
+ * @param isTransparent - flag to distinguish preface table
167
+ * @returns void
168
+ */
169
+ parseTableContent(_e: any, tableTag: any, querySel: any, isChapterHeader: any, licitTable: any, widthArray: number[], isTransparent: boolean): void;
170
+ parseTableContentInnerLoopHelper(cells: any, _cellIndex: number, isChapterHeader: boolean, licitRow: LicitTableRowElement, widthArray: number[], isTransparent: boolean): void;
171
+ private addCell;
172
+ checkCellStyle(style: string | null): string | null;
173
+ private addTableImageCell;
174
+ ParseNestedList(_listType: string, node: ChildNode, licitDocument: LicitDocumentElement, indent: number): void;
175
+ /**
176
+ * Returns the level of an element as described by the number at the end of its classname
177
+ *
178
+ * @param className - The className of the element
179
+ * @returns The level as a number or zero if the level cannot be determined
180
+ */
181
+ extractLevel(className: string): number;
182
+ /**
183
+ * Determines if an element is a table or image then calls the appropriate parse method
184
+ */
185
+ parseTableFigure(element: Element): void;
186
+ /**
187
+ * Parse a table element
188
+ */
189
+ parseTable(element: Element, useEnhancedTables: boolean): void;
190
+ /**
191
+ * Parse a vignet element
192
+ */
193
+ parseVignet(element: Element): void;
194
+ /**
195
+ * Parse a figure (image) element
196
+ */
197
+ parseFigure(element: Element): void;
198
+ /**
199
+ * Parse a note element
200
+ */
201
+ parseNote(element: Element): void;
202
+ /**
203
+ * Parse an hr element
204
+ */
205
+ parseHR(element: Element): void;
206
+ /**
207
+ * Parse a chapter title element
208
+ */
209
+ parseChapterTitle(element: Element): void;
210
+ /**
211
+ * Parse a chapter subtitle element
212
+ */
213
+ parseChapterSubtitle(element: Element): void;
214
+ /**
215
+ * Parse a header element
216
+ */
217
+ parseHeader(element: Element, nextElement: Element): void;
218
+ /**
219
+ * Parse a bullet point item element
220
+ */
221
+ parseBullet(element: Element): void;
222
+ /**
223
+ * Parse an ordered list item element
224
+ */
225
+ parseOrdered(element: Element): void;
226
+ /**
227
+ * Parse a paragraph element
228
+ */
229
+ parseParagraph(element: Element): void;
230
+ parseDynamicHeader(element: Element): void;
231
+ /** Sanitize the text content by removing specific characters */
232
+ sanitizeText(element: Element): void;
233
+ /**
234
+ * Parse a figure (image) title element
235
+ */
236
+ parseFigureTitle(element: Element): void;
237
+ /**
238
+ * Parse a ChangeBarPara element
239
+ */
240
+ parseChangeBarPara(element: Element): void;
241
+ /**
242
+ * Parse a table title element
243
+ */
244
+ parseTableTitle(element: Element): void;
245
+ /**
246
+ * Parse an unknown element. Currently does nothing besides printing a warning to the console.
247
+ */
248
+ parseUnknownElement(element: Element, message: string): void;
249
+ /**
250
+ * Parse a section title element
251
+ */
252
+ parseSectionTitle(element: Element): void;
253
+ /**
254
+ * Parses an `Element` as determined by its `className`
255
+ *
256
+ * @param element - The `Element` to be parsed
257
+ */
258
+ parseElement(element: Element, nextElement: Element): void;
259
+ parseElement_doc(element: Element, nextElement: Element): void;
260
+ /**
261
+ * Cleans up the HTML by calling certain helper methods
262
+ */
263
+ sanitizeHTML(html: string): string;
264
+ /**
265
+ * Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
266
+ */
267
+ replaceUnwantedChars(html: string): string;
268
+ /**
269
+ * Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
270
+ */
271
+ replaceKeywordsWithLinks(html: string): string;
272
+ matchClassToExcludeNumber(className: string): boolean;
273
+ sanitizeElement(element: Element): void;
274
+ removeLastNumber(inputString: string): string;
275
+ getScaledWidth(width: number): string;
276
+ isTransparentTable(element: Element): boolean;
277
+ /**
278
+ * Extracts and calculates the column widths from a given HTML table element.
279
+ *
280
+ * This function reads `<col>` elements within a `<colgroup>` of the table and
281
+ * computes the pixel-based width for each column. It handles widths specified
282
+ * in percentages and pixels. If all widths are in pixels, they are scaled using
283
+ * a separate scaling method. If the computed widths are invalid or incomplete,
284
+ * the function returns `undefined`.
285
+ *
286
+ * @param {HTMLTableElement} table - The HTML table element from which column widths are to be extracted.
287
+ * @returns {number[] | undefined} An array of column widths in pixels, or `undefined` if the widths are invalid or missing.
288
+ */
289
+ getColWidthArray(table: HTMLTableElement): number[] | undefined;
290
+ setCellWidth(colSpan: number, cellIndex: number, colWidthArray: number[]): number[];
291
+ scaleWidthArray(rawWidthArray: number[]): number[];
292
+ getSumOfArray(array: number[]): number;
293
+ /**
294
+ * Determines the orientation (portrait or landscape) based on the total width.
295
+ *
296
+ * @param {number} totalWidth - The total width (in pixels) used to determine orientation.
297
+ * @returns {'portrait' | 'landscape'} Returns 'portrait' if the width is less than 700 pixels; otherwise, returns 'landscape'.
298
+ */
299
+ findOrientation(totalWidth: number): 'portrait' | 'landscape';
300
+ /**
301
+ * Extracts image information from an HTMLImageElement.
302
+ *
303
+ * @param {HTMLImageElement} img - The image element to extract information from.
304
+ * @returns {{ src: string; alt: string; width: number; height: number }} An object containing the image's source URL, alt text, width, and height.
305
+ */
306
+ extractImageInfo(img: HTMLImageElement): ImageInfo;
307
+ /**
308
+ * Extracts note paragraphs from the last row of an HTML table if that row
309
+ * contains a note header such as "OVERALL NOTE:" or "NOTES:".
310
+ *
311
+ * This function is designed for tables where the final row may optionally
312
+ * contain a note. If such a note exists, it returns all <p> elements inside
313
+ * the first <td> of that row, excluding the header line itself
314
+ * (e.g., "OVERALL NOTE:" / "NOTES:").
315
+ *
316
+ * The returned <p> elements are kept as HTMLElement nodes so that they can
317
+ * be further converted into structured ProseMirror content
318
+ * (e.g., using NewLicitParagraphElement).
319
+ *
320
+ * If the table doesn't contain a note row, or if the expected structure is missing,
321
+ * the function safely returns null.
322
+ *
323
+ * @param {HTMLTableSectionElement} table - The HTML table section (tbody) to extract the note from.
324
+ * @returns {HTMLElement[] | null} - An array of <p> nodes representing the note paragraphs,
325
+ * or null if no note row was found.
326
+ */
327
+ private extractNote;
328
+ /**
329
+ * Determines whether the given DOM element should be treated as a "table figure".
330
+ *
331
+ * Business context:
332
+ * As per mail sent on 07 Aug 2025:
333
+ * > "Can we sense when there is an image and a line or two of text – maybe remove the vignette control."
334
+ *
335
+ * This function implements that detection logic by identifying elements that match
336
+ * either of the following patterns:
337
+ *
338
+ * 1. It is **not** a <DIV> element, and its first child element is an <IMG>.
339
+ * 2. It is a <DIV> element that:
340
+ * - Contains at least one <IMG> element anywhere inside (at any depth),
341
+ * - Contains exactly one <P> element anywhere inside,
342
+ * - That <P> element's trimmed text content is less than 100 characters
343
+ * (representing "a line or two of text").
344
+ *
345
+ * @param {Element} node - The DOM element to check.
346
+ * @returns {boolean} `true` if the element qualifies as a table figure, otherwise `false`.
347
+ */
348
+ isTableFigureNode(node: Element): boolean;
349
+ /**
350
+ * Determines whether the provided class name corresponds to a note-related node.
351
+ *
352
+ * Checks if the given `className` matches any of the predefined note classes,
353
+ * such as examples, notes, cautions, or warnings.
354
+ *
355
+ * @param className - The CSS class name to check.
356
+ * @returns `true` if the class name is a recognized note node; otherwise, `false`.
357
+ */
358
+ private isNoteNode;
359
+ }
360
+ export {};