@modusoperandi/licit-import-utils 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/capco.util.d.ts +38 -0
- package/capco.util.js +195 -0
- package/index.d.ts +8 -0
- package/index.js +8 -0
- package/licit-elements.d.ts +878 -0
- package/licit-elements.js +2588 -0
- package/licit-transform.d.ts +360 -0
- package/licit-transform.js +2197 -0
- package/package.json +52 -0
- package/transform.docx.d.ts +16 -0
- package/transform.docx.js +154 -0
- package/transform.utils.d.ts +17 -0
- package/transform.utils.js +155 -0
- package/transform.zip.d.ts +5 -0
- package/transform.zip.js +296 -0
- package/types.d.ts +9 -0
- package/types.js +5 -0
- package/zip.utils.d.ts +6 -0
- package/zip.utils.js +23 -0
|
@@ -0,0 +1,2197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license MIT
|
|
3
|
+
* @copyright Copyright 2026 Modus Operandi Inc. All Rights Reserved.
|
|
4
|
+
*/
|
|
5
|
+
import { getElementAlignment, LicitBulletListElement, LicitBulletListItemElement, LicitDocumentElement, LicitEnhancedImageBodyElement, LicitEnhancedImageElement, LicitEnhancedTableElement, LicitEnhancedTableFigureBodyElement, LicitEnhancedTableFigureCapcoElement, LicitEnhancedTableNotesElement, LicitErrorTextElement, LicitHeaderElement, LicitHRElement, LicitNewImageElement, LicitOrderedListElement, LicitParagraphElement, LicitParagraphImageElement, LicitParagraphNote, LicitTableCellElement, LicitTableCellImageElement, LicitTableCellParaElement, LicitTableCellParagraph, LicitTableElement, LicitTableRowElement, LicitVignetteElement, NewLicitParagraphElement, shouldSkipNext, } from './licit-elements';
|
|
6
|
+
import { getCapcoFromNode, getCapcoObject, safeCapcoParse, getShortCapcoString, updateCapcoFromContent, removeCapcoTextFromNode, } from './capco.util';
|
|
7
|
+
/**
 * Numeric tags for the kinds of element the parser produces.
 *
 * The object maps both ways: `ParserElementType.Header === 3` and
 * `ParserElementType[3] === 'Header'`. Ordinals follow the order of the
 * list below, starting at 0.
 */
var ParserElementType;
(function (ParserElementType) {
    const labels = [
        'ChapterTitle',
        'ChapterSubtitle',
        'ChapterFigureTitle',
        'Header',
        'Note',
        'Paragraph',
        'SectionTitle',
        'TableTitle',
        'FigureTitle',
        'BulletListItem',
        'OrderedListItem',
        'Table',
        'EnhancedTable',
        'Figure',
        'ChangeBarPara',
        'hr',
        'vignet',
        'Uncategorized',
        'infoIcon',
        'NewFigureTitle',
    ];
    labels.forEach((label, ordinal) => {
        // Forward (name -> ordinal) and reverse (ordinal -> name) entries.
        ParserElementType[label] = ordinal;
        ParserElementType[ordinal] = label;
    });
})(ParserElementType || (ParserElementType = {}));
|
|
30
|
+
/**
 * Baseline transform settings. `asTransformConfig` layers caller overrides
 * on top of these values.
 */
export const DEFAULT_Config = {
    customStylesUrl: 'styles/',
    replacementChars: [
        {
            find: '‘',
            replace: "'",
        },
        {
            find: '’',
            replace: "'",
        },
        {
            find: '“',
            replace: '"',
        },
        {
            find: '”',
            replace: '"',
        },
        {
            find: '±',
            replace: '+/-',
        },
    ],
    stripSectionNumbers: true,
    replaceCharacters: true,
    replaceWithLinks: [],
    customStyles: [],
};
/**
 * Copies an array-valued default (and any plain-object entries in it) so
 * the caller never receives a reference into `DEFAULT_Config`.
 * Non-array values are returned unchanged.
 */
function cloneDefaultValue(value) {
    if (!Array.isArray(value)) {
        return value;
    }
    return value.map((entry) => entry !== null && typeof entry === 'object' ? { ...entry } : entry);
}
/**
 * Builds a complete transform configuration from a partial one.
 *
 * Keys present in `config` win over the defaults (plain object-spread
 * semantics, so an explicit `undefined` also overrides). Array defaults are
 * cloned per call: previously the returned object shared its arrays with
 * `DEFAULT_Config`, so pushing onto e.g. `replaceWithLinks` of one result
 * silently changed the defaults for every later call.
 *
 * @param config Partial configuration; `null`/`undefined` mean "no overrides".
 * @returns A new object containing every default key plus the overrides.
 */
export function asTransformConfig(config = {}) {
    // The default parameter only covers `undefined`; this also covers `null`.
    config ??= {};
    const defaults = Object.fromEntries(Object.entries(DEFAULT_Config).map(([key, value]) => [key, cloneDefaultValue(value)]));
    return {
        ...defaults,
        ...config,
    };
}
|
|
66
|
+
export class LicitConverter {
|
|
67
|
+
config;
|
|
68
|
+
elementsParsedMap = new Map();
|
|
69
|
+
elements = [];
|
|
70
|
+
constructor(config) {
|
|
71
|
+
this.config = config;
|
|
72
|
+
}
|
|
73
|
+
parseHTML(html, isDoctorine, moDocType) {
|
|
74
|
+
if (typeof html === 'string') {
|
|
75
|
+
if (!isDoctorine) {
|
|
76
|
+
this.sanitizeHTML(html);
|
|
77
|
+
}
|
|
78
|
+
html = new DOMParser().parseFromString(html, 'text/html');
|
|
79
|
+
}
|
|
80
|
+
let nodes;
|
|
81
|
+
if (isDoctorine) {
|
|
82
|
+
nodes = html.querySelectorAll('body > *');
|
|
83
|
+
}
|
|
84
|
+
else {
|
|
85
|
+
const firstChild = html.querySelector('body > *');
|
|
86
|
+
if (firstChild.tagName === 'DIV') {
|
|
87
|
+
nodes = html.querySelectorAll('body > div > *');
|
|
88
|
+
}
|
|
89
|
+
else {
|
|
90
|
+
nodes = html.querySelectorAll('body > *');
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
// to print all the nodes in console
|
|
94
|
+
// this.printNodes(nodes, 0);
|
|
95
|
+
this.elements = [];
|
|
96
|
+
if (isDoctorine) {
|
|
97
|
+
this.elementsParsedMap.clear();
|
|
98
|
+
return this.render_doc(nodes, extractInfoIconData(html), moDocType);
|
|
99
|
+
}
|
|
100
|
+
else {
|
|
101
|
+
return this.render(nodes);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
parseFrameMakerHTML5(html) {
|
|
105
|
+
this.elements = [];
|
|
106
|
+
if (!html?.length) {
|
|
107
|
+
return undefined;
|
|
108
|
+
}
|
|
109
|
+
let parentDiv = html[0];
|
|
110
|
+
if (parentDiv.tagName != 'DIV') {
|
|
111
|
+
parentDiv = document.createElement('div');
|
|
112
|
+
parentDiv.appendChild(html[0].cloneNode(true));
|
|
113
|
+
}
|
|
114
|
+
// skip first. First element is parent.
|
|
115
|
+
for (const e of html.slice(1)) {
|
|
116
|
+
parentDiv.appendChild(e.cloneNode(true));
|
|
117
|
+
}
|
|
118
|
+
const dom = parentDiv;
|
|
119
|
+
const nodes = dom.tagName === 'DIV'
|
|
120
|
+
? dom.querySelectorAll('div > *')
|
|
121
|
+
: dom.querySelectorAll('*');
|
|
122
|
+
const renderedContentList = this.fetchRenderedContent(nodes);
|
|
123
|
+
return this.render_FrameMakerHTML5_zip(nodes, extractInfoIconData(dom), null, renderedContentList);
|
|
124
|
+
}
|
|
125
|
+
render_FrameMakerHTML5_zip(nodes, infoIconData, _moDocType, renderedContentList) {
|
|
126
|
+
// Build elements, joining special cases
|
|
127
|
+
this.handleNodes(nodes);
|
|
128
|
+
let isNumberReseted = false;
|
|
129
|
+
const licitDocument = new LicitDocumentElement();
|
|
130
|
+
for (const e of this.elements) {
|
|
131
|
+
this.updateChildCapcoContent(e);
|
|
132
|
+
isNumberReseted = this.render_FrameMakerHTML5_zip_SwitchHelper(e, infoIconData, renderedContentList, isNumberReseted, licitDocument);
|
|
133
|
+
}
|
|
134
|
+
return licitDocument.render();
|
|
135
|
+
}
|
|
136
|
+
render_FrameMakerHTML5_zip_SwitchHelper(e, infoIconData, renderedContentList, isNumberReseted, licitDocument) {
|
|
137
|
+
let resetNumbering = isNumberReseted;
|
|
138
|
+
switch (e.type) {
|
|
139
|
+
case ParserElementType.ChapterTitle:
|
|
140
|
+
case ParserElementType.ChapterSubtitle:
|
|
141
|
+
case ParserElementType.Header: {
|
|
142
|
+
const n = e.node;
|
|
143
|
+
if (n) {
|
|
144
|
+
const paragraph = new NewLicitParagraphElement(n, infoIconData, renderedContentList);
|
|
145
|
+
if (e.subText.length > 0) {
|
|
146
|
+
const subMark = {
|
|
147
|
+
type: 'text',
|
|
148
|
+
text: e.subText,
|
|
149
|
+
};
|
|
150
|
+
paragraph.marks.push(subMark);
|
|
151
|
+
}
|
|
152
|
+
//Reset numbering for first attachmentTitle
|
|
153
|
+
if (n.className === 'attachmentTitle' && !resetNumbering) {
|
|
154
|
+
resetNumbering = true;
|
|
155
|
+
paragraph.reset = true;
|
|
156
|
+
}
|
|
157
|
+
licitDocument.appendElement(paragraph);
|
|
158
|
+
}
|
|
159
|
+
break;
|
|
160
|
+
}
|
|
161
|
+
case ParserElementType.Figure: {
|
|
162
|
+
this.renderDocFigure(e, licitDocument);
|
|
163
|
+
break;
|
|
164
|
+
}
|
|
165
|
+
case ParserElementType.BulletListItem: {
|
|
166
|
+
this.renderDocBulletItems(e, licitDocument);
|
|
167
|
+
break;
|
|
168
|
+
}
|
|
169
|
+
case ParserElementType.Paragraph:
|
|
170
|
+
this.figureParagraphCase(e, licitDocument, infoIconData, renderedContentList);
|
|
171
|
+
break;
|
|
172
|
+
case ParserElementType.Note:
|
|
173
|
+
this.figureNoteCase(e, licitDocument);
|
|
174
|
+
break;
|
|
175
|
+
case ParserElementType.Table: {
|
|
176
|
+
this.renderDocTable(e, licitDocument);
|
|
177
|
+
break;
|
|
178
|
+
}
|
|
179
|
+
case ParserElementType.EnhancedTable: {
|
|
180
|
+
this.renderEnhancedTable(e, licitDocument);
|
|
181
|
+
break;
|
|
182
|
+
}
|
|
183
|
+
case ParserElementType.TableTitle:
|
|
184
|
+
this.figureTableTitleCase(e, licitDocument);
|
|
185
|
+
break;
|
|
186
|
+
case ParserElementType.FigureTitle:
|
|
187
|
+
this.figureTitleCase(e, licitDocument);
|
|
188
|
+
break;
|
|
189
|
+
case ParserElementType.NewFigureTitle:
|
|
190
|
+
this.renderNewFigureTitle(e, licitDocument);
|
|
191
|
+
break;
|
|
192
|
+
case ParserElementType.SectionTitle: {
|
|
193
|
+
const styleName = e.node.getAttribute('class') ?? 'normal';
|
|
194
|
+
const text = e.node.textContent;
|
|
195
|
+
if (text) {
|
|
196
|
+
const header = new LicitHeaderElement(text, '', 0, styleName, getCapcoFromNode(e.node) ?? '', e.node);
|
|
197
|
+
header.align = 'center';
|
|
198
|
+
licitDocument.appendElement(header);
|
|
199
|
+
}
|
|
200
|
+
break;
|
|
201
|
+
}
|
|
202
|
+
default:
|
|
203
|
+
console.warn(`Parser not configured to render element: ${e.class}`);
|
|
204
|
+
}
|
|
205
|
+
return resetNumbering;
|
|
206
|
+
}
|
|
207
|
+
handleNodes(nodes) {
|
|
208
|
+
let skipCount = 0;
|
|
209
|
+
for (let i = 0; i < nodes.length; i++) {
|
|
210
|
+
if (skipCount > 0) {
|
|
211
|
+
skipCount--;
|
|
212
|
+
continue;
|
|
213
|
+
}
|
|
214
|
+
const node = nodes[i];
|
|
215
|
+
const nextNode = nodes[i + 1];
|
|
216
|
+
skipCount = this.handleNode(node, nextNode);
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
fetchRenderedContent(nodes) {
|
|
220
|
+
const renderedArr = [];
|
|
221
|
+
// Process paragraph nodes with anchor tags that have hash values
|
|
222
|
+
const processParagraphNodes = (node) => {
|
|
223
|
+
const anchorTags = Array.from(node.getElementsByTagName('a'));
|
|
224
|
+
for (const anchorTag of anchorTags) {
|
|
225
|
+
if (anchorTag.hash && anchorTag.hash.trim() !== '') {
|
|
226
|
+
renderedArr.push(node);
|
|
227
|
+
break;
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
};
|
|
231
|
+
// Process ordered list nodes with specific anchor tag conditions
|
|
232
|
+
const processOrderedListNodes = (node) => {
|
|
233
|
+
const anchorTags = Array.from(node.getElementsByTagName('a'));
|
|
234
|
+
for (let i = 0; i < anchorTags.length - 1; i++) {
|
|
235
|
+
const currentAnchor = anchorTags[i];
|
|
236
|
+
const nextAnchor = anchorTags[i + 2];
|
|
237
|
+
const isInnerLink = currentAnchor?.parentElement?.innerHTML.includes(' ');
|
|
238
|
+
if (isInnerLink && nextAnchor) {
|
|
239
|
+
// Get the NAME from the first anchor
|
|
240
|
+
const nameValue = currentAnchor.getAttribute('NAME');
|
|
241
|
+
if (nameValue &&
|
|
242
|
+
nameValue == Number.parseInt(nameValue, 10).toString()) {
|
|
243
|
+
// Set this NAME to the second anchor
|
|
244
|
+
nextAnchor.setAttribute('NAME', nameValue);
|
|
245
|
+
// Delete the current anchor node
|
|
246
|
+
currentAnchor.remove();
|
|
247
|
+
break;
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
};
|
|
252
|
+
// Process each node based on its type
|
|
253
|
+
for (const node of Array.from(nodes)) {
|
|
254
|
+
if (node.nodeName === 'P') {
|
|
255
|
+
processParagraphNodes(node);
|
|
256
|
+
}
|
|
257
|
+
else if (node.nodeName === 'OL') {
|
|
258
|
+
processOrderedListNodes(node);
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
return renderedArr;
|
|
262
|
+
}
|
|
263
|
+
/**
|
|
264
|
+
* Returns a map elements which were parsed.
|
|
265
|
+
*
|
|
266
|
+
* @returns Map of elements
|
|
267
|
+
*/
|
|
268
|
+
getElementsParsedMap() {
|
|
269
|
+
return this.elementsParsedMap;
|
|
270
|
+
}
|
|
271
|
+
getCustomStyle(styleName) {
|
|
272
|
+
return this.config.customStyles?.find((s) => s.styleName === styleName);
|
|
273
|
+
}
|
|
274
|
+
handleOrderedListItem(e, licitDocument) {
|
|
275
|
+
const orderedList = new LicitOrderedListElement(0);
|
|
276
|
+
const text = e.node.textContent;
|
|
277
|
+
if (text) {
|
|
278
|
+
const orderedItem = new LicitBulletListItemElement(e.node);
|
|
279
|
+
orderedList.addItem(orderedItem);
|
|
280
|
+
orderedList.styleLevel = e.level;
|
|
281
|
+
licitDocument.appendElement(orderedList);
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
/**
|
|
285
|
+
* Renders the HTML as a Licit JSON structure
|
|
286
|
+
*
|
|
287
|
+
* @returns The document as an `LicitDocumentJSON` object
|
|
288
|
+
*/
|
|
289
|
+
render(nodes) {
|
|
290
|
+
// Build elements, joining special cases
|
|
291
|
+
this.buildElements(nodes);
|
|
292
|
+
const licitDocument = new LicitDocumentElement();
|
|
293
|
+
for (const e of this.elements) {
|
|
294
|
+
this.renderSwitchHelper(e, licitDocument);
|
|
295
|
+
}
|
|
296
|
+
return licitDocument.render();
|
|
297
|
+
}
|
|
298
|
+
renderSwitchHelper(e, licitDocument) {
|
|
299
|
+
switch (e.type) {
|
|
300
|
+
case ParserElementType.ChapterTitle:
|
|
301
|
+
case ParserElementType.ChapterSubtitle:
|
|
302
|
+
case ParserElementType.Header: {
|
|
303
|
+
this.renderHeader(e, licitDocument);
|
|
304
|
+
break;
|
|
305
|
+
}
|
|
306
|
+
case ParserElementType.Figure: {
|
|
307
|
+
const image = e.node.querySelector('img');
|
|
308
|
+
const source = image?.src;
|
|
309
|
+
const alt = image?.alt;
|
|
310
|
+
const width = image?.getAttribute('width');
|
|
311
|
+
const height = image?.getAttribute('height');
|
|
312
|
+
const align = getElementAlignment(image);
|
|
313
|
+
if (source) {
|
|
314
|
+
// seybi excluded image
|
|
315
|
+
const imageElement = new LicitParagraphImageElement(source, alt, width, height, align);
|
|
316
|
+
licitDocument.appendElement(imageElement);
|
|
317
|
+
}
|
|
318
|
+
break;
|
|
319
|
+
}
|
|
320
|
+
case ParserElementType.OrderedListItem: {
|
|
321
|
+
this.handleOrderedListItem(e, licitDocument);
|
|
322
|
+
break;
|
|
323
|
+
}
|
|
324
|
+
case ParserElementType.BulletListItem: {
|
|
325
|
+
const bulletList = new LicitBulletListElement(0);
|
|
326
|
+
const text = e.node.textContent;
|
|
327
|
+
if (text) {
|
|
328
|
+
const bulletItem = new LicitBulletListItemElement(e.node);
|
|
329
|
+
bulletList.addItem(bulletItem);
|
|
330
|
+
bulletList.styleLevel = e.level;
|
|
331
|
+
licitDocument.appendElement(bulletList);
|
|
332
|
+
}
|
|
333
|
+
break;
|
|
334
|
+
}
|
|
335
|
+
case ParserElementType.Paragraph: {
|
|
336
|
+
// SL-15
|
|
337
|
+
this.renderParagraph(e, licitDocument);
|
|
338
|
+
break;
|
|
339
|
+
}
|
|
340
|
+
case ParserElementType.Note: {
|
|
341
|
+
const text = e.node.textContent;
|
|
342
|
+
if (text) {
|
|
343
|
+
const paragraph = new LicitParagraphElement(text);
|
|
344
|
+
paragraph.styleLevel = 0;
|
|
345
|
+
licitDocument.appendElement(paragraph);
|
|
346
|
+
}
|
|
347
|
+
break;
|
|
348
|
+
}
|
|
349
|
+
case ParserElementType.Table: {
|
|
350
|
+
this.renderTable(e, licitDocument);
|
|
351
|
+
break;
|
|
352
|
+
}
|
|
353
|
+
case ParserElementType.FigureTitle:
|
|
354
|
+
case ParserElementType.TableTitle: {
|
|
355
|
+
const text = e.node.textContent;
|
|
356
|
+
const styleName = e.node.getAttribute('class') ?? 'normal';
|
|
357
|
+
if (text) {
|
|
358
|
+
const header = new LicitHeaderElement(text, '', 0, styleName, getCapcoFromNode(e.node) ?? '', e.node);
|
|
359
|
+
licitDocument.appendElement(header);
|
|
360
|
+
}
|
|
361
|
+
break;
|
|
362
|
+
}
|
|
363
|
+
case ParserElementType.SectionTitle: {
|
|
364
|
+
const text = e.node.textContent;
|
|
365
|
+
const styleName = e.node.getAttribute('class') ?? 'normal';
|
|
366
|
+
if (text) {
|
|
367
|
+
const header = new LicitHeaderElement(text, '', 0, styleName, getCapcoFromNode(e.node) ?? '', e.node);
|
|
368
|
+
header.align = 'center';
|
|
369
|
+
licitDocument.appendElement(header);
|
|
370
|
+
}
|
|
371
|
+
break;
|
|
372
|
+
}
|
|
373
|
+
default:
|
|
374
|
+
console.warn(`Parser not configured to render element: ${e.class}`);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
renderTable(e, licitDocument) {
|
|
378
|
+
const licitTable = new LicitTableElement();
|
|
379
|
+
const table = e.node.querySelector('table');
|
|
380
|
+
if (table) {
|
|
381
|
+
const rows = table.querySelectorAll('tr');
|
|
382
|
+
for (const row of Array.from(rows)) {
|
|
383
|
+
const licitRow = new LicitTableRowElement();
|
|
384
|
+
const cells = row.querySelectorAll('td');
|
|
385
|
+
for (const cell of Array.from(cells)) {
|
|
386
|
+
const rowspan = cell.rowSpan;
|
|
387
|
+
const colspan = cell.colSpan;
|
|
388
|
+
const text = cell.textContent || '';
|
|
389
|
+
const licitCell = new LicitTableCellElement(text);
|
|
390
|
+
licitCell.rowspan = rowspan;
|
|
391
|
+
licitCell.colspan = colspan;
|
|
392
|
+
licitRow.addCell(licitCell);
|
|
393
|
+
}
|
|
394
|
+
licitTable.addRow(licitRow);
|
|
395
|
+
}
|
|
396
|
+
licitDocument.appendElement(licitTable);
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
renderParagraph(e, licitDocument) {
|
|
400
|
+
const text = e.node.textContent;
|
|
401
|
+
if (text) {
|
|
402
|
+
const paragraph = new LicitParagraphElement(text);
|
|
403
|
+
if (e.node.attributes.getNamedItem('align')) {
|
|
404
|
+
paragraph.align = e.node.attributes.getNamedItem('align').value;
|
|
405
|
+
}
|
|
406
|
+
licitDocument.appendElement(paragraph);
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
renderHeader(e, licitDocument) {
|
|
410
|
+
const text = e.node.textContent;
|
|
411
|
+
const subText = e.subText || '';
|
|
412
|
+
const styleName = e.node.getAttribute('class') ?? 'normal';
|
|
413
|
+
if (text) {
|
|
414
|
+
const header = new LicitHeaderElement(text, subText, e.level, styleName, getCapcoFromNode(e.node) ?? '', e.node);
|
|
415
|
+
if (e.type !== ParserElementType.Header) {
|
|
416
|
+
header.align = 'center';
|
|
417
|
+
}
|
|
418
|
+
licitDocument.appendElement(header);
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
buildElements(nodes) {
|
|
422
|
+
let skipNext = false;
|
|
423
|
+
for (let i = 0; i < nodes.length; i++) {
|
|
424
|
+
if (skipNext) {
|
|
425
|
+
skipNext = false;
|
|
426
|
+
continue;
|
|
427
|
+
}
|
|
428
|
+
const node = nodes[i];
|
|
429
|
+
const nextNode = nodes[i + 1];
|
|
430
|
+
const className = node.className;
|
|
431
|
+
if (!className) {
|
|
432
|
+
if (!node.tagName) {
|
|
433
|
+
this.parseTableFigure(node);
|
|
434
|
+
continue;
|
|
435
|
+
}
|
|
436
|
+
if (node.tagName === 'OL') {
|
|
437
|
+
this.checkChildNode(node, nextNode);
|
|
438
|
+
}
|
|
439
|
+
else {
|
|
440
|
+
this.parseTableFigure(node);
|
|
441
|
+
}
|
|
442
|
+
continue;
|
|
443
|
+
}
|
|
444
|
+
this.parseElement(node, nextNode);
|
|
445
|
+
// className is set before parseElement / stripFmPrefix is called.
|
|
446
|
+
// do not remove 'FM_ from the switch statement below.
|
|
447
|
+
switch (className) {
|
|
448
|
+
case 'FM_chpara0':
|
|
449
|
+
case 'FM_attpara0':
|
|
450
|
+
case 'FM_chsubpara1':
|
|
451
|
+
case 'FM_attsubpara1':
|
|
452
|
+
skipNext = !!nextNode;
|
|
453
|
+
break;
|
|
454
|
+
default:
|
|
455
|
+
skipNext = false;
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
checkChildNode(node, nextNode) {
|
|
460
|
+
const children = node.children;
|
|
461
|
+
let skipCount = 0;
|
|
462
|
+
if (children) {
|
|
463
|
+
for (let j = 0; j < children.length; j++) {
|
|
464
|
+
if (skipCount > 0) {
|
|
465
|
+
skipCount--;
|
|
466
|
+
continue;
|
|
467
|
+
}
|
|
468
|
+
const childNode = children[j];
|
|
469
|
+
let nextChildNode = children[j + 1];
|
|
470
|
+
// KNITE-1013: Handling paragraph combining logic for the case where
|
|
471
|
+
// heading is inside <OL>/<UL> and content is outside
|
|
472
|
+
if (!nextChildNode &&
|
|
473
|
+
(node.tagName === 'OL' || node.tagName === 'UL') &&
|
|
474
|
+
shouldSkipNext(childNode.className)) {
|
|
475
|
+
nextChildNode = nextNode;
|
|
476
|
+
}
|
|
477
|
+
skipCount = this.handleNode(childNode, nextChildNode);
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
return skipCount;
|
|
481
|
+
}
|
|
482
|
+
render_doc(nodes, infoIconData, moDocType) {
|
|
483
|
+
// Build elements, joining special cases
|
|
484
|
+
for (const node of Array.from(nodes)) {
|
|
485
|
+
if (this.isTableFigureNode(node)) {
|
|
486
|
+
this.parseTableFigure(node);
|
|
487
|
+
}
|
|
488
|
+
else if (node.children.item(0)?.tagName === 'TBODY' ||
|
|
489
|
+
node.children.item(0)?.tagName === 'THEAD') {
|
|
490
|
+
this.parseTable(node, false);
|
|
491
|
+
}
|
|
492
|
+
else {
|
|
493
|
+
this.parseElement_doc(node, null);
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
const licitDocument = new LicitDocumentElement();
|
|
497
|
+
let tocRemoved = false;
|
|
498
|
+
for (const e of this.elements) {
|
|
499
|
+
this.updateChildCapcoContent(e);
|
|
500
|
+
tocRemoved = this.render_docSwitchHelper(e, licitDocument, tocRemoved, infoIconData, moDocType);
|
|
501
|
+
}
|
|
502
|
+
return licitDocument.render();
|
|
503
|
+
}
|
|
504
|
+
render_docSwitchHelper(e, licitDocument, tocRemoved, infoIconData, moDocType) {
|
|
505
|
+
switch (e.type) {
|
|
506
|
+
case ParserElementType.ChapterTitle:
|
|
507
|
+
case ParserElementType.ChapterSubtitle:
|
|
508
|
+
case ParserElementType.Header: {
|
|
509
|
+
this.renderHeader(e, licitDocument);
|
|
510
|
+
break;
|
|
511
|
+
}
|
|
512
|
+
case ParserElementType.Figure: {
|
|
513
|
+
this.renderDocFigure(e, licitDocument);
|
|
514
|
+
break;
|
|
515
|
+
}
|
|
516
|
+
case ParserElementType.BulletListItem: {
|
|
517
|
+
this.renderDocBulletItems(e, licitDocument);
|
|
518
|
+
break;
|
|
519
|
+
}
|
|
520
|
+
case ParserElementType.OrderedListItem: {
|
|
521
|
+
this.parseOL(e, licitDocument);
|
|
522
|
+
break;
|
|
523
|
+
}
|
|
524
|
+
case ParserElementType.Paragraph: {
|
|
525
|
+
//SL-14
|
|
526
|
+
// Remove 'Table of Contents'
|
|
527
|
+
const text = (e.node.textContent || '').trim();
|
|
528
|
+
if (!tocRemoved && text.toLowerCase() === 'table of contents') {
|
|
529
|
+
tocRemoved = true;
|
|
530
|
+
return tocRemoved;
|
|
531
|
+
}
|
|
532
|
+
this.renderTypeParagraph(e, licitDocument, infoIconData);
|
|
533
|
+
break;
|
|
534
|
+
}
|
|
535
|
+
case ParserElementType.Note: {
|
|
536
|
+
const text = e.node.textContent;
|
|
537
|
+
if (text) {
|
|
538
|
+
const paragraph = new LicitParagraphElement(text);
|
|
539
|
+
paragraph.styleLevel = 0;
|
|
540
|
+
licitDocument.appendElement(paragraph);
|
|
541
|
+
}
|
|
542
|
+
break;
|
|
543
|
+
}
|
|
544
|
+
case ParserElementType.Table:
|
|
545
|
+
case ParserElementType.EnhancedTable: {
|
|
546
|
+
this.renderDocTable(e, licitDocument);
|
|
547
|
+
break;
|
|
548
|
+
}
|
|
549
|
+
case ParserElementType.vignet: {
|
|
550
|
+
// Handling generic docs in nonspecfic type
|
|
551
|
+
this.renderDocVignet(moDocType, e, licitDocument);
|
|
552
|
+
break;
|
|
553
|
+
}
|
|
554
|
+
case ParserElementType.FigureTitle:
|
|
555
|
+
case ParserElementType.TableTitle: {
|
|
556
|
+
const text = e.node.textContent;
|
|
557
|
+
const styleName = e.node.getAttribute('class') ?? 'normal';
|
|
558
|
+
if (text) {
|
|
559
|
+
const header = new LicitHeaderElement(text, '', 0, styleName, getCapcoFromNode(e.node) ?? '', e.node);
|
|
560
|
+
licitDocument.appendElement(header);
|
|
561
|
+
}
|
|
562
|
+
break;
|
|
563
|
+
}
|
|
564
|
+
case ParserElementType.SectionTitle: {
|
|
565
|
+
const text = e.node.textContent;
|
|
566
|
+
if (text) {
|
|
567
|
+
const header = new LicitHeaderElement(text);
|
|
568
|
+
header.align = 'center';
|
|
569
|
+
licitDocument.appendElement(header);
|
|
570
|
+
}
|
|
571
|
+
break;
|
|
572
|
+
}
|
|
573
|
+
case ParserElementType.hr: {
|
|
574
|
+
const hr = new LicitHRElement();
|
|
575
|
+
licitDocument.appendElement(hr);
|
|
576
|
+
break;
|
|
577
|
+
}
|
|
578
|
+
default:
|
|
579
|
+
console.warn(`Parser not configured to render element: ${e.class}`);
|
|
580
|
+
}
|
|
581
|
+
return tocRemoved;
|
|
582
|
+
}
|
|
583
|
+
renderTypeParagraph(e, licitDocument, infoIconData) {
|
|
584
|
+
const n = e.node;
|
|
585
|
+
if (!n) {
|
|
586
|
+
return;
|
|
587
|
+
}
|
|
588
|
+
if (e.class && e.class === 'Chapter Header') {
|
|
589
|
+
const spaceAbove = 3;
|
|
590
|
+
const p = document.createElement('p');
|
|
591
|
+
const p1 = new NewLicitParagraphElement(p, infoIconData);
|
|
592
|
+
p1.id = 'chspace';
|
|
593
|
+
for (let i = 0; i < spaceAbove; i++) {
|
|
594
|
+
licitDocument.appendElement(p1);
|
|
595
|
+
}
|
|
596
|
+
const paragraph = new NewLicitParagraphElement(n, infoIconData);
|
|
597
|
+
licitDocument.appendElement(paragraph);
|
|
598
|
+
return;
|
|
599
|
+
}
|
|
600
|
+
const text = n.textContent || '';
|
|
601
|
+
// Regular expression to find URLs (http:// or https://)
|
|
602
|
+
const urlRegex = /(https?:\/\/[^\s]{1,999})/g;
|
|
603
|
+
if (urlRegex.test(text)) {
|
|
604
|
+
this.handle_UrlText(text, licitDocument, infoIconData);
|
|
605
|
+
}
|
|
606
|
+
else {
|
|
607
|
+
this.text_WithoutUrl(n, licitDocument, infoIconData);
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
handle_UrlText(text, licitDocument, infoIconData) {
|
|
611
|
+
const urlRegex = /(https?:\/\/[^\s]{1,999})/g;
|
|
612
|
+
const parts = text.split(urlRegex);
|
|
613
|
+
const pElement = document.createElement('p');
|
|
614
|
+
for (const part of parts) {
|
|
615
|
+
if (urlRegex.test(part)) {
|
|
616
|
+
const anchor = document.createElement('a');
|
|
617
|
+
anchor.href = part;
|
|
618
|
+
anchor.target = '_blank';
|
|
619
|
+
anchor.rel = 'noopener noreferrer';
|
|
620
|
+
anchor.textContent = part;
|
|
621
|
+
pElement.appendChild(anchor);
|
|
622
|
+
}
|
|
623
|
+
else {
|
|
624
|
+
const textNode = document.createTextNode(part);
|
|
625
|
+
pElement.appendChild(textNode);
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
const paragraph = new NewLicitParagraphElement(pElement, infoIconData);
|
|
629
|
+
licitDocument.appendElement(paragraph);
|
|
630
|
+
}
|
|
631
|
+
text_WithoutUrl(n, licitDocument, infoIconData) {
|
|
632
|
+
const paragraph = new NewLicitParagraphElement(n, infoIconData);
|
|
633
|
+
licitDocument.appendElement(paragraph);
|
|
634
|
+
const element = n;
|
|
635
|
+
const indent = Number(element.dataset.indent);
|
|
636
|
+
if (indent > 0) {
|
|
637
|
+
paragraph.indent = indent;
|
|
638
|
+
}
|
|
639
|
+
}
|
|
640
|
+
handleNode(node, nextNode) {
|
|
641
|
+
const className = node.className;
|
|
642
|
+
const titleClasses = [
|
|
643
|
+
'attTableTitle',
|
|
644
|
+
'attFigureTitle',
|
|
645
|
+
'chTableTitle',
|
|
646
|
+
'chFigureTitle',
|
|
647
|
+
];
|
|
648
|
+
for (const cls of titleClasses) {
|
|
649
|
+
if (node.classList?.contains(cls)) {
|
|
650
|
+
const el = node;
|
|
651
|
+
el.style.textTransform = 'none';
|
|
652
|
+
el.style.color = '#000000';
|
|
653
|
+
break;
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
if (node.tagName !== 'DIV' && node.children.item(0)?.tagName === 'IMG') {
|
|
657
|
+
this.parseTableFigure(node);
|
|
658
|
+
}
|
|
659
|
+
else if (node.tagName === 'TABLE') {
|
|
660
|
+
this.parseTable(node, true);
|
|
661
|
+
}
|
|
662
|
+
else if (node.tagName === 'OL' || node.tagName === 'UL') {
|
|
663
|
+
return this.checkChildNode(node, nextNode);
|
|
664
|
+
}
|
|
665
|
+
else if (node.tagName === 'IMG') {
|
|
666
|
+
this.parseFigure(node);
|
|
667
|
+
}
|
|
668
|
+
else if (node.tagName === 'SPAN') {
|
|
669
|
+
return this.mergeSpans(node, nextNode);
|
|
670
|
+
}
|
|
671
|
+
else {
|
|
672
|
+
this.parseElement(node, nextNode);
|
|
673
|
+
}
|
|
674
|
+
// Old "skip next" logic for paragraph combining
|
|
675
|
+
if (shouldSkipNext(node.className) &&
|
|
676
|
+
nextNode &&
|
|
677
|
+
shouldSkipNext(nextNode.className)) {
|
|
678
|
+
return 1; // skip just next
|
|
679
|
+
}
|
|
680
|
+
return shouldSkipNext(className) ? 1 : 0;
|
|
681
|
+
}
|
|
682
|
+
//Merge consecutive spans below the table into a single paragraph
|
|
683
|
+
mergeSpans(node, nextNode) {
|
|
684
|
+
const p = document.createElement('p');
|
|
685
|
+
p.classList.add('dynamicTableHeader');
|
|
686
|
+
let current = node;
|
|
687
|
+
let consumed = 0;
|
|
688
|
+
let styleApplied = false;
|
|
689
|
+
let anchorInserted = false;
|
|
690
|
+
while (current?.tagName === 'SPAN') {
|
|
691
|
+
const style = current.getAttribute('style');
|
|
692
|
+
if (!styleApplied && style) {
|
|
693
|
+
p.setAttribute('style', style);
|
|
694
|
+
styleApplied = true;
|
|
695
|
+
}
|
|
696
|
+
//Directly look for <a id> inside the span for handling link references like <a id="1050920" name="1050920"></a>
|
|
697
|
+
if (!anchorInserted) {
|
|
698
|
+
const anchorInSpan = current.querySelector('a[id]');
|
|
699
|
+
if (anchorInSpan) {
|
|
700
|
+
const anchor = document.createElement('a');
|
|
701
|
+
anchor.id = anchorInSpan.id;
|
|
702
|
+
const nameAttr = anchorInSpan.getAttribute('name');
|
|
703
|
+
if (nameAttr) {
|
|
704
|
+
anchor.setAttribute('name', nameAttr);
|
|
705
|
+
}
|
|
706
|
+
p.appendChild(anchor);
|
|
707
|
+
anchorInserted = true;
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
// Append span content
|
|
711
|
+
p.innerHTML += current.innerHTML;
|
|
712
|
+
current = current.nextElementSibling;
|
|
713
|
+
consumed++;
|
|
714
|
+
}
|
|
715
|
+
// process merged paragraph
|
|
716
|
+
this.parseElement(p, nextNode);
|
|
717
|
+
// skip already consumed spans (minus the first one we processed)
|
|
718
|
+
return consumed - 1;
|
|
719
|
+
}
|
|
720
|
+
updateChildCapcoContent(e) {
|
|
721
|
+
if (e.node.textContent === '') {
|
|
722
|
+
return;
|
|
723
|
+
}
|
|
724
|
+
if (e.type === ParserElementType.EnhancedTable) {
|
|
725
|
+
this.processTableCapco(e.node);
|
|
726
|
+
return;
|
|
727
|
+
}
|
|
728
|
+
if (e.node.childNodes.length > 1) {
|
|
729
|
+
const childrens = e.node.childNodes;
|
|
730
|
+
this.processChildNodesCapco(childrens);
|
|
731
|
+
}
|
|
732
|
+
else {
|
|
733
|
+
const res = updateCapcoFromContent(e.node);
|
|
734
|
+
if (res?.containsCapco) {
|
|
735
|
+
if (e.node.nodeType === Node.ELEMENT_NODE) {
|
|
736
|
+
const element = e.node;
|
|
737
|
+
this.updateChildCapcoContentLoopHelper(Array.from(element.childNodes), res);
|
|
738
|
+
element.setAttribute('capco', JSON.stringify(res.capco));
|
|
739
|
+
}
|
|
740
|
+
else if (e.node.nodeType === Node.TEXT_NODE) {
|
|
741
|
+
e.node.textContent = res.updatedTextContent;
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
if (this.isNoteNode(e.node.className)) {
|
|
746
|
+
removeCapcoTextFromNode(e.node);
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
updateChildCapcoContentLoopHelper(childNodes, res) {
|
|
750
|
+
for (const node of childNodes) {
|
|
751
|
+
if (node.nodeType === Node.TEXT_NODE) {
|
|
752
|
+
node.textContent = res.updatedTextContent;
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
}
|
|
756
|
+
processChildNodesCapco(childNodes) {
|
|
757
|
+
// For skipping trimStart logic for the case while handling " "
|
|
758
|
+
for (const child of Array.from(childNodes)) {
|
|
759
|
+
//Hidden -> contine;
|
|
760
|
+
if (child.nodeType === Node.ELEMENT_NODE &&
|
|
761
|
+
child.className == 'Hidden') {
|
|
762
|
+
continue;
|
|
763
|
+
}
|
|
764
|
+
if (child.nodeType === Node.TEXT_NODE &&
|
|
765
|
+
child.textContent.trim() !== '') {
|
|
766
|
+
const res = updateCapcoFromContent(child);
|
|
767
|
+
if (res?.containsCapco) {
|
|
768
|
+
this.updateCapcoToParagraph(child, res);
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
//Recursively looping through nodes
|
|
772
|
+
else if (child.nodeType === Node.ELEMENT_NODE &&
|
|
773
|
+
child.childNodes.length > 0) {
|
|
774
|
+
this.processChildNodesCapco(child.childNodes);
|
|
775
|
+
}
|
|
776
|
+
if ((child.textContent?.trim()?.length ?? 0) > 0) {
|
|
777
|
+
break;
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
}
|
|
781
|
+
updateCapcoToParagraph(child, res) {
|
|
782
|
+
child.textContent = res.updatedTextContent;
|
|
783
|
+
// Find the nearest paragraph
|
|
784
|
+
let parent = child.parentElement;
|
|
785
|
+
while (parent && parent.tagName.toLowerCase() !== 'p') {
|
|
786
|
+
parent = parent.parentElement;
|
|
787
|
+
}
|
|
788
|
+
if (parent) {
|
|
789
|
+
parent.setAttribute('capco', JSON.stringify(res.capco));
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
processTableCapco(tableNode) {
|
|
793
|
+
const table = tableNode.querySelector('tbody');
|
|
794
|
+
const rows = table?.rows;
|
|
795
|
+
if (!rows || rows.length === 0) {
|
|
796
|
+
const capcoObj = getCapcoObject('U');
|
|
797
|
+
table?.setAttribute('capco', JSON.stringify(capcoObj));
|
|
798
|
+
return;
|
|
799
|
+
}
|
|
800
|
+
const lastRowIndex = rows.length - 1;
|
|
801
|
+
const lastRow = rows[lastRowIndex];
|
|
802
|
+
if (lastRow?.cells?.length !== 1) {
|
|
803
|
+
const capcoObj = getCapcoObject('U');
|
|
804
|
+
table?.setAttribute('capco', JSON.stringify(capcoObj));
|
|
805
|
+
return;
|
|
806
|
+
}
|
|
807
|
+
const cell = lastRow.cells[0];
|
|
808
|
+
const capcoString = getShortCapcoString(cell.textContent);
|
|
809
|
+
const capcoObj = getCapcoObject(capcoString);
|
|
810
|
+
table?.setAttribute('capco', JSON.stringify(capcoObj));
|
|
811
|
+
// Remove the last row from the table
|
|
812
|
+
table.deleteRow(lastRowIndex);
|
|
813
|
+
}
|
|
814
|
+
figureTitleCase(e, licitDocument) {
|
|
815
|
+
let text = e.node.textContent;
|
|
816
|
+
const styleName = e.node.getAttribute('class') ?? 'normal';
|
|
817
|
+
if (text) {
|
|
818
|
+
if (text.startsWith('Figure')) {
|
|
819
|
+
text = text.replace(/^Figure\s{1,50}[A-Za-z0-9.\-:]{1,50}\s{1,50}(\([A-Z]{1,4}\))?\s{0,10}/, '');
|
|
820
|
+
}
|
|
821
|
+
const header = new LicitHeaderElement(text, '', 0, styleName, getCapcoFromNode(e.node) ?? '', e.node);
|
|
822
|
+
licitDocument.appendElement(header);
|
|
823
|
+
}
|
|
824
|
+
// Added for handling image inside chFigureTitle class
|
|
825
|
+
if (e.node.children.length > 0) {
|
|
826
|
+
const children = e.node.children;
|
|
827
|
+
const childrenArray = Array.from(children);
|
|
828
|
+
for (const child of childrenArray) {
|
|
829
|
+
this.handleImageChild(child, licitDocument);
|
|
830
|
+
}
|
|
831
|
+
}
|
|
832
|
+
}
|
|
833
|
+
handleImageChild(child, licitDocument) {
|
|
834
|
+
if (child.tagName !== 'IMG')
|
|
835
|
+
return;
|
|
836
|
+
const src = child.getAttribute('src');
|
|
837
|
+
if (!src)
|
|
838
|
+
return;
|
|
839
|
+
const alt = child.getAttribute('alt') ?? '';
|
|
840
|
+
const rawWidth = child.width;
|
|
841
|
+
const width = rawWidth && rawWidth > 0 ? this.getScaledWidth(rawWidth) : undefined;
|
|
842
|
+
const height = child.getAttribute('height');
|
|
843
|
+
const imageElement = new LicitParagraphImageElement(src, alt, width, height);
|
|
844
|
+
licitDocument.appendElement(imageElement);
|
|
845
|
+
}
|
|
846
|
+
renderNewFigureTitle(e, licitDocument) {
|
|
847
|
+
let text = e.node.textContent;
|
|
848
|
+
const styleName = e.node.getAttribute('class') ?? 'normal';
|
|
849
|
+
const capco = getCapcoFromNode(e.node);
|
|
850
|
+
if (text) {
|
|
851
|
+
if (text.startsWith('Figure')) {
|
|
852
|
+
text = text.replace(/^Figure\s{1,50}[A-Za-z0-9.\-:]{1,50}\s{1,50}(\([A-Z]{1,4}\))?\s{0,10}/, '');
|
|
853
|
+
}
|
|
854
|
+
const header = new LicitHeaderElement(text, '', 0, styleName, capco ?? '', e.node);
|
|
855
|
+
licitDocument.appendElement(header);
|
|
856
|
+
}
|
|
857
|
+
if (e.node.children.length === 0)
|
|
858
|
+
return;
|
|
859
|
+
const children = Array.from(e.node.children);
|
|
860
|
+
for (const child of children) {
|
|
861
|
+
if (child.tagName !== 'IMG')
|
|
862
|
+
continue;
|
|
863
|
+
const imgElement = child;
|
|
864
|
+
const imageSrc = imgElement.getAttribute('src');
|
|
865
|
+
if (!imageSrc)
|
|
866
|
+
continue;
|
|
867
|
+
const licitEnhancedImage = this.renderNewLicitImage(imgElement, capco);
|
|
868
|
+
licitDocument.appendElement(licitEnhancedImage);
|
|
869
|
+
}
|
|
870
|
+
}
|
|
871
|
+
figureParagraphCase(e, licitDocument, infoIconData, renderedContentList) {
|
|
872
|
+
const n = e.node;
|
|
873
|
+
if (n) {
|
|
874
|
+
const paragraph = new NewLicitParagraphElement(n, infoIconData, renderedContentList);
|
|
875
|
+
licitDocument.appendElement(paragraph);
|
|
876
|
+
if (Number(n.dataset.indent) > 0) {
|
|
877
|
+
paragraph.indent = Number(n.dataset.indent);
|
|
878
|
+
}
|
|
879
|
+
}
|
|
880
|
+
}
|
|
881
|
+
figureNoteCase(e, licitDocument) {
|
|
882
|
+
const paragraph = new LicitParagraphNote(e.node);
|
|
883
|
+
licitDocument.appendElement(paragraph);
|
|
884
|
+
}
|
|
885
|
+
figureTableTitleCase(e, licitDocument) {
|
|
886
|
+
let text = e.node.textContent;
|
|
887
|
+
const styleName = e.node.getAttribute('class') ?? 'normal';
|
|
888
|
+
if (text) {
|
|
889
|
+
if (text.startsWith('Table')) {
|
|
890
|
+
text = text.replace(/^Table\s{1,50}[A-Za-z0-9.\-:]{1,50}\s{1,50}(\([A-Z]{1,4}\))?\s{0,10}/, '');
|
|
891
|
+
}
|
|
892
|
+
const header = new LicitHeaderElement(text, '', 0, styleName, getCapcoFromNode(e.node) ?? '', e.node);
|
|
893
|
+
licitDocument.appendElement(header);
|
|
894
|
+
}
|
|
895
|
+
}
|
|
896
|
+
renderDocVignet(moDocType, e, licitDocument) {
|
|
897
|
+
if (moDocType == 'Non Specific') {
|
|
898
|
+
this.parseUntypedDocVignet(e, licitDocument);
|
|
899
|
+
}
|
|
900
|
+
else {
|
|
901
|
+
this.parseTypedDocVignet(e, licitDocument);
|
|
902
|
+
}
|
|
903
|
+
}
|
|
904
|
+
parseUntypedDocVignet(e, licitDocument) {
|
|
905
|
+
const image = e.node.querySelector('img');
|
|
906
|
+
const source = image?.src;
|
|
907
|
+
const altText = image?.alt;
|
|
908
|
+
const width = image?.getAttribute('width');
|
|
909
|
+
const height = image?.getAttribute('height');
|
|
910
|
+
const align = getElementAlignment(image);
|
|
911
|
+
if (source) {
|
|
912
|
+
const imageElement = new LicitParagraphImageElement(source, altText, width, height, align);
|
|
913
|
+
licitDocument.appendElement(imageElement);
|
|
914
|
+
if (altText === '/ERR:Unsupported Image Format x-emf') {
|
|
915
|
+
const errText = new LicitErrorTextElement(altText);
|
|
916
|
+
licitDocument.appendElement(errText);
|
|
917
|
+
}
|
|
918
|
+
}
|
|
919
|
+
const text = e.node.textContent;
|
|
920
|
+
if (text) {
|
|
921
|
+
for (const node of Array.from(e.node.childNodes)) {
|
|
922
|
+
if (node.nodeName === 'P') {
|
|
923
|
+
const paragraph = new NewLicitParagraphElement(node);
|
|
924
|
+
licitDocument.appendElement(paragraph);
|
|
925
|
+
}
|
|
926
|
+
}
|
|
927
|
+
}
|
|
928
|
+
}
|
|
929
|
+
parseTypedDocVignet(e, licitDocument) {
|
|
930
|
+
const licitTable = new LicitTableElement(true);
|
|
931
|
+
const licitRow = new LicitTableRowElement();
|
|
932
|
+
const rowspan = 1;
|
|
933
|
+
const colspan = 1;
|
|
934
|
+
let licitCell = null;
|
|
935
|
+
const style = e.node.getAttribute('style');
|
|
936
|
+
let borderColor;
|
|
937
|
+
let bgColor;
|
|
938
|
+
let boxWidth;
|
|
939
|
+
if (style) {
|
|
940
|
+
const styleVals = style.split(';');
|
|
941
|
+
for (const val of styleVals) {
|
|
942
|
+
const styles = this.parseTypedDocVignetHelper(val, bgColor, borderColor, boxWidth);
|
|
943
|
+
borderColor = styles.borderColor;
|
|
944
|
+
bgColor = styles.bgColor;
|
|
945
|
+
boxWidth = styles.boxWidth;
|
|
946
|
+
}
|
|
947
|
+
}
|
|
948
|
+
licitCell = new LicitVignetteElement(e.node, borderColor, bgColor, boxWidth);
|
|
949
|
+
licitCell.rowspan = rowspan;
|
|
950
|
+
licitCell.colspan = colspan;
|
|
951
|
+
licitRow.addCell(licitCell);
|
|
952
|
+
licitTable.addRow(licitRow);
|
|
953
|
+
licitDocument.appendElement(licitTable);
|
|
954
|
+
}
|
|
955
|
+
parseTypedDocVignetHelper(val, bgColor, borderColor, boxWidth) {
|
|
956
|
+
if (val.startsWith('background-color')) {
|
|
957
|
+
bgColor = val.split(':')[1];
|
|
958
|
+
}
|
|
959
|
+
if (val.startsWith('border')) {
|
|
960
|
+
borderColor = '#' + val.split('#')[1];
|
|
961
|
+
}
|
|
962
|
+
if (val.startsWith('width')) {
|
|
963
|
+
const calculatedWidth = Number(val.split(':')[1].replace('pt', '')) / 0.75;
|
|
964
|
+
boxWidth = Math.min(700, calculatedWidth);
|
|
965
|
+
}
|
|
966
|
+
return {
|
|
967
|
+
bgColor: bgColor,
|
|
968
|
+
borderColor: borderColor,
|
|
969
|
+
boxWidth: boxWidth,
|
|
970
|
+
};
|
|
971
|
+
}
|
|
972
|
+
renderDocTable(e, licitDocument) {
|
|
973
|
+
const licitTable = new LicitTableElement();
|
|
974
|
+
const colWidthsArray = this.getColWidthArray(e.node);
|
|
975
|
+
const tableHead = e.node.querySelector('thead');
|
|
976
|
+
const table = e.node.querySelector('tbody');
|
|
977
|
+
licitTable.capco = getCapcoFromNode(table);
|
|
978
|
+
const isTransparentTable = this.isTransparentTable(e.node);
|
|
979
|
+
//Process table header first and then table body. If there is no body then process table header only.
|
|
980
|
+
if (tableHead) {
|
|
981
|
+
this.parseTableContent(e, tableHead, 'th', true, licitTable, colWidthsArray, isTransparentTable);
|
|
982
|
+
}
|
|
983
|
+
if (table) {
|
|
984
|
+
this.parseTableContent(e, table, 'td', false, licitTable, colWidthsArray, isTransparentTable);
|
|
985
|
+
}
|
|
986
|
+
if (tableHead || table) {
|
|
987
|
+
licitDocument.appendElement(licitTable);
|
|
988
|
+
}
|
|
989
|
+
}
|
|
990
|
+
renderEnhancedTable(e, licitDocument) {
|
|
991
|
+
const widthArray = this.getColWidthArray(e.node);
|
|
992
|
+
const table = e.node.querySelector('tbody');
|
|
993
|
+
let totalWidth = 619;
|
|
994
|
+
if (widthArray) {
|
|
995
|
+
totalWidth = this.getSumOfArray(widthArray);
|
|
996
|
+
}
|
|
997
|
+
const orientation = this.findOrientation(totalWidth);
|
|
998
|
+
const capco = getCapcoFromNode(table);
|
|
999
|
+
const licitNewTable = new LicitEnhancedTableElement(orientation);
|
|
1000
|
+
const newBody = new LicitEnhancedTableFigureBodyElement();
|
|
1001
|
+
const tableCapco = new LicitEnhancedTableFigureCapcoElement(safeCapcoParse(capco).portionMarking);
|
|
1002
|
+
const licitTable = this.getLicitTable(e, widthArray, capco);
|
|
1003
|
+
if (licitTable.rows.length > 0) {
|
|
1004
|
+
newBody.addTable(licitTable);
|
|
1005
|
+
licitNewTable.addBody(newBody);
|
|
1006
|
+
licitNewTable.addCapco(tableCapco);
|
|
1007
|
+
const noteParagraphs = this.extractNote(table);
|
|
1008
|
+
//If the table has a note, create a new notes element and add it to the table
|
|
1009
|
+
if (noteParagraphs) {
|
|
1010
|
+
const note = new LicitEnhancedTableNotesElement(noteParagraphs);
|
|
1011
|
+
licitNewTable.addNotes(note);
|
|
1012
|
+
//Remove the row containing the note from the table
|
|
1013
|
+
licitNewTable.removeLastRow();
|
|
1014
|
+
}
|
|
1015
|
+
licitDocument.appendElement(licitNewTable);
|
|
1016
|
+
}
|
|
1017
|
+
}
|
|
1018
|
+
getLicitTable(e, widthArray, capco) {
|
|
1019
|
+
const licitTable = new LicitTableElement(false, capco);
|
|
1020
|
+
const tableHead = e.node.querySelector('thead');
|
|
1021
|
+
const table = e.node.querySelector('tbody');
|
|
1022
|
+
const isChapterHeader = false;
|
|
1023
|
+
if (table) {
|
|
1024
|
+
if (tableHead) {
|
|
1025
|
+
this.parseTableContent(e, tableHead, 'th', isChapterHeader, licitTable, widthArray, false);
|
|
1026
|
+
}
|
|
1027
|
+
this.parseTableContent(e, table, 'td', isChapterHeader, licitTable, widthArray, false);
|
|
1028
|
+
}
|
|
1029
|
+
return licitTable;
|
|
1030
|
+
}
|
|
1031
|
+
//To get Image node from the dom and return the Licit Enhanced Image Element.
|
|
1032
|
+
renderNewLicitImage(imageElement, capco) {
|
|
1033
|
+
const imageInfo = this.extractImageInfo(imageElement);
|
|
1034
|
+
const orientation = this.findOrientation(imageInfo.width);
|
|
1035
|
+
const licitImage = new LicitNewImageElement(imageInfo.src, imageInfo.width?.toString(), imageInfo.height?.toString(), imageInfo.alt, capco);
|
|
1036
|
+
const licitBody = new LicitEnhancedImageBodyElement(licitImage);
|
|
1037
|
+
const capcoString = safeCapcoParse(capco).portionMarking;
|
|
1038
|
+
const licitCapco = new LicitEnhancedTableFigureCapcoElement(capcoString);
|
|
1039
|
+
const licitEnhancedImage = new LicitEnhancedImageElement(orientation);
|
|
1040
|
+
licitEnhancedImage.addBody(licitBody);
|
|
1041
|
+
licitEnhancedImage.addCapco(licitCapco);
|
|
1042
|
+
return licitEnhancedImage;
|
|
1043
|
+
}
|
|
1044
|
+
renderDocBulletItems(e, licitDocument) {
|
|
1045
|
+
const indent = 0;
|
|
1046
|
+
const bulletList = new LicitBulletListElement(indent);
|
|
1047
|
+
const text = e.node.textContent;
|
|
1048
|
+
if (!text || (!e.node.childNodes && e.node.childNodes.length === 0)) {
|
|
1049
|
+
return;
|
|
1050
|
+
}
|
|
1051
|
+
this.removeEmptyATags(e.node);
|
|
1052
|
+
const childNodes = Array.from(e.node.childNodes);
|
|
1053
|
+
const firstChild = childNodes[0];
|
|
1054
|
+
if (firstChild.nodeName === '#text') {
|
|
1055
|
+
const bulletItem = new LicitBulletListItemElement(e.node);
|
|
1056
|
+
bulletList.addItem(bulletItem);
|
|
1057
|
+
bulletList.styleLevel = e.level;
|
|
1058
|
+
this.addElementLicit(licitDocument, bulletList);
|
|
1059
|
+
}
|
|
1060
|
+
else {
|
|
1061
|
+
this.processBulletNodes(childNodes, bulletList, licitDocument, indent, e);
|
|
1062
|
+
}
|
|
1063
|
+
}
|
|
1064
|
+
processBulletNodes(childNodes, bulletList, licitDocument, indent, e) {
|
|
1065
|
+
for (const node of childNodes) {
|
|
1066
|
+
const ulNode = Array.from(node.childNodes).find((childNode) => childNode.nodeName === 'UL');
|
|
1067
|
+
const olNode = Array.from(node.childNodes).find((childNode) => childNode.nodeName === 'OL');
|
|
1068
|
+
if (!(ulNode && olNode) && node.nextSibling && node.nodeName !== 'LI') {
|
|
1069
|
+
const bulletItem = new LicitBulletListItemElement(e.node);
|
|
1070
|
+
bulletList.addItem(bulletItem);
|
|
1071
|
+
this.addElementLicit(licitDocument, bulletList);
|
|
1072
|
+
break;
|
|
1073
|
+
}
|
|
1074
|
+
else {
|
|
1075
|
+
const bulletItem = new LicitBulletListItemElement(node);
|
|
1076
|
+
bulletList.addItem(bulletItem);
|
|
1077
|
+
this.addElementLicit(licitDocument, bulletList);
|
|
1078
|
+
if (ulNode) {
|
|
1079
|
+
this.handleULNode(licitDocument, indent, ulNode);
|
|
1080
|
+
}
|
|
1081
|
+
if (olNode) {
|
|
1082
|
+
this.parseOL(e, licitDocument);
|
|
1083
|
+
}
|
|
1084
|
+
}
|
|
1085
|
+
bulletList = new LicitBulletListElement(0);
|
|
1086
|
+
}
|
|
1087
|
+
}
|
|
1088
|
+
addElementLicit(licitDocument, bulletList) {
|
|
1089
|
+
if (bulletList.listItems.length > 0) {
|
|
1090
|
+
licitDocument?.appendElement(bulletList);
|
|
1091
|
+
}
|
|
1092
|
+
}
|
|
1093
|
+
removeEmptyATags(node) {
|
|
1094
|
+
const childNodes = Array.from(node.childNodes);
|
|
1095
|
+
for (const childNode of childNodes) {
|
|
1096
|
+
if (childNode.nodeName === 'A' && childNode.textContent === '') {
|
|
1097
|
+
const index = childNodes.indexOf(childNode);
|
|
1098
|
+
if (index != -1) {
|
|
1099
|
+
childNodes[index].remove();
|
|
1100
|
+
}
|
|
1101
|
+
}
|
|
1102
|
+
}
|
|
1103
|
+
}
|
|
1104
|
+
handleULNode(licitDocument, indent, ulNode) {
|
|
1105
|
+
indent++;
|
|
1106
|
+
this.ParseNestedList('UL', ulNode, licitDocument, indent);
|
|
1107
|
+
}
|
|
1108
|
+
renderDocFigure(e, licitDocument) {
|
|
1109
|
+
if (e.node.tagName === 'P') {
|
|
1110
|
+
const paraImages = new NewLicitParagraphElement(e.node);
|
|
1111
|
+
licitDocument.appendElement(paraImages);
|
|
1112
|
+
}
|
|
1113
|
+
else if (e.node.tagName === 'IMG') {
|
|
1114
|
+
this.renderImage(e.node, licitDocument);
|
|
1115
|
+
}
|
|
1116
|
+
else {
|
|
1117
|
+
const images = e.node.querySelectorAll('img');
|
|
1118
|
+
for (const element of Array.from(images)) {
|
|
1119
|
+
if (element) {
|
|
1120
|
+
this.renderImage(element, licitDocument);
|
|
1121
|
+
}
|
|
1122
|
+
}
|
|
1123
|
+
if (e.node.tagName === 'DIV') {
|
|
1124
|
+
const caption = e.node.querySelector('p');
|
|
1125
|
+
const paraImages = new NewLicitParagraphElement(caption);
|
|
1126
|
+
licitDocument.appendElement(paraImages);
|
|
1127
|
+
}
|
|
1128
|
+
}
|
|
1129
|
+
}
|
|
1130
|
+
renderImage(imgElement, licitDocument) {
|
|
1131
|
+
const source = imgElement.getAttribute('src');
|
|
1132
|
+
const altText = imgElement.alt;
|
|
1133
|
+
if (source) {
|
|
1134
|
+
const imageElement = new LicitParagraphImageElement(source);
|
|
1135
|
+
if (altText === '/ERR:Unsupported Image Format x-emf') {
|
|
1136
|
+
const errText = new LicitErrorTextElement(altText);
|
|
1137
|
+
licitDocument.appendElement(imageElement);
|
|
1138
|
+
licitDocument.appendElement(errText);
|
|
1139
|
+
}
|
|
1140
|
+
licitDocument.appendElement(imageElement);
|
|
1141
|
+
if (imgElement.childNodes.length > 1) {
|
|
1142
|
+
imgElement.remove();
|
|
1143
|
+
const textInline = new NewLicitParagraphElement(imgElement);
|
|
1144
|
+
licitDocument.appendElement(textInline);
|
|
1145
|
+
}
|
|
1146
|
+
}
|
|
1147
|
+
}
|
|
1148
|
+
parseOL(e, licitDocument) {
|
|
1149
|
+
if (e.node.id === 'infoIcon') {
|
|
1150
|
+
return;
|
|
1151
|
+
}
|
|
1152
|
+
let indent = 0;
|
|
1153
|
+
let orderedList = new LicitOrderedListElement(indent);
|
|
1154
|
+
const text = e.node.textContent;
|
|
1155
|
+
if (text && e.node.childNodes && e.node.childNodes.length > 0) {
|
|
1156
|
+
const childNodes = Array.from(e.node.childNodes);
|
|
1157
|
+
for (const n of childNodes) {
|
|
1158
|
+
const ulNode = Array.from(n.childNodes).find((node) => node.nodeName === 'OL');
|
|
1159
|
+
const bulletItem = new LicitBulletListItemElement(n);
|
|
1160
|
+
orderedList.addItem(bulletItem);
|
|
1161
|
+
if (ulNode) {
|
|
1162
|
+
licitDocument.appendElement(orderedList);
|
|
1163
|
+
indent++;
|
|
1164
|
+
this.ParseNestedList('OL', ulNode, licitDocument, indent);
|
|
1165
|
+
orderedList = new LicitOrderedListElement(0);
|
|
1166
|
+
}
|
|
1167
|
+
}
|
|
1168
|
+
if (orderedList.listItems.length > 0) {
|
|
1169
|
+
licitDocument.appendElement(orderedList);
|
|
1170
|
+
}
|
|
1171
|
+
}
|
|
1172
|
+
}
|
|
1173
|
+
/**
|
|
1174
|
+
* To parse table data
|
|
1175
|
+
* @param e - element
|
|
1176
|
+
* @param tableTag - The tag name or identifier of the table.
|
|
1177
|
+
* @param querySel Selector for Querying from table row
|
|
1178
|
+
* @param isChapterHeader flag to determine ChapterHeader
|
|
1179
|
+
* @param licitTable -Licit Table Element
|
|
1180
|
+
* @param widthArray - To scale the table to specific sizes
|
|
1181
|
+
* @param isTransparent - flag to distinguish preface table
|
|
1182
|
+
* @returns void
|
|
1183
|
+
*/
|
|
1184
|
+
parseTableContent(_e, tableTag, querySel, isChapterHeader, licitTable, widthArray, isTransparent) {
|
|
1185
|
+
const rows = tableTag.querySelectorAll('tr');
|
|
1186
|
+
for (let i = 0; i < rows.length; i++) {
|
|
1187
|
+
if (!isTransparent &&
|
|
1188
|
+
i == 0 &&
|
|
1189
|
+
!isChapterHeader &&
|
|
1190
|
+
rows[i].cells.length > 1) {
|
|
1191
|
+
isChapterHeader = true;
|
|
1192
|
+
}
|
|
1193
|
+
const licitRow = new LicitTableRowElement();
|
|
1194
|
+
const cells = rows[i].querySelectorAll(querySel);
|
|
1195
|
+
this.parseTableContentInnerLoopHelper(cells, i, isChapterHeader, licitRow, widthArray, isTransparent);
|
|
1196
|
+
licitTable.addRow(licitRow);
|
|
1197
|
+
isChapterHeader = false;
|
|
1198
|
+
}
|
|
1199
|
+
}
|
|
1200
|
+
parseTableContentInnerLoopHelper(cells, _cellIndex, isChapterHeader, licitRow, widthArray, isTransparent) {
|
|
1201
|
+
for (let j = 0; j < cells.length; j++) {
|
|
1202
|
+
//Start RK-Dynamic Cell(2-2 of Chapter Header) BgColor
|
|
1203
|
+
const style = cells[j].getAttribute('style');
|
|
1204
|
+
let bgColor;
|
|
1205
|
+
if (style) {
|
|
1206
|
+
const styleVals = style.split(';');
|
|
1207
|
+
for (const val of styleVals) {
|
|
1208
|
+
if (val.startsWith('background-color')) {
|
|
1209
|
+
bgColor = val.split(':')[1];
|
|
1210
|
+
}
|
|
1211
|
+
}
|
|
1212
|
+
}
|
|
1213
|
+
else if (cells[j].getAttribute('fillcolor')) {
|
|
1214
|
+
bgColor = cells[j].getAttribute('fillcolor');
|
|
1215
|
+
}
|
|
1216
|
+
//
|
|
1217
|
+
let verAlign = 'top';
|
|
1218
|
+
if (cells[j].id === 'LC-Center') {
|
|
1219
|
+
verAlign = 'middle';
|
|
1220
|
+
}
|
|
1221
|
+
//END
|
|
1222
|
+
const cellOptions = {
|
|
1223
|
+
bgColor,
|
|
1224
|
+
isChapterHeader,
|
|
1225
|
+
verAlign,
|
|
1226
|
+
cellIndex: j,
|
|
1227
|
+
widthArray,
|
|
1228
|
+
isTransparent,
|
|
1229
|
+
};
|
|
1230
|
+
this.addCell(cells[j], licitRow, cellOptions);
|
|
1231
|
+
}
|
|
1232
|
+
}
|
|
1233
|
+
addCell(cell, licitRow, cellOptions) {
|
|
1234
|
+
if (!cell) {
|
|
1235
|
+
return;
|
|
1236
|
+
}
|
|
1237
|
+
let { bgColor } = cellOptions;
|
|
1238
|
+
const { verAlign, cellIndex, widthArray, isTransparent, isChapterHeader } = cellOptions;
|
|
1239
|
+
const rowspan = cell.rowSpan;
|
|
1240
|
+
const colspan = cell.colSpan;
|
|
1241
|
+
let colWidth;
|
|
1242
|
+
let licitCell = null;
|
|
1243
|
+
const text = cell.textContent ?? '';
|
|
1244
|
+
if (cell.childNodes?.length <= 0) {
|
|
1245
|
+
//condition
|
|
1246
|
+
licitCell = new LicitTableCellParaElement(cell, bgColor, null, verAlign, isChapterHeader, isTransparent);
|
|
1247
|
+
}
|
|
1248
|
+
else if ('' === text &&
|
|
1249
|
+
cell.childNodes[0].querySelector('img')) {
|
|
1250
|
+
({ licitCell } = this.addTableImageCell(cell, bgColor, isChapterHeader, licitCell, verAlign));
|
|
1251
|
+
}
|
|
1252
|
+
else {
|
|
1253
|
+
if (isChapterHeader) {
|
|
1254
|
+
bgColor = bgColor || '#d8d8d8';
|
|
1255
|
+
cell.align = 'center'; // NOSONAR used by Licit parser (depricated)
|
|
1256
|
+
cell.setAttribute('classname', 'LC-Center');
|
|
1257
|
+
}
|
|
1258
|
+
licitCell = new LicitTableCellParaElement(cell, bgColor, colWidth, verAlign, isChapterHeader, isTransparent);
|
|
1259
|
+
}
|
|
1260
|
+
licitCell.rowspan = rowspan;
|
|
1261
|
+
licitCell.colspan = colspan;
|
|
1262
|
+
if (widthArray?.length > 0) {
|
|
1263
|
+
licitCell.colWidth = this.setCellWidth(colspan, cellIndex, widthArray);
|
|
1264
|
+
}
|
|
1265
|
+
licitRow.addCell(licitCell);
|
|
1266
|
+
}
|
|
1267
|
+
checkCellStyle(style) {
|
|
1268
|
+
let borderColor = null;
|
|
1269
|
+
if (style != null) {
|
|
1270
|
+
const styleVals = style.split(';');
|
|
1271
|
+
for (const val of styleVals) {
|
|
1272
|
+
if (val.length > 0 &&
|
|
1273
|
+
val.includes('border') &&
|
|
1274
|
+
!val.includes('style') &&
|
|
1275
|
+
!val.includes('radius')) {
|
|
1276
|
+
const border = val.split(':')[1];
|
|
1277
|
+
if (border == '0') {
|
|
1278
|
+
borderColor = '#FFFFFF';
|
|
1279
|
+
}
|
|
1280
|
+
}
|
|
1281
|
+
}
|
|
1282
|
+
}
|
|
1283
|
+
return borderColor;
|
|
1284
|
+
}
|
|
1285
|
+
addTableImageCell(cell, bgColor, isChapterHeader, licitCell, verAlign) {
|
|
1286
|
+
const image = cell.childNodes[0].querySelector('img');
|
|
1287
|
+
let altText = null;
|
|
1288
|
+
let imgHeight = null;
|
|
1289
|
+
let colWidth = null;
|
|
1290
|
+
let fillImg = 0;
|
|
1291
|
+
let fitoParent = 0;
|
|
1292
|
+
if (['LC-Image-1', 'LC-Image-2'].includes(image.id)) {
|
|
1293
|
+
bgColor = '#d8d8d8';
|
|
1294
|
+
fillImg = 1;
|
|
1295
|
+
fitoParent = 1;
|
|
1296
|
+
colWidth = image.id === 'LC-Image-1' ? [100, 625] : [100];
|
|
1297
|
+
imgHeight = image?.id === 'LC-Image-2' ? '70' : imgHeight;
|
|
1298
|
+
isChapterHeader = true;
|
|
1299
|
+
}
|
|
1300
|
+
else {
|
|
1301
|
+
altText = image.alt;
|
|
1302
|
+
}
|
|
1303
|
+
const source = image?.getAttribute('srcRelative') ?? image?.src;
|
|
1304
|
+
if (source) {
|
|
1305
|
+
// seybi excluded image
|
|
1306
|
+
licitCell = new LicitTableCellImageElement(source, fillImg, fitoParent, bgColor, imgHeight, colWidth, altText);
|
|
1307
|
+
}
|
|
1308
|
+
else {
|
|
1309
|
+
licitCell = new LicitTableCellParagraph(cell, bgColor, colWidth, verAlign);
|
|
1310
|
+
}
|
|
1311
|
+
return { bgColor, isChapterHeader, licitCell };
|
|
1312
|
+
}
|
|
1313
|
+
ParseNestedList(_listType, node, licitDocument, indent) {
|
|
1314
|
+
let list;
|
|
1315
|
+
const ulType = 'UL';
|
|
1316
|
+
const olType = 'OL';
|
|
1317
|
+
if (node.nodeName === ulType) {
|
|
1318
|
+
list = new LicitBulletListElement(indent);
|
|
1319
|
+
}
|
|
1320
|
+
else if (node.nodeName === olType) {
|
|
1321
|
+
list = new LicitOrderedListElement(indent);
|
|
1322
|
+
}
|
|
1323
|
+
const text = node.textContent;
|
|
1324
|
+
if (text && node.childNodes && node.childNodes.length > 0) {
|
|
1325
|
+
const childNodes = Array.from(node.childNodes);
|
|
1326
|
+
for (const n of childNodes) {
|
|
1327
|
+
const ulNode = Array.from(n.childNodes).find((node) => node.nodeName === ulType);
|
|
1328
|
+
const olNode = Array.from(n.childNodes).find((node) => node.nodeName === olType);
|
|
1329
|
+
const bulletItem = new LicitBulletListItemElement(n);
|
|
1330
|
+
list.addItem(bulletItem);
|
|
1331
|
+
if (ulNode) {
|
|
1332
|
+
licitDocument.appendElement(list);
|
|
1333
|
+
indent++;
|
|
1334
|
+
this.ParseNestedList(ulType, ulNode, licitDocument, indent);
|
|
1335
|
+
list = new LicitBulletListElement(indent);
|
|
1336
|
+
}
|
|
1337
|
+
if (olNode) {
|
|
1338
|
+
licitDocument.appendElement(list);
|
|
1339
|
+
indent++;
|
|
1340
|
+
this.ParseNestedList(olType, olNode, licitDocument, indent);
|
|
1341
|
+
list = new LicitOrderedListElement(indent);
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1344
|
+
if (list.listItems.length > 0) {
|
|
1345
|
+
licitDocument.appendElement(list);
|
|
1346
|
+
}
|
|
1347
|
+
}
|
|
1348
|
+
}
|
|
1349
|
+
/**
|
|
1350
|
+
* Returns the level of an element as described by the number at the end of its classname
|
|
1351
|
+
*
|
|
1352
|
+
* @param className - The className of the element
|
|
1353
|
+
* @returns The level as a number or zero if the level cannot be determined
|
|
1354
|
+
*/
|
|
1355
|
+
extractLevel(className) {
|
|
1356
|
+
const customStyle = this.getCustomStyle(className);
|
|
1357
|
+
if (customStyle) {
|
|
1358
|
+
const level = customStyle.styles?.styleLevel;
|
|
1359
|
+
return level ? Number.parseInt(String(level), 10) : 0;
|
|
1360
|
+
}
|
|
1361
|
+
else {
|
|
1362
|
+
const matches = /\d{1,10}$/.exec(className);
|
|
1363
|
+
return matches ? Number.parseInt(matches[0], 10) : 0;
|
|
1364
|
+
}
|
|
1365
|
+
}
|
|
1366
|
+
/**
|
|
1367
|
+
* Determines if an element is a table or image then calls the appropriate parse method
|
|
1368
|
+
*/
|
|
1369
|
+
parseTableFigure(element) {
|
|
1370
|
+
if (element.querySelector('img')) {
|
|
1371
|
+
this.parseFigure(element);
|
|
1372
|
+
}
|
|
1373
|
+
}
|
|
1374
|
+
/**
|
|
1375
|
+
* Parse a table element
|
|
1376
|
+
*/
|
|
1377
|
+
parseTable(element, useEnhancedTables) {
|
|
1378
|
+
this.sanitizeElement(element);
|
|
1379
|
+
let tableType = ParserElementType.Table;
|
|
1380
|
+
if (useEnhancedTables && !this.isTransparentTable(element)) {
|
|
1381
|
+
tableType = ParserElementType.EnhancedTable;
|
|
1382
|
+
}
|
|
1383
|
+
this.elements.push({
|
|
1384
|
+
node: element,
|
|
1385
|
+
type: tableType,
|
|
1386
|
+
class: '',
|
|
1387
|
+
level: 0,
|
|
1388
|
+
subText: '',
|
|
1389
|
+
});
|
|
1390
|
+
}
|
|
1391
|
+
/**
|
|
1392
|
+
* Parse a table element
|
|
1393
|
+
*/
|
|
1394
|
+
parseVignet(element) {
|
|
1395
|
+
this.elements.push({
|
|
1396
|
+
node: element,
|
|
1397
|
+
type: ParserElementType.vignet,
|
|
1398
|
+
class: '',
|
|
1399
|
+
level: 0,
|
|
1400
|
+
subText: '',
|
|
1401
|
+
});
|
|
1402
|
+
}
|
|
1403
|
+
/**
|
|
1404
|
+
* Parse a figure (image) element
|
|
1405
|
+
*/
|
|
1406
|
+
parseFigure(element) {
|
|
1407
|
+
this.elements.push({
|
|
1408
|
+
node: element,
|
|
1409
|
+
type: ParserElementType.Figure,
|
|
1410
|
+
class: '',
|
|
1411
|
+
level: 0,
|
|
1412
|
+
subText: '',
|
|
1413
|
+
});
|
|
1414
|
+
}
|
|
1415
|
+
/**
|
|
1416
|
+
* Parse a note element
|
|
1417
|
+
*/
|
|
1418
|
+
parseNote(element) {
|
|
1419
|
+
const level = this.extractLevel(element.className);
|
|
1420
|
+
this.elements.push({
|
|
1421
|
+
node: element,
|
|
1422
|
+
type: ParserElementType.Note,
|
|
1423
|
+
class: element.className,
|
|
1424
|
+
level,
|
|
1425
|
+
subText: '',
|
|
1426
|
+
});
|
|
1427
|
+
}
|
|
1428
|
+
/**
|
|
1429
|
+
* Parse a hr element
|
|
1430
|
+
*/
|
|
1431
|
+
parseHR(element) {
|
|
1432
|
+
this.elements.push({
|
|
1433
|
+
node: element,
|
|
1434
|
+
type: ParserElementType.hr,
|
|
1435
|
+
class: element.className,
|
|
1436
|
+
level: 0,
|
|
1437
|
+
subText: '',
|
|
1438
|
+
});
|
|
1439
|
+
}
|
|
1440
|
+
/**
|
|
1441
|
+
* Parse a chapter title element
|
|
1442
|
+
*/
|
|
1443
|
+
parseChapterTitle(element) {
|
|
1444
|
+
const level = this.extractLevel(element.className);
|
|
1445
|
+
this.elements.push({
|
|
1446
|
+
node: element,
|
|
1447
|
+
type: ParserElementType.ChapterTitle,
|
|
1448
|
+
class: element.className,
|
|
1449
|
+
level,
|
|
1450
|
+
subText: '',
|
|
1451
|
+
});
|
|
1452
|
+
}
|
|
1453
|
+
/**
|
|
1454
|
+
* Parse a chapter subtitle element
|
|
1455
|
+
*/
|
|
1456
|
+
parseChapterSubtitle(element) {
|
|
1457
|
+
const level = this.extractLevel(element.className);
|
|
1458
|
+
this.elements.push({
|
|
1459
|
+
node: element,
|
|
1460
|
+
type: ParserElementType.ChapterSubtitle,
|
|
1461
|
+
class: element.className,
|
|
1462
|
+
level,
|
|
1463
|
+
subText: '',
|
|
1464
|
+
});
|
|
1465
|
+
}
|
|
1466
|
+
/**
|
|
1467
|
+
* Parse a header element
|
|
1468
|
+
*/
|
|
1469
|
+
parseHeader(element, nextElement) {
|
|
1470
|
+
const level = this.extractLevel(element.className);
|
|
1471
|
+
function updateTextContent(el) {
|
|
1472
|
+
// Create a TreeWalker that only shows TEXT nodes
|
|
1473
|
+
const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, {
|
|
1474
|
+
acceptNode(node) {
|
|
1475
|
+
return node.textContent?.trim()
|
|
1476
|
+
? NodeFilter.FILTER_ACCEPT
|
|
1477
|
+
: NodeFilter.FILTER_REJECT;
|
|
1478
|
+
},
|
|
1479
|
+
});
|
|
1480
|
+
let lastTextNode = null;
|
|
1481
|
+
// Walk through all text nodes
|
|
1482
|
+
while (walker.nextNode()) {
|
|
1483
|
+
lastTextNode = walker.currentNode;
|
|
1484
|
+
}
|
|
1485
|
+
// If we found a last text node
|
|
1486
|
+
if (lastTextNode) {
|
|
1487
|
+
const text = lastTextNode.textContent.trimEnd();
|
|
1488
|
+
if (text.endsWith('.')) {
|
|
1489
|
+
// Already ends with a period, just add a space
|
|
1490
|
+
lastTextNode.textContent = text + ' ';
|
|
1491
|
+
}
|
|
1492
|
+
else {
|
|
1493
|
+
// No period, add ". "
|
|
1494
|
+
lastTextNode.textContent = text + '. ';
|
|
1495
|
+
}
|
|
1496
|
+
}
|
|
1497
|
+
}
|
|
1498
|
+
/* Add nextElement as child if it exists instead of passing it as just subtext
|
|
1499
|
+
so that we can preserve the marks and apply them later on! */
|
|
1500
|
+
if (nextElement &&
|
|
1501
|
+
!shouldSkipNext(nextElement.className) &&
|
|
1502
|
+
nextElement.textContent?.length > 0) {
|
|
1503
|
+
updateTextContent(element);
|
|
1504
|
+
element.appendChild(nextElement);
|
|
1505
|
+
}
|
|
1506
|
+
this.elements.push({
|
|
1507
|
+
node: element,
|
|
1508
|
+
type: ParserElementType.Header,
|
|
1509
|
+
class: element.className,
|
|
1510
|
+
level,
|
|
1511
|
+
subText: '',
|
|
1512
|
+
});
|
|
1513
|
+
}
|
|
1514
|
+
/**
|
|
1515
|
+
* Parse a bullet point item element
|
|
1516
|
+
*/
|
|
1517
|
+
parseBullet(element) {
|
|
1518
|
+
const level = this.extractLevel(element.className);
|
|
1519
|
+
this.elements.push({
|
|
1520
|
+
node: element,
|
|
1521
|
+
type: ParserElementType.BulletListItem,
|
|
1522
|
+
class: element.className,
|
|
1523
|
+
level,
|
|
1524
|
+
subText: '',
|
|
1525
|
+
});
|
|
1526
|
+
}
|
|
1527
|
+
/**
|
|
1528
|
+
* Parse a ordered list point item element
|
|
1529
|
+
*/
|
|
1530
|
+
parseOrdered(element) {
|
|
1531
|
+
const level = this.extractLevel(element.className);
|
|
1532
|
+
this.elements.push({
|
|
1533
|
+
node: element,
|
|
1534
|
+
type: ParserElementType.OrderedListItem,
|
|
1535
|
+
class: element.className,
|
|
1536
|
+
level,
|
|
1537
|
+
subText: '',
|
|
1538
|
+
});
|
|
1539
|
+
}
|
|
1540
|
+
/**
|
|
1541
|
+
* Parse a paragraph element
|
|
1542
|
+
*/
|
|
1543
|
+
parseParagraph(element) {
|
|
1544
|
+
this.sanitizeText(element);
|
|
1545
|
+
const level = this.extractLevel(element.className);
|
|
1546
|
+
this.elements.push({
|
|
1547
|
+
node: element,
|
|
1548
|
+
type: ParserElementType.Paragraph,
|
|
1549
|
+
class: element.getAttribute('classname') ?? element.className,
|
|
1550
|
+
level,
|
|
1551
|
+
subText: '',
|
|
1552
|
+
});
|
|
1553
|
+
}
|
|
1554
|
+
parseDynamicHeader(element) {
|
|
1555
|
+
const level = this.extractLevel(element.className);
|
|
1556
|
+
const headerElement = {
|
|
1557
|
+
node: element,
|
|
1558
|
+
type: ParserElementType.TableTitle,
|
|
1559
|
+
class: element.className,
|
|
1560
|
+
level,
|
|
1561
|
+
subText: '',
|
|
1562
|
+
};
|
|
1563
|
+
const lastIndex = this.elements.length - 1;
|
|
1564
|
+
// Append element as child to the existing TableTitle node
|
|
1565
|
+
if (lastIndex >= 0 &&
|
|
1566
|
+
this.elements[lastIndex]?.type === ParserElementType.EnhancedTable &&
|
|
1567
|
+
this.elements[lastIndex - 1]?.type === ParserElementType.TableTitle) {
|
|
1568
|
+
const targetNode = this.elements[lastIndex - 1].node;
|
|
1569
|
+
targetNode.style.color = '#000000';
|
|
1570
|
+
targetNode.style.textTransform = 'none';
|
|
1571
|
+
// Move all children from `element` into `targetNode`
|
|
1572
|
+
while (element.firstChild) {
|
|
1573
|
+
targetNode.appendChild(element.firstChild);
|
|
1574
|
+
}
|
|
1575
|
+
}
|
|
1576
|
+
else if (lastIndex >= 0 &&
|
|
1577
|
+
this.elements[lastIndex]?.type === ParserElementType.EnhancedTable) {
|
|
1578
|
+
// Insert header before the last table
|
|
1579
|
+
this.elements.splice(lastIndex, 0, headerElement);
|
|
1580
|
+
}
|
|
1581
|
+
else {
|
|
1582
|
+
// Push normally
|
|
1583
|
+
this.elements.push(headerElement);
|
|
1584
|
+
}
|
|
1585
|
+
}
|
|
1586
|
+
/** Sanitize the text content by removing specific characters */
|
|
1587
|
+
sanitizeText(element) {
|
|
1588
|
+
for (const node of Array.from(element.childNodes ?? [])) {
|
|
1589
|
+
if (node.nodeType === 1 &&
|
|
1590
|
+
node.textContent?.replaceAll(/\s{1,100}/g, '') === 'µµ') {
|
|
1591
|
+
node.remove();
|
|
1592
|
+
}
|
|
1593
|
+
}
|
|
1594
|
+
}
|
|
1595
|
+
/**
|
|
1596
|
+
* Parse a figure (image) title element
|
|
1597
|
+
*/
|
|
1598
|
+
parseFigureTitle(element) {
|
|
1599
|
+
const level = this.extractLevel(element.className);
|
|
1600
|
+
const img = element.querySelector('img');
|
|
1601
|
+
const isNewFiguretype = img?.width > 200;
|
|
1602
|
+
this.elements.push({
|
|
1603
|
+
node: element,
|
|
1604
|
+
type: isNewFiguretype
|
|
1605
|
+
? ParserElementType.NewFigureTitle
|
|
1606
|
+
: ParserElementType.FigureTitle,
|
|
1607
|
+
class: element.className,
|
|
1608
|
+
level,
|
|
1609
|
+
subText: '',
|
|
1610
|
+
});
|
|
1611
|
+
}
|
|
1612
|
+
/**
|
|
1613
|
+
* Parse a ChangeBarPara element
|
|
1614
|
+
*/
|
|
1615
|
+
parseChangeBarPara(element) {
|
|
1616
|
+
const level = this.extractLevel(element.className);
|
|
1617
|
+
this.elements.push({
|
|
1618
|
+
node: element,
|
|
1619
|
+
type: ParserElementType.ChangeBarPara,
|
|
1620
|
+
class: element.className,
|
|
1621
|
+
level,
|
|
1622
|
+
subText: '',
|
|
1623
|
+
});
|
|
1624
|
+
}
|
|
1625
|
+
/**
|
|
1626
|
+
* Parse a table title element
|
|
1627
|
+
*/
|
|
1628
|
+
parseTableTitle(element) {
|
|
1629
|
+
const level = this.extractLevel(element.className);
|
|
1630
|
+
this.elements.push({
|
|
1631
|
+
node: element,
|
|
1632
|
+
type: ParserElementType.TableTitle,
|
|
1633
|
+
class: element.className,
|
|
1634
|
+
level,
|
|
1635
|
+
subText: '',
|
|
1636
|
+
});
|
|
1637
|
+
}
|
|
1638
|
+
/**
|
|
1639
|
+
* Parse an unknown element. Currently does nothing besides printing a warning to the console.
|
|
1640
|
+
*/
|
|
1641
|
+
parseUnknownElement(element, message) {
|
|
1642
|
+
console.warn(`Parsing unknown element: ${element.className}.${message}`);
|
|
1643
|
+
this.config.messageSink?.('Warning', `Unknown element detected: ${element.className}.${message}`);
|
|
1644
|
+
}
|
|
1645
|
+
/**
|
|
1646
|
+
* Parse a section title element
|
|
1647
|
+
*/
|
|
1648
|
+
parseSectionTitle(element) {
|
|
1649
|
+
const level = this.extractLevel(element.className);
|
|
1650
|
+
this.elements.push({
|
|
1651
|
+
node: element,
|
|
1652
|
+
type: ParserElementType.SectionTitle,
|
|
1653
|
+
class: element.className,
|
|
1654
|
+
level,
|
|
1655
|
+
subText: '',
|
|
1656
|
+
});
|
|
1657
|
+
}
|
|
1658
|
+
/**
|
|
1659
|
+
* Parses an `Element` as determined by its `className`
|
|
1660
|
+
*
|
|
1661
|
+
* @param element - The `Element` to be parsed
|
|
1662
|
+
*/
|
|
1663
|
+
parseElement(element, nextElement) {
|
|
1664
|
+
this.sanitizeElement(element);
|
|
1665
|
+
const className = element.className?.trim();
|
|
1666
|
+
if (!className) {
|
|
1667
|
+
this.elementsParsedMap.set('unknown', false);
|
|
1668
|
+
this.parseUnknownElement(element, 'Ignoring element with no class name.');
|
|
1669
|
+
return;
|
|
1670
|
+
}
|
|
1671
|
+
this.elementsParsedMap.set(className, true);
|
|
1672
|
+
switch (className) {
|
|
1673
|
+
case '_AF_Example':
|
|
1674
|
+
case '_AF_Note':
|
|
1675
|
+
case '_AF_Caution':
|
|
1676
|
+
case '_AF_Warning':
|
|
1677
|
+
this.parseNote(element);
|
|
1678
|
+
break;
|
|
1679
|
+
case 'chapterTitle':
|
|
1680
|
+
case 'attachmentTitle':
|
|
1681
|
+
this.parseChapterTitle(element);
|
|
1682
|
+
break;
|
|
1683
|
+
case 'chpara0':
|
|
1684
|
+
case 'attpara0':
|
|
1685
|
+
case 'chsubpara1':
|
|
1686
|
+
case 'attsubpara1':
|
|
1687
|
+
this.parseHeader(element, nextElement);
|
|
1688
|
+
break;
|
|
1689
|
+
case 'chTableTitle':
|
|
1690
|
+
case 'attTableTitleCont':
|
|
1691
|
+
case 'attTableTitle':
|
|
1692
|
+
case 'chTableTitleCont':
|
|
1693
|
+
this.parseTableTitle(element);
|
|
1694
|
+
break;
|
|
1695
|
+
case 'chText':
|
|
1696
|
+
case 'attText':
|
|
1697
|
+
this.parseChapterSubtitle(element);
|
|
1698
|
+
break;
|
|
1699
|
+
case 'i_bullet':
|
|
1700
|
+
case 'item_0':
|
|
1701
|
+
case 'i_bullet_0':
|
|
1702
|
+
case 'i_bullet_1':
|
|
1703
|
+
case 'i_bullet_2':
|
|
1704
|
+
case 'i_bullet_3':
|
|
1705
|
+
case 'i_bullet_4':
|
|
1706
|
+
case 'i_bullet_5':
|
|
1707
|
+
case 'i_bullet_6':
|
|
1708
|
+
case 'i_bullet_7':
|
|
1709
|
+
case 'i-bullet':
|
|
1710
|
+
case 'i-bullet-0':
|
|
1711
|
+
case 'i-bullet-1':
|
|
1712
|
+
case 'i-bullet-2':
|
|
1713
|
+
case 'i-bullet-3':
|
|
1714
|
+
this.parseParagraph(element);
|
|
1715
|
+
break;
|
|
1716
|
+
case 'para':
|
|
1717
|
+
case 'para0':
|
|
1718
|
+
case 'para1':
|
|
1719
|
+
case 'paraleadin':
|
|
1720
|
+
case 'paraLeft':
|
|
1721
|
+
case 'AFDP Bullet':
|
|
1722
|
+
case 'AFDP Sub-bullet':
|
|
1723
|
+
case 'acronym':
|
|
1724
|
+
case 'chsubpara2':
|
|
1725
|
+
case 'chsubpara3':
|
|
1726
|
+
case 'attsubpara2':
|
|
1727
|
+
case 'attsubpara3':
|
|
1728
|
+
case 'attsubpara4':
|
|
1729
|
+
case 'attsubpara5':
|
|
1730
|
+
case 'attsubpara6':
|
|
1731
|
+
case 'chsubpara4':
|
|
1732
|
+
case 'chsubpara5':
|
|
1733
|
+
case 'chsubpara6':
|
|
1734
|
+
case 'FM-AF-Note':
|
|
1735
|
+
case 'FM-AF-Example':
|
|
1736
|
+
case 'item_1':
|
|
1737
|
+
case 'item_2':
|
|
1738
|
+
case 'item_3':
|
|
1739
|
+
case 'Numbered1start':
|
|
1740
|
+
case 'Numbered1':
|
|
1741
|
+
case 'Body':
|
|
1742
|
+
case 'Level0_Start':
|
|
1743
|
+
case 'Level0_Cont':
|
|
1744
|
+
case 'Level1_Start':
|
|
1745
|
+
case 'Level1_Cont':
|
|
1746
|
+
case 'Level2_Start':
|
|
1747
|
+
case 'Level2_Cont':
|
|
1748
|
+
case 'Level3_Cont':
|
|
1749
|
+
case 'Level3_Start':
|
|
1750
|
+
case 'Level4_Cont':
|
|
1751
|
+
case 'Level4_Start':
|
|
1752
|
+
this.parseParagraph(element);
|
|
1753
|
+
break;
|
|
1754
|
+
case 'dynamicTableHeader':
|
|
1755
|
+
this.parseDynamicHeader(element);
|
|
1756
|
+
break;
|
|
1757
|
+
case 'chFigureTitle':
|
|
1758
|
+
case 'chFigureTitleCont':
|
|
1759
|
+
case 'attFigureTitle':
|
|
1760
|
+
case 'attFigureTitleCont':
|
|
1761
|
+
this.parseFigureTitle(element);
|
|
1762
|
+
break;
|
|
1763
|
+
case 'ChangeBarPara':
|
|
1764
|
+
this.parseChangeBarPara(element);
|
|
1765
|
+
break;
|
|
1766
|
+
case 'sectionTitle':
|
|
1767
|
+
this.parseSectionTitle(element);
|
|
1768
|
+
break;
|
|
1769
|
+
case 'UL':
|
|
1770
|
+
this.parseBullet(element);
|
|
1771
|
+
break;
|
|
1772
|
+
case 'Hidden':
|
|
1773
|
+
this.elementsParsedMap.set(className, false);
|
|
1774
|
+
this.parseUnknownElement(element, `Ignoring "${className}" because hidden text is not meant to be displayed.`);
|
|
1775
|
+
break;
|
|
1776
|
+
case 'Cross_Reference':
|
|
1777
|
+
this.elementsParsedMap.set(className, false);
|
|
1778
|
+
this.parseUnknownElement(element, `Ignoring "${className}" because cross-references text is not meant to be displayed.`);
|
|
1779
|
+
break;
|
|
1780
|
+
case 'FLOW_A':
|
|
1781
|
+
this.elementsParsedMap.set(className, false);
|
|
1782
|
+
this.parseUnknownElement(element, `Ignoring "${className}" the FLOW_A is for framemaker to support columns. There are no columns, so they are ignored`);
|
|
1783
|
+
break;
|
|
1784
|
+
case 'superscript':
|
|
1785
|
+
default:
|
|
1786
|
+
console.warn(`Unknown style detected: ${className}. Treating as paragraph.`);
|
|
1787
|
+
this.config.messageSink?.('Warning', `Unknown style detected: ${className}`);
|
|
1788
|
+
this.parseParagraph(element);
|
|
1789
|
+
break;
|
|
1790
|
+
}
|
|
1791
|
+
}
|
|
1792
|
+
parseElement_doc(element, nextElement) {
|
|
1793
|
+
// SL-12
|
|
1794
|
+
this.elementsParsedMap.set(element.tagName, true);
|
|
1795
|
+
switch (element.tagName // SL-6
|
|
1796
|
+
) {
|
|
1797
|
+
case '_AF_Example':
|
|
1798
|
+
case '_AF_Note':
|
|
1799
|
+
case '_AF_Caution':
|
|
1800
|
+
case '_AF_Warning':
|
|
1801
|
+
this.parseNote(element);
|
|
1802
|
+
break;
|
|
1803
|
+
case 'HR':
|
|
1804
|
+
this.parseHR(element);
|
|
1805
|
+
break;
|
|
1806
|
+
case 'chapterTitle':
|
|
1807
|
+
case 'attachmentTitle':
|
|
1808
|
+
this.parseChapterTitle(element);
|
|
1809
|
+
break;
|
|
1810
|
+
case 'H1':
|
|
1811
|
+
case 'H2':
|
|
1812
|
+
case 'H3':
|
|
1813
|
+
case 'H4':
|
|
1814
|
+
this.parseHeader(element, nextElement);
|
|
1815
|
+
break;
|
|
1816
|
+
case 'chTableTitle':
|
|
1817
|
+
case 'attTableTitleCont':
|
|
1818
|
+
case 'attTableTitle':
|
|
1819
|
+
this.parseTableTitle(element);
|
|
1820
|
+
break;
|
|
1821
|
+
case 'chText':
|
|
1822
|
+
case 'attText':
|
|
1823
|
+
this.parseChapterSubtitle(element);
|
|
1824
|
+
break;
|
|
1825
|
+
case 'i_bullet':
|
|
1826
|
+
case 'i_bullet_0':
|
|
1827
|
+
case 'i_bullet_1':
|
|
1828
|
+
case 'i_bullet_2':
|
|
1829
|
+
case 'i_bullet_3':
|
|
1830
|
+
case 'i_bullet_4':
|
|
1831
|
+
case 'i_bullet_5':
|
|
1832
|
+
case 'i_bullet_6':
|
|
1833
|
+
case 'i_bullet_7':
|
|
1834
|
+
this.parseBullet(element);
|
|
1835
|
+
break;
|
|
1836
|
+
case 'SPAN':
|
|
1837
|
+
case 'DIV':
|
|
1838
|
+
this.parseVignet(element);
|
|
1839
|
+
break;
|
|
1840
|
+
case 'P':
|
|
1841
|
+
case 'para1':
|
|
1842
|
+
case 'paraleadin':
|
|
1843
|
+
case 'paraLeft':
|
|
1844
|
+
case 'AFDP Bullet':
|
|
1845
|
+
case 'AFDP Sub-bullet':
|
|
1846
|
+
case 'attsubpara2':
|
|
1847
|
+
case 'attsubpara3':
|
|
1848
|
+
case 'attsubpara4':
|
|
1849
|
+
case 'attsubpara5':
|
|
1850
|
+
case 'attsubpara6':
|
|
1851
|
+
case 'i-bullet-2':
|
|
1852
|
+
case 'chsubpara4':
|
|
1853
|
+
case 'chsubpara5':
|
|
1854
|
+
case 'chsubpara6':
|
|
1855
|
+
case 'sumText':
|
|
1856
|
+
this.parseParagraph(element);
|
|
1857
|
+
break;
|
|
1858
|
+
case 'chFigureTitle':
|
|
1859
|
+
case 'attFigureTitle':
|
|
1860
|
+
case 'attFigureTitleCont':
|
|
1861
|
+
this.parseFigureTitle(element);
|
|
1862
|
+
break;
|
|
1863
|
+
case 'ChangeBarPara':
|
|
1864
|
+
this.parseChangeBarPara(element);
|
|
1865
|
+
break;
|
|
1866
|
+
case 'sectionTitle':
|
|
1867
|
+
this.parseSectionTitle(element);
|
|
1868
|
+
break;
|
|
1869
|
+
case 'SUP':
|
|
1870
|
+
case 'OL':
|
|
1871
|
+
this.parseOrdered(element);
|
|
1872
|
+
break;
|
|
1873
|
+
case 'UL':
|
|
1874
|
+
case 'LI':
|
|
1875
|
+
this.parseBullet(element);
|
|
1876
|
+
break;
|
|
1877
|
+
case 'Hidden':
|
|
1878
|
+
this.elementsParsedMap.set(element.className, false);
|
|
1879
|
+
this.parseUnknownElement(element, `Ignoring "${element.className}" because hidden text is not meant to be displayed.`);
|
|
1880
|
+
break;
|
|
1881
|
+
case 'Cross_Reference':
|
|
1882
|
+
this.elementsParsedMap.set(element.className, false);
|
|
1883
|
+
this.parseUnknownElement(element, `Ignoring "${element.className}" because Cross_Reference text is not meant to be displayed.`);
|
|
1884
|
+
break;
|
|
1885
|
+
case 'superscript':
|
|
1886
|
+
default:
|
|
1887
|
+
console.warn(`Unknown style detected: ${element.className}. Treating as paragraph.`);
|
|
1888
|
+
this.parseParagraph(element);
|
|
1889
|
+
}
|
|
1890
|
+
}
|
|
1891
|
+
/**
|
|
1892
|
+
* Cleans up the HTML by calling certain helper methods
|
|
1893
|
+
*/
|
|
1894
|
+
sanitizeHTML(html) {
|
|
1895
|
+
return this.replaceKeywordsWithLinks(html);
|
|
1896
|
+
}
|
|
1897
|
+
/**
|
|
1898
|
+
* Replaces characters in the HTML as defined by the `replacementChars` parameter in the config
|
|
1899
|
+
*/
|
|
1900
|
+
replaceUnwantedChars(html) {
|
|
1901
|
+
const chars = this.config.replacementChars;
|
|
1902
|
+
for (const char of chars) {
|
|
1903
|
+
html = html.replace(char.find, char.replace);
|
|
1904
|
+
}
|
|
1905
|
+
return html;
|
|
1906
|
+
}
|
|
1907
|
+
/**
|
|
1908
|
+
* Replaces keywords in the HTML with links, as defined by the `replaceWithLinks` parameter in the config
|
|
1909
|
+
*/
|
|
1910
|
+
replaceKeywordsWithLinks(html) {
|
|
1911
|
+
const arr = this.config.replaceWithLinks;
|
|
1912
|
+
for (const e of arr) {
|
|
1913
|
+
const regex = new RegExp(String.raw `\b${e.find}\b`, 'gi');
|
|
1914
|
+
const link = `<a href="${e.href}">${e.find}</a>`;
|
|
1915
|
+
html = html.replace(regex, link);
|
|
1916
|
+
}
|
|
1917
|
+
return html;
|
|
1918
|
+
}
|
|
1919
|
+
//FS : For skipping triming inside table, add more classes to the class list for future use
|
|
1920
|
+
matchClassToExcludeNumber(className) {
|
|
1921
|
+
let trimmedClassName = className.trim();
|
|
1922
|
+
trimmedClassName = trimmedClassName.toLowerCase();
|
|
1923
|
+
const classList = ['cellbody', 'cellheading', 'bolditalic'];
|
|
1924
|
+
return !classList.includes(trimmedClassName);
|
|
1925
|
+
}
|
|
1926
|
+
sanitizeElement(element) {
|
|
1927
|
+
const stripTextContent = (node) => {
|
|
1928
|
+
if (node.nodeType === Node.TEXT_NODE) {
|
|
1929
|
+
const parentClass = node.parentNode?.className?.toLowerCase?.() || '';
|
|
1930
|
+
if (this.config?.stripSectionNumbers &&
|
|
1931
|
+
this.matchClassToExcludeNumber(parentClass) &&
|
|
1932
|
+
parentClass !== 'acronym' // skip if parent is acronym
|
|
1933
|
+
) {
|
|
1934
|
+
//Fix for paras having double sets of numbering
|
|
1935
|
+
node.textContent = node.textContent
|
|
1936
|
+
.replaceAll(/^[A-Z]?\d{1,5}(?:\.\d{1,5}){0,10}\.?(?=\s)/gm, '')
|
|
1937
|
+
.replaceAll('\n', '');
|
|
1938
|
+
}
|
|
1939
|
+
node.textContent = node.textContent.replace(/•\s{0,10000}/, '');
|
|
1940
|
+
node.textContent = node.textContent.replace(/^FM_/, '');
|
|
1941
|
+
if (node.textContent === '') {
|
|
1942
|
+
node.remove();
|
|
1943
|
+
}
|
|
1944
|
+
}
|
|
1945
|
+
else if (node.nodeType === Node.ELEMENT_NODE) {
|
|
1946
|
+
for (const childNode of Array.from(node.childNodes)) {
|
|
1947
|
+
stripTextContent(childNode);
|
|
1948
|
+
}
|
|
1949
|
+
}
|
|
1950
|
+
};
|
|
1951
|
+
stripTextContent(element);
|
|
1952
|
+
}
|
|
1953
|
+
removeLastNumber(inputString) {
|
|
1954
|
+
let lastNonDigitIndex = inputString.length - 1;
|
|
1955
|
+
while (lastNonDigitIndex >= 0 &&
|
|
1956
|
+
!Number.isNaN(Number.parseInt(inputString[lastNonDigitIndex]))) {
|
|
1957
|
+
lastNonDigitIndex--;
|
|
1958
|
+
}
|
|
1959
|
+
return inputString.slice(0, lastNonDigitIndex + 1);
|
|
1960
|
+
}
|
|
1961
|
+
getScaledWidth(width) {
|
|
1962
|
+
if (width <= 200) {
|
|
1963
|
+
return width.toString();
|
|
1964
|
+
}
|
|
1965
|
+
else if (width <= 699) {
|
|
1966
|
+
return '624';
|
|
1967
|
+
}
|
|
1968
|
+
else {
|
|
1969
|
+
return '864';
|
|
1970
|
+
}
|
|
1971
|
+
}
|
|
1972
|
+
isTransparentTable(element) {
|
|
1973
|
+
const firstRow = element.querySelector('tr');
|
|
1974
|
+
let isTransparent = false;
|
|
1975
|
+
const transparentColors = new Set([
|
|
1976
|
+
'#ffffff',
|
|
1977
|
+
'#fff',
|
|
1978
|
+
'rgb(255, 255, 255)',
|
|
1979
|
+
'transparent',
|
|
1980
|
+
]);
|
|
1981
|
+
if (!firstRow) {
|
|
1982
|
+
return isTransparent;
|
|
1983
|
+
}
|
|
1984
|
+
const tdElements = firstRow.getElementsByTagName('td');
|
|
1985
|
+
for (const td of Array.from(tdElements)) {
|
|
1986
|
+
const style = td.getAttribute('style');
|
|
1987
|
+
if (style) {
|
|
1988
|
+
const borderColor = this.checkCellStyle(style);
|
|
1989
|
+
if (borderColor && transparentColors.has(borderColor.toLowerCase())) {
|
|
1990
|
+
isTransparent = true;
|
|
1991
|
+
break;
|
|
1992
|
+
}
|
|
1993
|
+
}
|
|
1994
|
+
}
|
|
1995
|
+
return isTransparent;
|
|
1996
|
+
}
|
|
1997
|
+
/**
|
|
1998
|
+
* Extracts and calculates the column widths from a given HTML table element.
|
|
1999
|
+
*
|
|
2000
|
+
* This function reads `<col>` elements within a `<colgroup>` of the table and
|
|
2001
|
+
* computes the pixel-based width for each column. It handles widths specified
|
|
2002
|
+
* in percentages and pixels. If all widths are in pixels, they are scaled using
|
|
2003
|
+
* a separate scaling method. If the computed widths are invalid or incomplete,
|
|
2004
|
+
* the function returns `undefined`.
|
|
2005
|
+
*
|
|
2006
|
+
* @param {HTMLTableElement} table - The HTML table element from which column widths are to be extracted.
|
|
2007
|
+
* @returns {number[] | undefined} An array of column widths in pixels, or `undefined` if the widths are invalid or missing.
|
|
2008
|
+
*/
|
|
2009
|
+
getColWidthArray(table) {
|
|
2010
|
+
const colElements = Array.from(table.querySelectorAll('colgroup > col'));
|
|
2011
|
+
if (colElements.length == 0) {
|
|
2012
|
+
return;
|
|
2013
|
+
}
|
|
2014
|
+
let widthArray = [];
|
|
2015
|
+
let totalPixelWidth = 619;
|
|
2016
|
+
const rawWidthArray = [];
|
|
2017
|
+
for (const col of colElements) {
|
|
2018
|
+
//Added fallback if style attribute is not present
|
|
2019
|
+
const width = col.style.width || col.getAttribute('width');
|
|
2020
|
+
// Skip this column if width is empty (no inline style set)
|
|
2021
|
+
if (!width) {
|
|
2022
|
+
return;
|
|
2023
|
+
}
|
|
2024
|
+
if (width.endsWith('%')) {
|
|
2025
|
+
const percent = Number.parseFloat(width);
|
|
2026
|
+
widthArray.push(Math.round((percent / 100) * 620));
|
|
2027
|
+
}
|
|
2028
|
+
else if (width.endsWith('px')) {
|
|
2029
|
+
rawWidthArray.push(Number.parseFloat(width));
|
|
2030
|
+
}
|
|
2031
|
+
// Skip invalid widths
|
|
2032
|
+
else {
|
|
2033
|
+
return;
|
|
2034
|
+
}
|
|
2035
|
+
}
|
|
2036
|
+
//Finding scaled widths for individual widths mentioned in px
|
|
2037
|
+
if (rawWidthArray.length === colElements.length) {
|
|
2038
|
+
widthArray = this.scaleWidthArray(rawWidthArray);
|
|
2039
|
+
}
|
|
2040
|
+
//Return undefined for any invalid case
|
|
2041
|
+
if (widthArray.length !== colElements.length) {
|
|
2042
|
+
return;
|
|
2043
|
+
}
|
|
2044
|
+
const sum = this.getSumOfArray(widthArray);
|
|
2045
|
+
if (sum < 200) {
|
|
2046
|
+
totalPixelWidth = sum;
|
|
2047
|
+
}
|
|
2048
|
+
else if (sum > 700) {
|
|
2049
|
+
totalPixelWidth = 861;
|
|
2050
|
+
}
|
|
2051
|
+
// cut/add excess to meet the size requirement
|
|
2052
|
+
widthArray[0] += totalPixelWidth - sum;
|
|
2053
|
+
return widthArray;
|
|
2054
|
+
}
|
|
2055
|
+
setCellWidth(colSpan, cellIndex, colWidthArray) {
|
|
2056
|
+
return colWidthArray.slice(cellIndex, cellIndex + colSpan);
|
|
2057
|
+
}
|
|
2058
|
+
scaleWidthArray(rawWidthArray) {
|
|
2059
|
+
const sum = this.getSumOfArray(rawWidthArray);
|
|
2060
|
+
if (sum < 200) {
|
|
2061
|
+
return rawWidthArray;
|
|
2062
|
+
}
|
|
2063
|
+
else {
|
|
2064
|
+
const newTotal = sum <= 700 ? 619 : 861;
|
|
2065
|
+
const scaledWidths = rawWidthArray.map((w) => Math.round((w / sum) * newTotal));
|
|
2066
|
+
return scaledWidths;
|
|
2067
|
+
}
|
|
2068
|
+
}
|
|
2069
|
+
getSumOfArray(array) {
|
|
2070
|
+
if (array.length == 0) {
|
|
2071
|
+
return 0;
|
|
2072
|
+
}
|
|
2073
|
+
return array.reduce((sum, n) => sum + n, 0);
|
|
2074
|
+
}
|
|
2075
|
+
/**
|
|
2076
|
+
* Determines the orientation (portrait or landscape) based on the total width.
|
|
2077
|
+
*
|
|
2078
|
+
* @param {number} totalWidth - The total width (in pixels) used to determine orientation.
|
|
2079
|
+
* @returns {'portrait' | 'landscape'} Returns 'portrait' if the width is less than 700 pixels; otherwise, returns 'landscape'.
|
|
2080
|
+
*/
|
|
2081
|
+
findOrientation(totalWidth) {
|
|
2082
|
+
return totalWidth < 700 ? 'portrait' : 'landscape';
|
|
2083
|
+
}
|
|
2084
|
+
/**
|
|
2085
|
+
* Extracts image information from an HTMLImageElement.
|
|
2086
|
+
*
|
|
2087
|
+
* @param {HTMLImageElement} img - The image element to extract information from.
|
|
2088
|
+
* @returns {{ src: string; alt: string; width: number; height: number }} An object containing the image's source URL, alt text, width, and height.
|
|
2089
|
+
*/
|
|
2090
|
+
extractImageInfo(img) {
|
|
2091
|
+
return {
|
|
2092
|
+
src: img.src,
|
|
2093
|
+
alt: img.alt,
|
|
2094
|
+
width: img.width,
|
|
2095
|
+
height: img.height,
|
|
2096
|
+
};
|
|
2097
|
+
}
|
|
2098
|
+
/**
|
|
2099
|
+
* Extracts note paragraphs from the last row of an HTML table if that row
|
|
2100
|
+
* contains a note header such as "OVERALL NOTE:" or "NOTES:".
|
|
2101
|
+
*
|
|
2102
|
+
* This function is designed for tables where the final row may optionally
|
|
2103
|
+
* contain a note. If such a note exists, it returns all <p> elements inside
|
|
2104
|
+
* the first <td> of that row, excluding the header line itself
|
|
2105
|
+
* (e.g., "OVERALL NOTE:" / "NOTES:").
|
|
2106
|
+
*
|
|
2107
|
+
* The returned <p> elements are kept as HTMLElement nodes so that they can
|
|
2108
|
+
* be further converted into structured ProseMirror content
|
|
2109
|
+
* (e.g., using NewLicitParagraphElement).
|
|
2110
|
+
*
|
|
2111
|
+
* If the table doesn't contain a note row, or if the expected structure is missing,
|
|
2112
|
+
* the function safely returns null.
|
|
2113
|
+
*
|
|
2114
|
+
* @param {HTMLTableSectionElement} table - The HTML table section (tbody) to extract the note from.
|
|
2115
|
+
* @returns {HTMLElement[] | null} - An array of <p> nodes representing the note paragraphs,
|
|
2116
|
+
* or null if no note row was found.
|
|
2117
|
+
*/
|
|
2118
|
+
extractNote(table) {
|
|
2119
|
+
const rows = Array.from(table.querySelectorAll('tr'));
|
|
2120
|
+
if (rows.length < 2)
|
|
2121
|
+
return null;
|
|
2122
|
+
const lastRow = rows.at(-1);
|
|
2123
|
+
const td = lastRow.querySelector('td');
|
|
2124
|
+
if (!td)
|
|
2125
|
+
return null;
|
|
2126
|
+
const paragraphs = Array.from(td.querySelectorAll('p'));
|
|
2127
|
+
if (paragraphs.length < 2)
|
|
2128
|
+
return null;
|
|
2129
|
+
const firstParaText = (paragraphs[0].textContent || '')
|
|
2130
|
+
.replaceAll(/\s{1,100}/g, ' ')
|
|
2131
|
+
.trim()
|
|
2132
|
+
.toUpperCase();
|
|
2133
|
+
if (firstParaText.includes('OVERALL NOTE:') ||
|
|
2134
|
+
firstParaText.includes('NOTES:')) {
|
|
2135
|
+
return paragraphs.slice(1);
|
|
2136
|
+
}
|
|
2137
|
+
return null;
|
|
2138
|
+
}
|
|
2139
|
+
/**
|
|
2140
|
+
* Determines whether the given DOM element should be treated as a "table figure".
|
|
2141
|
+
*
|
|
2142
|
+
* Business context:
|
|
2143
|
+
* As per mail send on 07 Aug 2025:
|
|
2144
|
+
* > "Can we sense when there is an image and a line or two of text – maybe remove the vignette control."
|
|
2145
|
+
*
|
|
2146
|
+
* This function implements that detection logic by identifying elements that match
|
|
2147
|
+
* either of the following patterns:
|
|
2148
|
+
*
|
|
2149
|
+
* 1. It is **not** a <DIV> element, and its first child element is an <IMG>.
|
|
2150
|
+
* 2. It is a <DIV> element that:
|
|
2151
|
+
* - Contains at least one <IMG> element anywhere inside (at any depth),
|
|
2152
|
+
* - Contains exactly one <P> element anywhere inside,
|
|
2153
|
+
* - That <P> element's trimmed text content is less than 100 characters
|
|
2154
|
+
* (representing "a line or two of text").
|
|
2155
|
+
*
|
|
2156
|
+
* @param {Element} node - The DOM element to check.
|
|
2157
|
+
* @returns {boolean} `true` if the element qualifies as a table figure, otherwise `false`.
|
|
2158
|
+
*/
|
|
2159
|
+
isTableFigureNode(node) {
|
|
2160
|
+
const isImage = node.tagName !== 'DIV' && node.children.item(0)?.tagName === 'IMG';
|
|
2161
|
+
const isShortVignette = node.tagName === 'DIV' &&
|
|
2162
|
+
node.querySelector('img') &&
|
|
2163
|
+
node.querySelectorAll('p').length === 1 &&
|
|
2164
|
+
node.querySelector('p').textContent.trim().length < 100;
|
|
2165
|
+
return isImage || isShortVignette;
|
|
2166
|
+
}
|
|
2167
|
+
/**
|
|
2168
|
+
* Determines whether the provided class name corresponds to a note-related node.
|
|
2169
|
+
*
|
|
2170
|
+
* Checks if the given `className` matches any of the predefined note classes,
|
|
2171
|
+
* such as examples, notes, cautions, or warnings.
|
|
2172
|
+
*
|
|
2173
|
+
* @param className - The CSS class name to check.
|
|
2174
|
+
* @returns `true` if the class name is a recognized note node; otherwise, `false`.
|
|
2175
|
+
*/
|
|
2176
|
+
isNoteNode(className) {
|
|
2177
|
+
const noteClasses = [
|
|
2178
|
+
'_AF_Example',
|
|
2179
|
+
'_AF_Note',
|
|
2180
|
+
'_AF_Caution',
|
|
2181
|
+
'_AF_Warning',
|
|
2182
|
+
'FM-AF-Note',
|
|
2183
|
+
'FM-AF-Example',
|
|
2184
|
+
];
|
|
2185
|
+
return noteClasses.includes(className);
|
|
2186
|
+
}
|
|
2187
|
+
}
|
|
2188
|
+
/**
 * Collects the info-icon lists of a document.
 *
 * Only direct children of `<body>` are considered; a node is kept when it is
 * an `<OL>` whose id is `infoIcon`.
 *
 * @param dom - The document (or any object offering `querySelectorAll`).
 * @returns The matching nodes in document order; empty when there are none.
 */
function extractInfoIconData(dom) {
    const topLevelNodes = Array.from(dom.querySelectorAll('body > *'));
    return topLevelNodes.filter((node) => node.nodeName === 'OL' && node.id === 'infoIcon');
}
|