docx-to-html-mathml-v2 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +22 -0
  2. package/OMML2MML.XSL +1819 -0
  3. package/README.md +29 -0
  4. package/bin/mammoth +38 -0
  5. package/index.js +3 -0
  6. package/lib/document-to-html.js +526 -0
  7. package/lib/documents.js +266 -0
  8. package/lib/docx/body-reader.js +931 -0
  9. package/lib/docx/comments-reader.js +31 -0
  10. package/lib/docx/content-types-reader.js +58 -0
  11. package/lib/docx/document-xml-reader.js +30 -0
  12. package/lib/docx/docx-reader.js +226 -0
  13. package/lib/docx/files.js +80 -0
  14. package/lib/docx/notes-reader.js +28 -0
  15. package/lib/docx/numbering-xml.js +111 -0
  16. package/lib/docx/office-xml-reader.js +73 -0
  17. package/lib/docx/relationships-reader.js +43 -0
  18. package/lib/docx/style-map.js +75 -0
  19. package/lib/docx/styles-reader.js +90 -0
  20. package/lib/docx/uris.js +21 -0
  21. package/lib/docx-to-html-mathml.js +193 -0
  22. package/lib/html/ast.js +51 -0
  23. package/lib/html/index.js +49 -0
  24. package/lib/html/simplify.js +99 -0
  25. package/lib/images.js +31 -0
  26. package/lib/index.d.ts +15 -0
  27. package/lib/index.js +111 -0
  28. package/lib/main.js +63 -0
  29. package/lib/mammoth-core.js +3 -0
  30. package/lib/mathtype_batch.rb +58 -0
  31. package/lib/omml_to_mathml.cjs +97 -0
  32. package/lib/options-reader.js +107 -0
  33. package/lib/promises.js +42 -0
  34. package/lib/raw-text.js +14 -0
  35. package/lib/results.js +72 -0
  36. package/lib/style-reader.js +365 -0
  37. package/lib/styles/document-matchers.js +100 -0
  38. package/lib/styles/html-paths.js +75 -0
  39. package/lib/styles/parser/tokeniser.js +30 -0
  40. package/lib/transforms.js +62 -0
  41. package/lib/underline.js +11 -0
  42. package/lib/unzip.js +20 -0
  43. package/lib/writers/html-writer.js +167 -0
  44. package/lib/writers/index.js +14 -0
  45. package/lib/writers/markdown-writer.js +163 -0
  46. package/lib/xml/index.js +8 -0
  47. package/lib/xml/nodes.js +70 -0
  48. package/lib/xml/reader.js +75 -0
  49. package/lib/xml/writer.js +61 -0
  50. package/lib/xml/xmldom.js +23 -0
  51. package/lib/zipfile.js +72 -0
  52. package/mathtype_to_mathml_plus.rb +139 -0
  53. package/package.json +74 -0
@@ -0,0 +1,931 @@
1
+ exports.createBodyReader = createBodyReader;
2
+ exports._readNumberingProperties = readNumberingProperties;
3
+
4
+ var dingbatToUnicode = require("dingbat-to-unicode");
5
+ var _ = require("underscore");
6
+
7
+ var documents = require("../documents");
8
+ var Result = require("../results").Result;
9
+ var warning = require("../results").warning;
10
+ var xml = require("../xml");
11
+ var transforms = require("../transforms");
12
+ var uris = require("./uris");
13
+
14
+
15
/**
 * Drops everything up to and including the first "}" of a tag name, e.g.
 * "{namespace}local" becomes "local".
 *
 * @param {string} name - Tag name, possibly carrying a "{...}" prefix.
 * @returns {string} The part after the first "}" when at least one character
 *     follows it, the unchanged name otherwise, or "" for a falsy name.
 */
function cleanTagName(name) {
    if (!name) {
        return '';
    }
    const parts = name.match(/\}(.+)/);
    if (parts) {
        return parts[1];
    }
    return name;
}
20
/**
 * Escapes the characters that may not appear literally in XML character
 * data or in a double-quoted attribute value.
 *
 * @param {*} value - Value to escape; coerced to a string.
 * @returns {string} The escaped text.
 */
function escapeXmlValue(value) {
    return String(value)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;");
}

/**
 * Serialises a parsed XML node (and its subtree) back into markup.
 *
 * Text and attribute values are escaped so that content containing "&",
 * "<", ">" or '"' still yields well-formed XML.
 *
 * @param {?Object} node - Node with `type` "text" (uses `value`) or
 *     "element" (uses `name`, `attributes`, `children`).
 * @returns {string} The markup, or "" for a missing node or any other type.
 */
function serializeNode(node) {
    if (!node) {
        return "";
    }

    if (node.type === "text") {
        return escapeXmlValue(node.value || "");
    }

    if (node.type !== "element") {
        return "";
    }

    const name = node.name;

    const attrs = Object.entries(node.attributes || {})
        .map(([key, value]) => `${key}="${escapeXmlValue(value)}"`)
        .join(" ");

    const children = (node.children || [])
        .map((child) => serializeNode(child))
        .join("");

    const openTag = attrs ? `<${name} ${attrs}` : `<${name}`;

    // An element whose children serialise to nothing is self-closed.
    if (!children) {
        return `${openTag}/>`;
    }

    return `${openTag}>${children}</${name}>`;
}
51
+
52
+
53
/**
 * Builds the public body-reader facade.
 *
 * Each call on the returned object constructs a new BodyReader from the same
 * options, so no reader state is shared between calls.
 *
 * @param {Object} options - Passed unchanged to every BodyReader.
 * @returns {{readXmlElement: Function, readXmlElements: Function}}
 */
function createBodyReader(options) {
    function freshReader() {
        return new BodyReader(options);
    }

    return {
        readXmlElement: function (element) {
            return freshReader().readXmlElement(element);
        },
        readXmlElements: function (elements) {
            return freshReader().readXmlElements(elements);
        }
    };
}
63
+
64
+ function BodyReader(options) {
65
// Counter incremented for every MathType OLE object that is read.
let mathTypeIndex = 0;
const mathTypePlaceholders = options.mathTypePlaceholders || {};

// Complex fields (w:fldChar begin/separate/end) may nest, hence a stack.
const complexFieldStack = [];
// Instruction text fragments collected since the last field "begin".
let currentInstrText = [];

// When a paragraph is marked as deleted, its contents should be combined
// with the following paragraph. See 17.13.5.15 del (Deleted Paragraph) of
// ECMA-376 4th edition Part 1.
let deletedParagraphContents = [];

// Collaborators supplied by the caller.
const {
    relationships,
    contentTypes,
    docxFile,
    files,
    numbering,
    styles
} = options;
82
+
83
/**
 * Reads the relationship id of an OLE object element, trying the "r:id"
 * attribute first and "relationships:id" second.
 *
 * @param {Object} ole - Element exposing an `attributes` map.
 * @returns {string|undefined} The first truthy id, else the second attribute.
 */
function getOleRelationshipId(ole) {
    const attributes = ole.attributes;
    const shortForm = attributes["r:id"];
    if (shortForm) {
        return shortForm;
    }
    return attributes["relationships:id"];
}
86
+
87
/**
 * Resolves a relationship id to a zip entry name, relative to "word".
 *
 * @param {string} relationshipId - Id looked up in `relationships`.
 * @returns {string} Result of `uris.uriToZipEntryName("word", target)`.
 */
function getOleBinPath(relationshipId) {
    return uris.uriToZipEntryName(
        "word",
        relationships.findTargetByRelationshipId(relationshipId)
    );
}
91
+
92
/**
 * Reads each XML node in turn and merges the individual results.
 *
 * @param {Array} elements - XML nodes to read.
 * @returns {ReadResult} Combination of all per-node results.
 */
function readXmlElements(elements) {
    return combineResults(elements.map(function (element) {
        return readXmlElement(element);
    }));
}
96
+ // function readXmlElement(element) {
97
+ // if (element.type === "element") {
98
+ // var handler = xmlElementReaders[element.name];
99
+ // if (handler) {
100
+ // return handler(element);
101
+ // } else if (!Object.prototype.hasOwnProperty.call(ignoreElements, element.name)) {
102
+ // var message = warning("An unrecognised element was ignored: " + element.name);
103
+ // return emptyResultWithMessages([message]);
104
+ // }
105
+ // }
106
+ // return emptyResult();
107
+ // }
108
/**
 * Converts a single XML node into a ReadResult.
 *
 * Resolution order:
 *   1. non-element nodes produce an empty result;
 *   2. elements whose name ends in "oMath"/"oMathPara" become a "math"
 *      element carrying `element._rawXml`;
 *   3. a registered reader in `xmlElementReaders` handles the element;
 *   4. elements listed in `ignoreElements` are skipped with their subtree;
 *   5. any other element with children is transparent: its children are read;
 *   6. a childless unknown element produces a warning.
 *
 * @param {Object} element - XML node to read.
 * @returns {ReadResult}
 */
function readXmlElement(element) {
    if (element.type !== "element") {
        return emptyResult();
    }

    const name = element.name || "";

    if (name.endsWith("oMath") || name.endsWith("oMathPara")) {
        return elementResult({
            type: "math",
            kind: "omml",
            // NOTE(review): _rawXml is presumably attached by the XML
            // reader; it is not set anywhere in this file - confirm.
            omml: element._rawXml,
            altText: "[omml]"
        });
    }

    // Own-property check so names such as "constructor" never resolve to
    // something inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(xmlElementReaders, name)) {
        return xmlElementReaders[name](element);
    }

    // The ignore list must be consulted before descending. Otherwise ignored
    // containers (w:del, w:pPr, w:rPr, w:sectPr, ...) would have their
    // children read, leaking deleted content and emitting warnings for
    // property elements.
    if (Object.prototype.hasOwnProperty.call(ignoreElements, name)) {
        return emptyResult();
    }

    if (element.children && element.children.length) {
        return readXmlElements(element.children);
    }

    return emptyResultWithMessages([
        warning("An unrecognised element was ignored: " + name)
    ]);
}
130
+
131
+
132
+
133
/**
 * Reads paragraph-level properties from a w:pPr element.
 *
 * @param {Object} element - The w:pPr element (or an empty element).
 * @returns {ReadResult} Result whose value is a "paragraphProperties" object
 *     with style id/name, alignment (w:jc) and numbering level.
 */
function readParagraphProperties(element) {
    function toProperties(style) {
        const alignment = element.firstOrEmpty("w:jc").attributes["w:val"];
        const numberingLevel = readNumberingProperties(
            style.styleId,
            element.firstOrEmpty("w:numPr"),
            numbering
        );
        return {
            type: "paragraphProperties",
            styleId: style.styleId,
            styleName: style.name,
            alignment: alignment,
            numbering: numberingLevel
        };
    }

    return readParagraphStyle(element).map(function (style) {
        return toProperties(style);
    });
}
145
+
146
/**
 * Reads run-level formatting from a w:rPr element.
 *
 * @param {Object} element - The w:rPr element (or an empty element).
 * @returns {ReadResult} Result whose value is a "runProperties" object with
 *     style, font, size, colour, shading and the boolean formatting flags.
 */
function readRunProperties(element) {
    function attributeOf(tagName, attributeName) {
        return element.firstOrEmpty(tagName).attributes[attributeName];
    }

    return readRunStyle(element).map(function (style) {
        // w:sz is halved to obtain the font size; anything that is not a
        // plain run of digits is treated as "no size".
        const rawSize = attributeOf("w:sz", "w:val");
        let fontSize = null;
        if (/^[0-9]+$/.test(rawSize)) {
            fontSize = parseInt(rawSize, 10) / 2;
        }

        const textColor = attributeOf("w:color", "w:val");
        const shadingFill = attributeOf("w:shd", "w:fill");

        // Prefer the ASCII font, falling back to the high-ANSI one.
        const fontFamily = attributeOf("w:rFonts", "w:ascii")
            || attributeOf("w:rFonts", "w:hAnsi");

        return {
            type: "runProperties",
            styleId: style.styleId,
            styleName: style.name,
            verticalAlignment: attributeOf("w:vertAlign", "w:val"),
            font: fontFamily,
            fontSize: fontSize,
            isBold: readBooleanElement(element.first("w:b")),
            isUnderline: readUnderline(element.first("w:u")),
            isItalic: readBooleanElement(element.first("w:i")),
            isStrikethrough: readBooleanElement(element.first("w:strike")),
            isAllCaps: readBooleanElement(element.first("w:caps")),
            isSmallCaps: readBooleanElement(element.first("w:smallCaps")),
            highlight: readHighlightValue(attributeOf("w:highlight", "w:val")),
            color: textColor,
            background: shadingFill
        };
    });
}
190
+
191
+
192
/**
 * Decides whether a w:u element switches underlining on.
 *
 * @param {?Object} element - The w:u element, if present.
 * @returns {boolean} True only when w:val is present and is none of
 *     "false", "0" or "none".
 */
function readUnderline(element) {
    if (!element) {
        return false;
    }
    const value = element.attributes["w:val"];
    return !(
        value === undefined ||
        value === "false" ||
        value === "0" ||
        value === "none"
    );
}
200
+
201
/**
 * Interprets an on/off property element.
 *
 * @param {?Object} element - The property element, if present.
 * @returns {boolean} False when the element is absent or its w:val is
 *     "false" or "0"; true otherwise (including a missing w:val).
 */
function readBooleanElement(element) {
    if (!element) {
        return false;
    }
    const value = element.attributes["w:val"];
    return !(value === "false" || value === "0");
}
209
+
210
/**
 * Interprets an on/off attribute value.
 *
 * @param {*} value - Raw attribute value.
 * @returns {boolean} False for the strings "false" and "0"; true for
 *     everything else, including undefined.
 */
function readBooleanAttributeValue(value) {
    if (value === "false" || value === "0") {
        return false;
    }
    return true;
}
213
+
214
/**
 * Normalises a w:highlight value.
 *
 * @param {*} value - Raw w:val of the highlight element.
 * @returns {*} null when the value is falsy or "none", otherwise the value.
 */
function readHighlightValue(value) {
    const isUnset = !value || value === "none";
    return isUnset ? null : value;
}
221
+
222
/**
 * Reads the paragraph style referenced by w:pStyle.
 *
 * @param {Object} element - Properties element to search.
 * @returns {ReadResult} See readStyle.
 */
function readParagraphStyle(element) {
    const findStyleById = styles.findParagraphStyleById;
    return readStyle(element, "w:pStyle", "Paragraph", findStyleById);
}
225
+
226
/**
 * Reads the character style referenced by w:rStyle.
 *
 * @param {Object} element - Properties element to search.
 * @returns {ReadResult} See readStyle.
 */
function readRunStyle(element) {
    const findStyleById = styles.findCharacterStyleById;
    return readStyle(element, "w:rStyle", "Run", findStyleById);
}
229
+
230
/**
 * Reads the table style referenced by w:tblStyle.
 *
 * @param {Object} element - Properties element to search.
 * @returns {ReadResult} See readStyle.
 */
function readTableStyle(element) {
    const findStyleById = styles.findTableStyleById;
    return readStyle(element, "w:tblStyle", "Table", findStyleById);
}
233
+
234
/**
 * Looks up the style referenced by a child element of a properties element.
 *
 * @param {Object} element - Properties element to search.
 * @param {string} styleTagName - Child tag holding the reference in w:val.
 * @param {string} styleType - Human-readable type used in the warning text.
 * @param {Function} findStyleById - Maps a style id to a style, or a falsy
 *     value when the id is unknown.
 * @returns {ReadResult} Value `{styleId, name}`; carries a warning when a
 *     style id is referenced but cannot be resolved.
 */
function readStyle(element, styleTagName, styleType, findStyleById) {
    const messages = [];
    let styleId = null;
    let name = null;

    const styleElement = element.first(styleTagName);
    if (styleElement) {
        styleId = styleElement.attributes["w:val"];
        if (styleId) {
            const style = findStyleById(styleId);
            if (!style) {
                messages.push(undefinedStyleWarning(styleType, styleId));
            } else {
                name = style.name;
            }
        }
    }

    return elementResultWithMessages({ styleId: styleId, name: name }, messages);
}
252
+
253
/**
 * Tracks complex-field state from a w:fldChar element.
 *
 * - "begin" pushes a new field onto the stack and resets the collected
 *   instruction text.
 * - "separate" replaces the top of the stack with the parsed field.
 * - "end" pops the field; a checkbox field is emitted as a checkbox element.
 *
 * A "separate" or "end" without a matching "begin" (malformed document) is
 * ignored with a warning instead of throwing on an empty stack.
 *
 * @param {Object} element - The w:fldChar element.
 * @returns {ReadResult}
 */
function readFldChar(element) {
    var type = element.attributes["w:fldCharType"];

    if (type === "begin") {
        complexFieldStack.push({ type: "begin", fldChar: element });
        currentInstrText = [];
        return emptyResult();
    }

    if (type === "end") {
        var complexFieldEnd = complexFieldStack.pop();
        if (!complexFieldEnd) {
            return emptyResultWithMessages([warning(
                "A w:fldChar end without a matching begin was ignored"
            )]);
        }
        // No "separate" was seen, so the instruction text is still unparsed.
        if (complexFieldEnd.type === "begin") {
            complexFieldEnd = parseCurrentInstrText(complexFieldEnd);
        }
        if (complexFieldEnd.type === "checkbox") {
            return elementResult(documents.checkbox({
                checked: complexFieldEnd.checked
            }));
        }
        return emptyResult();
    }

    if (type === "separate") {
        var complexFieldSeparate = complexFieldStack.pop();
        if (!complexFieldSeparate) {
            return emptyResultWithMessages([warning(
                "A w:fldChar separate without a matching begin was ignored"
            )]);
        }
        complexFieldStack.push(parseCurrentInstrText(complexFieldSeparate));
    }

    return emptyResult();
}
275
+
276
/**
 * Finds the options of the innermost open hyperlink field.
 *
 * @returns {?Object} `options` of the last "hyperlink" entry on the
 *     complex-field stack, or null when there is none.
 */
function currentHyperlinkOptions() {
    for (let index = complexFieldStack.length - 1; index >= 0; index--) {
        const field = complexFieldStack[index];
        if (field.type === "hyperlink") {
            return field.options;
        }
    }
    return null;
}
282
+
283
/**
 * Parses the instruction text collected so far for a complex field.
 *
 * @param {Object} complexField - Stack entry; when its type is "begin" its
 *     `fldChar` element is passed along, otherwise `xml.emptyElement` is.
 * @returns {Object} Whatever parseInstrText returns.
 */
function parseCurrentInstrText(complexField) {
    const instrText = currentInstrText.join('');
    let fldChar;
    if (complexField.type === "begin") {
        fldChar = complexField.fldChar;
    } else {
        fldChar = xml.emptyElement;
    }
    return parseInstrText(instrText, fldChar);
}
291
+
292
/**
 * Parses the instruction text of a complex field.
 *
 * Recognised instructions:
 *   - HYPERLINK "target"      -> { type: "hyperlink", options: { href } }
 *   - HYPERLINK \l "bookmark" -> { type: "hyperlink", options: { anchor } }
 *   - FORMCHECKBOX            -> { type: "checkbox", checked }
 * Anything else yields { type: "unknown" }.
 *
 * The quoted argument is matched up to the next double quote, so switches
 * that follow it (for example \o "tip" or \t "_blank") are not swallowed
 * into the captured target.
 *
 * @param {string} instrText - Concatenated w:instrText content.
 * @param {Object} fldChar - The "begin" w:fldChar element (or an empty
 *     element); only consulted for FORMCHECKBOX fields.
 * @returns {Object} Parsed field description.
 */
function parseInstrText(instrText, fldChar) {
    var externalLinkResult = /\s*HYPERLINK "([^"]*)"/.exec(instrText);
    if (externalLinkResult) {
        return { type: "hyperlink", options: { href: externalLinkResult[1] } };
    }

    var internalLinkResult = /\s*HYPERLINK\s+\\l\s+"([^"]*)"/.exec(instrText);
    if (internalLinkResult) {
        return { type: "hyperlink", options: { anchor: internalLinkResult[1] } };
    }

    var checkboxResult = /\s*FORMCHECKBOX\s*/.exec(instrText);
    if (checkboxResult) {
        var checkboxElement = fldChar
            .firstOrEmpty("w:ffData")
            .firstOrEmpty("w:checkBox");
        var checkedElement = checkboxElement.first("w:checked");
        // An explicit w:checked wins; otherwise fall back to w:default.
        var checked = checkedElement == null
            ? readBooleanElement(checkboxElement.first("w:default"))
            : readBooleanElement(checkedElement);
        return { type: "checkbox", checked: checked };
    }

    return { type: "unknown" };
}
317
+
318
/**
 * Collects the text of a w:instrText element for later field parsing.
 *
 * @param {Object} element - The w:instrText element.
 * @returns {ReadResult} Always an empty result.
 */
function readInstrText(element) {
    const fragment = element.text();
    currentInstrText.push(fragment);
    return emptyResult();
}
322
+
323
/**
 * Converts a w:sym element into a text element.
 *
 * See 17.3.3.30 sym (Symbol Character) of ECMA-376 4th edition Part 1.
 *
 * @param {Object} element - The w:sym element (w:font, w:char attributes).
 * @returns {ReadResult} A Text element for the mapped character, or an empty
 *     result with a warning when no mapping exists.
 */
function readSymbol(element) {
    const font = element.attributes["w:font"];
    const char = element.attributes["w:char"];

    let unicodeCharacter = dingbatToUnicode.hex(font, char);
    // Retry without the leading "F0" for four-character codes of that form.
    if (unicodeCharacter == null && /^F0..$/.test(char)) {
        unicodeCharacter = dingbatToUnicode.hex(font, char.substring(2));
    }

    if (unicodeCharacter != null) {
        return elementResult(new documents.Text(unicodeCharacter.string));
    }

    return emptyResultWithMessages([warning(
        "A w:sym element with an unsupported character was ignored: char " + char + " in font " + font
    )]);
}
340
+
341
/**
 * Creates a reader for footnote/endnote reference elements.
 *
 * @param {string} noteType - Note type recorded on every reference created.
 * @returns {Function} Reader turning an element with a w:id attribute into a
 *     NoteReference result.
 */
function noteReferenceReader(noteType) {
    return function (element) {
        const reference = new documents.NoteReference({
            noteType: noteType,
            noteId: element.attributes["w:id"]
        });
        return elementResult(reference);
    };
}
350
+
351
/**
 * Converts a w:commentReference element into a comment reference.
 *
 * @param {Object} element - Element carrying the comment id in w:id.
 * @returns {ReadResult}
 */
function readCommentReference(element) {
    const commentId = element.attributes["w:id"];
    return elementResult(documents.commentReference({ commentId: commentId }));
}
356
+
357
/**
 * Reads the children of an element, discarding the element itself.
 *
 * @param {Object} element - Element whose `children` are read.
 * @returns {ReadResult}
 */
function readChildElements(element) {
    const children = element.children;
    return readXmlElements(children);
}
360
+
361
// Dispatch table mapping qualified XML element names to reader functions.
// Each reader receives the XML element and returns a ReadResult.
var xmlElementReaders = {
    "w:p": function (element) {
        var paragraphPropertiesElement = element.firstOrEmpty("w:pPr");

        // A w:del inside the paragraph's w:rPr marks the paragraph as deleted.
        var isDeleted = !!paragraphPropertiesElement
            .firstOrEmpty("w:rPr")
            .first("w:del");

        if (isDeleted) {
            // Hold the children back so they are merged into the next
            // paragraph that is not deleted.
            element.children.forEach(function (child) {
                deletedParagraphContents.push(child);
            });
            return emptyResult();
        }

        var childrenXml = element.children;
        if (deletedParagraphContents.length > 0) {
            childrenXml = deletedParagraphContents.concat(childrenXml);
            deletedParagraphContents = [];
        }
        return ReadResult.map(
            readParagraphProperties(paragraphPropertiesElement),
            readXmlElements(childrenXml),
            function (properties, children) {
                return new documents.Paragraph(children, properties);
            }
        ).insertExtra();
    },
    "w:r": function (element) {
        return ReadResult.map(
            readRunProperties(element.firstOrEmpty("w:rPr")),
            readXmlElements(element.children),
            function (properties, children) {
                // Runs inside an open HYPERLINK complex field are wrapped
                // in a hyperlink.
                var hyperlinkOptions = currentHyperlinkOptions();
                if (hyperlinkOptions !== null) {
                    children = [new documents.Hyperlink(children, hyperlinkOptions)];
                }

                return new documents.Run(children, properties);
            }
        );
    },
    "w:fldChar": readFldChar,
    "w:instrText": readInstrText,
    "w:t": function (element) {
        return elementResult(new documents.Text(element.text()));
    },
    "w:tab": function () {
        return elementResult(new documents.Tab());
    },
    "w:noBreakHyphen": function () {
        return elementResult(new documents.Text("\u2011"));
    },
    "w:softHyphen": function () {
        return elementResult(new documents.Text("\u00AD"));
    },
    "w:sym": readSymbol,
    "w:hyperlink": function (element) {
        var relationshipId = element.attributes["r:id"];
        var anchor = element.attributes["w:anchor"];
        return readXmlElements(element.children).map(function (children) {
            function create(options) {
                var targetFrame = element.attributes["w:tgtFrame"] || null;

                return new documents.Hyperlink(
                    children,
                    _.extend({ targetFrame: targetFrame }, options)
                );
            }

            if (relationshipId) {
                var href = relationships.findTargetByRelationshipId(relationshipId);
                if (anchor) {
                    href = uris.replaceFragment(href, anchor);
                }
                return create({ href: href });
            } else if (anchor) {
                return create({ anchor: anchor });
            } else {
                // No target at all: keep the content, drop the link.
                return children;
            }
        });
    },
    "w:tbl": readTable,
    "w:tr": readTableRow,
    "w:tc": readTableCell,
    "w:footnoteReference": noteReferenceReader("footnote"),
    "w:endnoteReference": noteReferenceReader("endnote"),
    "w:commentReference": readCommentReference,
    "w:br": function (element) {
        var breakType = element.attributes["w:type"];
        if (breakType == null || breakType === "textWrapping") {
            return elementResult(documents.lineBreak);
        } else if (breakType === "page") {
            return elementResult(documents.pageBreak);
        } else if (breakType === "column") {
            return elementResult(documents.columnBreak);
        } else {
            return emptyResultWithMessages([warning("Unsupported break type: " + breakType)]);
        }
    },
    "w:bookmarkStart": function (element) {
        var name = element.attributes["w:name"];
        // The "_GoBack" bookmark is deliberately dropped.
        if (name === "_GoBack") {
            return emptyResult();
        } else {
            return elementResult(new documents.BookmarkStart({ name: name }));
        }
    },

    "mc:AlternateContent": function (element) {
        return readChildElements(element.firstOrEmpty("mc:Fallback"));
    },

    "w:sdt": function (element) {
        var contentResult = readXmlElements(element.firstOrEmpty("w:sdtContent").children);
        return contentResult.map(function (content) {
            // From the WordML standard: https://learn.microsoft.com/en-us/openspecs/office_standards/ms-docx/3350cb64-931f-41f7-8824-f18b2568ce66
            //
            // > A CT_SdtCheckbox element that specifies that the parent
            // > structured document tag is a checkbox when displayed in the
            // > document. The parent structured document tag contents MUST
            // > contain a single character and optionally an additional
            // > character in a deleted run.

            var checkbox = element
                .firstOrEmpty("w:sdtPr")
                .first("wordml:checkbox");

            if (checkbox) {
                var checkedElement = checkbox.first("wordml:checked");
                var isChecked = !!checkedElement && readBooleanAttributeValue(
                    checkedElement.attributes["wordml:val"]
                );
                var documentCheckbox = documents.checkbox({
                    checked: isChecked
                });

                // Replace the first non-empty text element with the checkbox.
                var hasCheckbox = false;
                var replacedContent = content.map(transforms._elementsOfType(
                    documents.types.text,
                    function (text) {
                        if (text.value.length > 0 && !hasCheckbox) {
                            hasCheckbox = true;
                            return documentCheckbox;
                        } else {
                            return text;
                        }
                    }
                ));

                if (hasCheckbox) {
                    return replacedContent;
                } else {
                    return documentCheckbox;
                }

            } else {
                return content;
            }
        });
    },

    "w:ins": readChildElements,
    "w:object": function (element) {
        var ole = findOleObject(element);
        if (!ole || !isMathTypeObject(element)) {
            return readChildElements(element);
        }

        // Without a relationship id the embedded binary cannot be located,
        // so fall back to the object's children rather than resolving an
        // undefined relationship target.
        var relId = getOleRelationshipId(ole);
        if (!relId) {
            return readChildElements(element);
        }

        mathTypeIndex++;

        var binPath = getOleBinPath(relId);

        return elementResult({
            type: "math",
            kind: "mathtype",
            binPath: binPath,
            altText: `MathType:${binPath}`,
            // Reads the embedded OLE binary from the docx archive.
            read: function () {
                return docxFile.read(binPath);
            }
        });
    },
    "w:smartTag": readChildElements,
    "w:drawing": readChildElements,
    "w:pict": function (element) {
        return readChildElements(element).toExtra();
    },
    "v:roundrect": readChildElements,
    "v:shape": readChildElements,
    "v:textbox": readChildElements,
    "w:txbxContent": readChildElements,
    "wp:inline": readDrawingElement,
    "wp:anchor": readDrawingElement,
    "v:imagedata": readImageData,
    "v:group": readChildElements,
    "v:rect": readChildElements
};
562
/**
 * Finds the first direct child whose name contains "OLEObject".
 *
 * @param {?Object} element - Element whose `children` are searched.
 * @returns {Object|null|undefined} The matching child; null when the element
 *     or its children are missing; undefined when no child matches.
 */
function findOleObject(element) {
    if (!element || !element.children) {
        return null;
    }

    function isOleObject(child) {
        return child.name && child.name.includes("OLEObject");
    }

    return element.children.find(function (child) {
        return isOleObject(child);
    });
}
570
+
571
/**
 * Tells whether an element embeds an equation-editor OLE object, judged by
 * the OLE object's ProgID containing "Equation" or "MathType".
 *
 * @param {?Object} element - Candidate element (typically w:object).
 * @returns {boolean}
 */
function isMathTypeObject(element) {
    const ole = findOleObject(element);
    if (!ole) {
        return false;
    }

    const progId = ole.attributes && ole.attributes["ProgID"];
    if (!progId) {
        return false;
    }

    if (progId.includes("Equation")) {
        return true;
    }
    return progId.includes("MathType");
}
581
+
582
+ return {
583
+ readXmlElement: readXmlElement,
584
+ readXmlElements: readXmlElements
585
+ };
586
+
587
+
588
/**
 * Reads a w:tbl element into a Table.
 *
 * @param {Object} element - The w:tbl element.
 * @returns {ReadResult} Table built from the rows (after row-span
 *     calculation) and the table properties.
 */
function readTable(element) {
    const propertiesResult = readTableProperties(element.firstOrEmpty("w:tblPr"));
    const rowsResult = readXmlElements(element.children).flatMap(calculateRowSpans);

    return rowsResult.flatMap(function (rows) {
        return propertiesResult.map(function (properties) {
            return documents.Table(rows, properties);
        });
    });
}
598
+
599
/**
 * Reads table properties from a w:tblPr element.
 *
 * @param {Object} element - The w:tblPr element (or an empty element).
 * @returns {ReadResult} Value `{styleId, styleName}`.
 */
function readTableProperties(element) {
    function toProperties(style) {
        return { styleId: style.styleId, styleName: style.name };
    }
    return readTableStyle(element).map(function (style) {
        return toProperties(style);
    });
}
607
+
608
/**
 * Reads a w:tr element into a TableRow.
 *
 * @param {Object} element - The w:tr element.
 * @returns {ReadResult} Empty for a deleted row, otherwise a TableRow whose
 *     `isHeader` reflects the presence of w:tblHeader.
 */
function readTableRow(element) {
    const rowProperties = element.firstOrEmpty("w:trPr");

    // See 17.13.5.12 del (Deleted Table Row) of ECMA-376 4th edition Part 1
    if (rowProperties.first("w:del")) {
        return emptyResult();
    }

    const isHeader = !!rowProperties.first("w:tblHeader");
    return readXmlElements(element.children).map(function (cells) {
        return documents.TableRow(cells, { isHeader: isHeader });
    });
}
622
+
623
/**
 * Reads a w:tc element into a TableCell.
 *
 * The cell is tagged with a temporary `_vMerge` marker that
 * calculateRowSpans consumes and removes.
 *
 * @param {Object} element - The w:tc element.
 * @returns {ReadResult}
 */
function readTableCell(element) {
    return readXmlElements(element.children).map(function (content) {
        const cellProperties = element.firstOrEmpty("w:tcPr");

        // A missing w:gridSpan means the cell spans a single column.
        const gridSpan = cellProperties.firstOrEmpty("w:gridSpan").attributes["w:val"];
        let colSpan = 1;
        if (gridSpan) {
            colSpan = parseInt(gridSpan, 10);
        }

        const cell = documents.TableCell(content, { colSpan: colSpan });
        cell._vMerge = readVMerge(cellProperties);
        return cell;
    });
}
635
+
636
/**
 * Reads the vertical-merge state from cell properties.
 *
 * @param {Object} properties - The w:tcPr element (or an empty element).
 * @returns {?boolean} null without a w:vMerge child; true when w:val is
 *     "continue" or missing/falsy; false for any other value.
 */
function readVMerge(properties) {
    const vMergeElement = properties.first("w:vMerge");
    if (!vMergeElement) {
        return null;
    }
    const val = vMergeElement.attributes["w:val"];
    return val === "continue" || !val;
}
645
+
646
/**
 * Turns per-cell `_vMerge` markers into `rowSpan` counts and removes the
 * cells that merely continue a vertical merge.
 *
 * When the rows contain anything other than table rows, or a row contains
 * anything other than table cells, merging is skipped: the markers are
 * stripped and the rows are returned unchanged with a warning.
 *
 * @param {Array} rows - Elements read from the table body (mutated in place).
 * @returns {ReadResult} The rows, possibly with a warning.
 */
function calculateRowSpans(rows) {
    function isNotRow(row) {
        return row.type !== documents.types.tableRow;
    }

    function hasNonCellChild(row) {
        return _.any(row.children, function (cell) {
            return cell.type !== documents.types.tableCell;
        });
    }

    function giveUp(message) {
        removeVMergeProperties(rows);
        return elementResultWithMessages(rows, [warning(message)]);
    }

    if (_.any(rows, function (row) { return isNotRow(row); })) {
        return giveUp("unexpected non-row element in table, cell merging may be incorrect");
    }
    if (_.any(rows, function (row) { return hasNonCellChild(row); })) {
        return giveUp("unexpected non-cell element in table row, cell merging may be incorrect");
    }

    // For each starting column index, the cell that currently owns it.
    const owningCellByColumn = {};

    rows.forEach(function (row) {
        let columnIndex = 0;
        row.children.forEach(function (cell) {
            const owner = owningCellByColumn[columnIndex];
            if (cell._vMerge && owner) {
                // Continuation of a merge: grow the owning cell.
                owner.rowSpan++;
            } else {
                owningCellByColumn[columnIndex] = cell;
                cell._vMerge = false;
            }
            columnIndex += cell.colSpan;
        });
    });

    rows.forEach(function (row) {
        // Cells still flagged are continuations absorbed by a row span.
        row.children = row.children.filter(function (cell) {
            return !cell._vMerge;
        });
        row.children.forEach(function (cell) {
            delete cell._vMerge;
        });
    });

    return elementResult(rows);
}
694
+
695
/**
 * Strips the temporary `_vMerge` marker from every table cell found beneath
 * the given rows.
 *
 * @param {Array} rows - Elements to clean (mutated in place).
 */
function removeVMergeProperties(rows) {
    rows.forEach(function (row) {
        transforms
            .getDescendantsOfType(row, documents.types.tableCell)
            .forEach(function (cell) {
                delete cell._vMerge;
            });
    });
}
703
+
704
/**
 * Reads the images of a wp:inline / wp:anchor drawing element.
 *
 * @param {Object} element - The drawing element.
 * @returns {ReadResult} Combined result of every a:blip found along the path
 *     a:graphic / a:graphicData / pic:pic / pic:blipFill.
 */
function readDrawingElement(element) {
    const blipPath = ["a:graphic", "a:graphicData", "pic:pic", "pic:blipFill", "a:blip"];
    const blips = blipPath.reduce(function (nodes, tagName) {
        return nodes.getElementsByTagName(tagName);
    }, element);

    return combineResults(blips.map(function (blip) {
        return readBlip(element, blip);
    }));
}
714
+
715
/**
 * Reads one a:blip of a drawing into an image, wrapped in a hyperlink when
 * the drawing's wp:docPr carries an a:hlinkClick relationship.
 *
 * @param {Object} element - The enclosing drawing element.
 * @param {Object} blip - The a:blip element.
 * @returns {ReadResult} Empty with a warning when no image file is found.
 */
function readBlip(element, blip) {
    const docProperties = element.firstOrEmpty("wp:docPr");
    const attributes = docProperties.attributes;

    // Prefer the description; fall back to the title when it is blank.
    let altText = attributes.descr;
    if (isBlank(altText)) {
        altText = attributes.title;
    }

    const imageFile = findBlipImageFile(blip);
    if (imageFile === null) {
        return emptyResultWithMessages([warning("Could not find image file for a:blip element")]);
    }

    return readImage(imageFile, altText).map(function (imageElement) {
        const relationshipId = docProperties.firstOrEmpty("a:hlinkClick").attributes["r:id"];
        if (!relationshipId) {
            return imageElement;
        }
        const href = relationships.findTargetByRelationshipId(relationshipId);
        return new documents.Hyperlink([imageElement], { href: href });
    });
}
737
+
738
/**
 * @param {*} value - Value to test.
 * @returns {boolean} True for null, undefined, or a value whose string form
 *     consists only of whitespace (including the empty string).
 */
function isBlank(value) {
    if (value == null) {
        return true;
    }
    return /^\s*$/.test(value);
}
741
+
742
/**
 * Locates the image behind an a:blip element.
 *
 * @param {Object} blip - The a:blip element.
 * @returns {?{path: string, read: Function}} An embedded image (r:embed), a
 *     linked image read through `files` (r:link), or null when neither
 *     attribute is set.
 */
function findBlipImageFile(blip) {
    const embedRelationshipId = blip.attributes["r:embed"];
    const linkRelationshipId = blip.attributes["r:link"];

    if (embedRelationshipId) {
        return findEmbeddedImageFile(embedRelationshipId);
    }

    if (!linkRelationshipId) {
        return null;
    }

    const imagePath = relationships.findTargetByRelationshipId(linkRelationshipId);
    return {
        path: imagePath,
        read: files.read.bind(files, imagePath)
    };
}
757
+
758
/**
 * Reads a v:imagedata element into an image, using o:title as alt text.
 *
 * @param {Object} element - The v:imagedata element.
 * @returns {ReadResult} Empty with a warning when r:id is missing.
 */
function readImageData(element) {
    const relationshipId = element.attributes['r:id'];

    if (!relationshipId) {
        return emptyResultWithMessages([warning("A v:imagedata element without a relationship ID was ignored")]);
    }

    const imageFile = findEmbeddedImageFile(relationshipId);
    return readImage(imageFile, element.attributes["o:title"]);
}
769
+
770
/**
 * Resolves a relationship id to an image stored inside the docx archive.
 *
 * @param {string} relationshipId - Id looked up in `relationships`.
 * @returns {{path: string, read: Function}} Zip entry name (relative to
 *     "word") and a reader bound to that path on `docxFile`.
 */
function findEmbeddedImageFile(relationshipId) {
    const target = relationships.findTargetByRelationshipId(relationshipId);
    const path = uris.uriToZipEntryName("word", target);

    return {
        path: path,
        read: docxFile.read.bind(docxFile, path)
    };
}
778
+
779
/**
 * Builds an Image element for an image file.
 *
 * @param {{path: string, read: Function}} imageFile - Image location and
 *     reader.
 * @param {string|undefined} altText - Alternative text for the image.
 * @returns {ReadResult} The image, with a warning when its content type is
 *     not listed in `supportedImageTypes`.
 */
function readImage(imageFile, altText) {
    var contentType = contentTypes.findContentType(imageFile.path);

    var image = documents.Image({
        readImage: imageFile.read,
        altText: altText,
        contentType: contentType
    });

    // Messages must always be an array: other code concatenates them, and a
    // bare warning object has no concat().
    var warnings = supportedImageTypes[contentType]
        ? []
        : [warning("Image of type " + contentType + " is unlikely to display in web browsers")];

    return elementResultWithMessages(image, warnings);
}
791
+
792
/**
 * Builds the warning for a style id that is referenced but not defined.
 *
 * @param {string} type - Style kind, e.g. "Paragraph".
 * @param {string} styleId - The unresolved style id.
 * @returns {Object} Warning message object.
 */
function undefinedStyleWarning(type, styleId) {
    const text = type + " style with ID " + styleId + " was referenced but not defined in the document";
    return warning(text);
}
796
+ }
797
+
798
+
799
/**
 * Resolves the numbering level that applies to a paragraph.
 *
 * Lookup order:
 *   1. explicit w:ilvl + w:numId on the w:numPr element;
 *   2. numbering bound to the paragraph style id;
 *   3. w:numId alone, assuming level "0".
 *
 * @param {?string} styleId - Paragraph style id, if any.
 * @param {Object} element - The w:numPr element (or an empty element).
 * @param {Object} numbering - Provides `findLevel(numId, level)` and
 *     `findLevelByParagraphStyleId(styleId)`.
 * @returns {*} The numbering level, or null when none applies.
 */
function readNumberingProperties(styleId, element, numbering) {
    const level = element.firstOrEmpty("w:ilvl").attributes["w:val"];
    const numId = element.firstOrEmpty("w:numId").attributes["w:val"];
    const hasNumId = numId !== undefined;

    if (level !== undefined && hasNumId) {
        return numbering.findLevel(numId, level);
    }

    if (styleId != null) {
        const levelByStyleId = numbering.findLevelByParagraphStyleId(styleId);
        if (levelByStyleId != null) {
            return levelByStyleId;
        }
    }

    // Some malformed documents define numbering levels without an index, and
    // reference the numbering using a w:numPr element without a w:ilvl child.
    // To handle such cases, we assume a level of 0 as a fallback.
    return hasNumId ? numbering.findLevel(numId, "0") : null;
}
822
+
823
// Content types for which readImage emits no "unlikely to display" warning.
var supportedImageTypes = {};
[
    "image/png",
    "image/gif",
    "image/jpeg",
    "image/svg+xml",
    "image/tiff"
].forEach(function (contentType) {
    supportedImageTypes[contentType] = true;
});
830
+
831
// Element names that are known but intentionally produce no output and no
// "unrecognised element" warning. m:oMath / m:oMathPara are deliberately
// absent from this list.
var ignoreElements = {};
[
    "office-word:wrap",
    "v:shadow",
    "v:shapetype",
    "w:annotationRef",
    "w:bookmarkEnd",
    "w:sectPr",
    "w:proofErr",
    "w:lastRenderedPageBreak",
    "w:commentRangeStart",
    "w:commentRangeEnd",
    "w:del",
    "w:footnoteRef",
    "w:endnoteRef",
    "w:pPr",
    "w:rPr",
    "w:tblPr",
    "w:tblGrid",
    "w:trPr",
    "w:tcPr"
].forEach(function (name) {
    ignoreElements[name] = true;
});
854
+
855
/**
 * @param {Array} messages - Messages to attach.
 * @returns {ReadResult} Result with no element and no extra content.
 */
function emptyResultWithMessages(messages) {
    const noElement = null;
    const noExtra = null;
    return new ReadResult(noElement, noExtra, messages);
}
858
+
859
/**
 * @returns {ReadResult} Result with no element, extra content or messages.
 */
function emptyResult() {
    const noElement = null;
    return new ReadResult(noElement);
}
862
+
863
/**
 * @param {*} element - Element (or array of elements) to wrap.
 * @returns {ReadResult} Result carrying only the element.
 */
function elementResult(element) {
    const result = new ReadResult(element);
    return result;
}
866
+
867
/**
 * @param {*} element - Element (or array of elements) to wrap.
 * @param {Array} messages - Messages to attach.
 * @returns {ReadResult} Result with the element, no extra content.
 */
function elementResultWithMessages(element, messages) {
    const noExtra = null;
    return new ReadResult(element, noExtra, messages);
}
870
+
871
/**
 * Outcome of reading XML: the main value, "extra" content that a later
 * insertExtra() appends to the value, and the messages raised on the way.
 *
 * @param {*} element - Main value; a falsy value becomes [].
 * @param {?Array} extra - Extra content; a falsy value becomes [].
 * @param {Array} [messages] - Messages, forwarded to the wrapped Result.
 */
function ReadResult(element, extra, messages) {
    this.value = element || [];
    this.extra = extra || [];
    // Note: the wrapped Result keeps the raw `extra` argument, not the
    // defaulted array.
    var wrapped = { element: this.value, extra: extra };
    this._result = new Result(wrapped, messages);
    this.messages = this._result.messages;
}

/** Moves the value into the extra list, leaving an empty value. */
ReadResult.prototype.toExtra = function () {
    var allExtra = joinElements(this.extra, this.value);
    return new ReadResult(null, allExtra, this.messages);
};

/** Appends any extra content to the value; returns `this` if there is none. */
ReadResult.prototype.insertExtra = function () {
    var pending = this.extra;
    if (!pending || !pending.length) {
        return this;
    }
    return new ReadResult(joinElements(this.value, pending), null, this.messages);
};

/** Transforms the value with `func`, keeping extra content and messages. */
ReadResult.prototype.map = function (func) {
    var mapped = this._result.map(function (wrapped) {
        return func(wrapped.element);
    });
    return new ReadResult(mapped.value, this.extra, mapped.messages);
};

/** Like map, but `func` returns a ReadResult whose extra content is merged in. */
ReadResult.prototype.flatMap = function (func) {
    var chained = this._result.flatMap(function (wrapped) {
        return func(wrapped.element)._result;
    });
    var inner = chained.value;
    return new ReadResult(
        inner.element,
        joinElements(this.extra, inner.extra),
        chained.messages
    );
};

/** Combines two results by applying `func` to both values. */
ReadResult.map = function (first, second, func) {
    var combinedValue = func(first.value, second.value);
    var combinedExtra = joinElements(first.extra, second.extra);
    var combinedMessages = first.messages.concat(second.messages);
    return new ReadResult(combinedValue, combinedExtra, combinedMessages);
};
915
+
916
/**
 * Merges several ReadResults into one: values and extra content are
 * flattened into single lists (falsy extras dropped), messages are combined.
 *
 * @param {Array<ReadResult>} results - Results to merge.
 * @returns {ReadResult}
 */
function combineResults(results) {
    var combined = Result.combine(_.pluck(results, "_result"));
    var wrappedValues = combined.value;

    var elements = _.flatten(_.pluck(wrappedValues, "element"));
    var extras = _.filter(_.flatten(_.pluck(wrappedValues, "extra")), identity);

    return new ReadResult(elements, extras, combined.messages);
}
924
+
925
/**
 * Joins two elements or element lists into one flat list.
 *
 * @param {*} first - Element or (nested) array of elements.
 * @param {*} second - Element or (nested) array of elements.
 * @returns {Array} Flattened concatenation of both arguments.
 */
function joinElements(first, second) {
    var pair = [first, second];
    return _.flatten(pair);
}
928
+
929
/**
 * Returns its argument unchanged; used as a truthiness predicate when
 * filtering.
 *
 * @param {*} value
 * @returns {*} The same value.
 */
function identity(value) {
    var same = value;
    return same;
}