docx-to-html-mathml-v2 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +22 -0
  2. package/OMML2MML.XSL +1819 -0
  3. package/README.md +29 -0
  4. package/bin/mammoth +38 -0
  5. package/index.js +3 -0
  6. package/lib/document-to-html.js +526 -0
  7. package/lib/documents.js +266 -0
  8. package/lib/docx/body-reader.js +931 -0
  9. package/lib/docx/comments-reader.js +31 -0
  10. package/lib/docx/content-types-reader.js +58 -0
  11. package/lib/docx/document-xml-reader.js +30 -0
  12. package/lib/docx/docx-reader.js +226 -0
  13. package/lib/docx/files.js +80 -0
  14. package/lib/docx/notes-reader.js +28 -0
  15. package/lib/docx/numbering-xml.js +111 -0
  16. package/lib/docx/office-xml-reader.js +73 -0
  17. package/lib/docx/relationships-reader.js +43 -0
  18. package/lib/docx/style-map.js +75 -0
  19. package/lib/docx/styles-reader.js +90 -0
  20. package/lib/docx/uris.js +21 -0
  21. package/lib/docx-to-html-mathml.js +193 -0
  22. package/lib/html/ast.js +51 -0
  23. package/lib/html/index.js +49 -0
  24. package/lib/html/simplify.js +99 -0
  25. package/lib/images.js +31 -0
  26. package/lib/index.d.ts +15 -0
  27. package/lib/index.js +111 -0
  28. package/lib/main.js +63 -0
  29. package/lib/mammoth-core.js +3 -0
  30. package/lib/mathtype_batch.rb +58 -0
  31. package/lib/omml_to_mathml.cjs +97 -0
  32. package/lib/options-reader.js +107 -0
  33. package/lib/promises.js +42 -0
  34. package/lib/raw-text.js +14 -0
  35. package/lib/results.js +72 -0
  36. package/lib/style-reader.js +365 -0
  37. package/lib/styles/document-matchers.js +100 -0
  38. package/lib/styles/html-paths.js +75 -0
  39. package/lib/styles/parser/tokeniser.js +30 -0
  40. package/lib/transforms.js +62 -0
  41. package/lib/underline.js +11 -0
  42. package/lib/unzip.js +20 -0
  43. package/lib/writers/html-writer.js +167 -0
  44. package/lib/writers/index.js +14 -0
  45. package/lib/writers/markdown-writer.js +163 -0
  46. package/lib/xml/index.js +8 -0
  47. package/lib/xml/nodes.js +70 -0
  48. package/lib/xml/reader.js +75 -0
  49. package/lib/xml/writer.js +61 -0
  50. package/lib/xml/xmldom.js +23 -0
  51. package/lib/zipfile.js +72 -0
  52. package/mathtype_to_mathml_plus.rb +139 -0
  53. package/package.json +74 -0
@@ -0,0 +1,167 @@
1
+ var _ = require("underscore");
2
+
3
+ exports.writer = writer;
4
+
5
/**
 * Create an HTML writer.
 *
 * @param {Object} [options] - Writer options; a falsy value is treated as {}.
 * @param {boolean} [options.prettyPrint] - When truthy the result of
 *     prettyWriter() is returned, otherwise the result of simpleWriter().
 * @returns {Object} The selected writer.
 */
function writer(options) {
    var settings = options || {};
    return settings.prettyPrint ? prettyWriter() : simpleWriter();
}
13
+
14
+
15
// Tags that are written on their own line and that increase the indentation
// level of whatever is written inside them.
var indentedElements = {
    div: true,
    p: true,
    ul: true,
    li: true
};


/**
 * Create a writer that wraps simpleWriter() and inserts newlines and
 * indentation around the tags listed in indentedElements.
 *
 * No whitespace is inserted before the very first write, nor anywhere inside
 * a <pre> element.
 *
 * @returns {{asString: Function, open: Function, close: Function,
 *     text: Function, selfClosing: Function, raw: Function}}
 */
function prettyWriter() {
    var indentationLevel = 0;
    var indentation = " ";
    // Names of the currently open tags, innermost last.
    var stack = [];
    // True until the first tag has been opened.
    var start = true;
    // True while consecutive text/raw writes are being emitted, so that only
    // the first of them is preceded by a line break.
    var inText = false;

    var writer = simpleWriter();

    function open(tagName, attributes) {
        if (indentedElements[tagName]) {
            indent();
        }
        stack.push(tagName);
        writer.open(tagName, attributes);
        if (indentedElements[tagName]) {
            indentationLevel++;
        }
        start = false;
    }

    function close(tagName) {
        if (indentedElements[tagName]) {
            indentationLevel--;
            indent();
        }
        stack.pop();
        writer.close(tagName);
    }

    function text(value) {
        startText();
        // String.prototype.replace with a string pattern only replaces the
        // first match, which left every line after the second unindented.
        // A global regex indents all continuation lines.
        var text = isInPre() ? value : value.replace(/\n/g, "\n" + indentation);
        writer.text(text);
    }

    function selfClosing(tagName, attributes) {
        indent();
        writer.selfClosing(tagName, attributes);
    }

    function insideIndentedElement() {
        return stack.length === 0 || indentedElements[stack[stack.length - 1]];
    }

    function startText() {
        if (!inText) {
            indent();
            inText = true;
        }
    }

    // Emit a line break followed by the current indentation, unless this is
    // the very first write, the innermost open tag is not an indented one, or
    // a <pre> is open.
    function indent() {
        inText = false;
        if (!start && insideIndentedElement() && !isInPre()) {
            writer._append("\n");
            for (var i = 0; i < indentationLevel; i++) {
                writer._append(indentation);
            }
        }
    }

    function isInPre() {
        return stack.indexOf("pre") !== -1;
    }

    function raw(value) {
        // Keep the layout consistent with text(): when inside an indented
        // block, indent first.
        startText();
        // Written straight through, without escaping.
        writer._append(value);
    }

    return {
        asString: writer.asString,
        open: open,
        close: close,
        text: text,
        selfClosing: selfClosing,
        raw: raw,
    };
}
106
+
107
+
108
/**
 * Create a writer that accumulates HTML fragments with no added whitespace.
 *
 * Text passed to text() and attribute values are escaped; strings passed to
 * raw() / _append() are stored unchanged.
 *
 * @returns {{asString: Function, open: Function, close: Function,
 *     text: Function, selfClosing: Function, _append: Function, raw: Function}}
 */
function simpleWriter() {
    var pieces = [];

    // Store an already-serialised piece of markup as-is.
    function emit(html) {
        pieces.push(html);
    }

    // Serialise an attribute map as ` key="value"` pairs.
    function renderAttributes(attributes) {
        var rendered = _.map(attributes, function (value, key) {
            return " " + key + '="' + escapeHtmlAttribute(value) + '"';
        });
        return rendered.join("");
    }

    return {
        asString: function () {
            return pieces.join("");
        },
        open: function (tagName, attributes) {
            emit("<" + tagName + renderAttributes(attributes) + ">");
        },
        close: function (tagName) {
            emit("</" + tagName + ">");
        },
        text: function (value) {
            emit(escapeHtmlText(value));
        },
        selfClosing: function (tagName, attributes) {
            emit("<" + tagName + renderAttributes(attributes) + " />");
        },
        _append: emit,
        raw: emit,
    };
}
153
+
154
/**
 * Escape a string for use as HTML text content.
 *
 * @param {string} value - Text to escape.
 * @returns {string} The text with &, < and > replaced by entities.
 */
function escapeHtmlText(value) {
    // "&" must come first so the entities produced below are not re-escaped.
    var substitutions = [
        [/&/g, '&amp;'],
        [/</g, '&lt;'],
        [/>/g, '&gt;']
    ];
    var escaped = value;
    for (var index = 0; index < substitutions.length; index++) {
        escaped = escaped.replace(substitutions[index][0], substitutions[index][1]);
    }
    return escaped;
}
160
+
161
/**
 * Escape a string for use inside a double-quoted HTML attribute value.
 *
 * @param {string} value - Attribute value to escape.
 * @returns {string} The value with &, ", < and > replaced by entities.
 */
function escapeHtmlAttribute(value) {
    // "&" must come first so the entities produced below are not re-escaped.
    var substitutions = [
        [/&/g, '&amp;'],
        [/"/g, '&quot;'],
        [/</g, '&lt;'],
        [/>/g, '&gt;']
    ];
    var escaped = value;
    for (var index = 0; index < substitutions.length; index++) {
        escaped = escaped.replace(substitutions[index][0], substitutions[index][1]);
    }
    return escaped;
}
@@ -0,0 +1,14 @@
1
+ var htmlWriter = require("./html-writer");
2
+ var markdownWriter = require("./markdown-writer");
3
+
4
+ exports.writer = writer;
5
+
6
+
7
/**
 * Create a writer for the requested output format.
 *
 * @param {Object} [options] - Writer options; a falsy value is treated as {}.
 * @param {string} [options.outputFormat] - "markdown" selects the Markdown
 *     writer; any other value selects the HTML writer, which receives the
 *     options object.
 * @returns {Object} The selected writer.
 */
function writer(options) {
    var settings = options || {};
    var wantsMarkdown = settings.outputFormat === "markdown";
    return wantsMarkdown ? markdownWriter.writer() : htmlWriter.writer(settings);
}
@@ -0,0 +1,163 @@
1
+ var _ = require("underscore");
2
+
3
+
4
/**
 * Build an element factory whose opening and closing markers are the same
 * string.
 *
 * @param {string} end - Marker written both before and after the content.
 * @returns {Function} Factory returning {start: end, end: end}.
 */
function symmetricMarkdownElement(end) {
    var marker = end;
    return function() {
        return {start: marker, end: marker};
    };
}
7
+
8
/**
 * Build an element factory with fixed opening and closing markers.
 *
 * @param {string} start - Text written when the element opens.
 * @param {string} end - Text written when the element closes.
 * @returns {Function} Factory returning a fresh {start, end} object per call.
 */
function markdownElement(start, end) {
    function createElement() {
        return {start: start, end: end};
    }
    return createElement;
}
13
+
14
/**
 * Describe how an <a> element is rendered in Markdown.
 *
 * @param {Object} attributes - Attributes of the element; only href is read.
 * @returns {Object} Link markers when href is non-empty, otherwise {}.
 */
function markdownLink(attributes) {
    var href = attributes.href || "";
    if (!href) {
        return {};
    }
    return {
        start: "[",
        end: "](" + href + ")",
        anchorPosition: "before"
    };
}
26
+
27
/**
 * Describe how an <img> element is rendered in Markdown.
 *
 * @param {Object} attributes - Attributes of the element; src and alt are read.
 * @returns {Object} {start: "![alt](src)"} when either attribute is non-empty,
 *     otherwise {}.
 */
function markdownImage(attributes) {
    var src = attributes.src || "";
    var altText = attributes.alt || "";
    if (!src && !altText) {
        return {};
    }
    return {start: "![" + altText + "](" + src + ")"};
}
36
+
37
/**
 * Build the element factory for <ul> / <ol>.
 *
 * @param {Object} options - List options.
 * @param {boolean} options.isOrdered - Whether items are numbered.
 * @returns {Function} Factory taking (attributes, list), where list is the
 *     enclosing list state or a falsy value at the top level. A nested list
 *     starts with a newline and is indented one level deeper than its parent;
 *     a top-level list ends with a newline.
 */
function markdownList(options) {
    return function(attributes, list) {
        var element = {};
        if (list) {
            element.start = "\n";
            element.end = "";
        } else {
            element.start = "";
            element.end = "\n";
        }
        element.list = {
            isOrdered: options.isOrdered,
            indent: list ? list.indent + 1 : 0,
            count: 0
        };
        return element;
    };
}
50
+
51
/**
 * Describe how an <li> element is rendered in Markdown.
 *
 * Increments the count of the given list and resets listItem.hasClosed.
 *
 * @param {Object} attributes - Attributes of the element (not read).
 * @param {Object} [list] - State of the enclosing list; when falsy an
 *     unordered, unindented list is assumed.
 * @param {Object} listItem - Shared item state; hasClosed is written here.
 * @returns {{start: string, end: Function}} start is the indented bullet;
 *     end() returns "\n" the first time it is called after the item was
 *     opened and undefined afterwards.
 */
function markdownListItem(attributes, list, listItem) {
    var currentList = list || {indent: 0, isOrdered: false, count: 0};
    currentList.count++;
    listItem.hasClosed = false;

    var marker = currentList.isOrdered ? currentList.count + "." : "-";
    var prefix = repeatString("\t", currentList.indent) + marker + " ";

    function closeItem() {
        if (listItem.hasClosed) {
            return undefined;
        }
        listItem.hasClosed = true;
        return "\n";
    }

    return {
        start: prefix,
        end: closeItem
    };
}
69
+
70
// Maps an HTML tag name to the factory that describes its Markdown rendering.
// Each factory is called with (attributes, list, listItem).
var htmlToMarkdown = {
    "p": markdownElement("", "\n\n"),
    "br": markdownElement("", " \n"),
    "ul": markdownList({isOrdered: false}),
    "ol": markdownList({isOrdered: true}),
    "li": markdownListItem,
    "strong": symmetricMarkdownElement("__"),
    "em": symmetricMarkdownElement("*"),
    "a": markdownLink,
    "img": markdownImage
};

// Headings h1..h6 are prefixed with one "#" per level.
[1, 2, 3, 4, 5, 6].forEach(function(level) {
    htmlToMarkdown["h" + level] = markdownElement(repeatString("#", level) + " ", "\n\n");
});
87
+
88
/**
 * Repeat a string a given number of times.
 *
 * @param {string} value - String to repeat.
 * @param {number} count - Number of repetitions.
 * @returns {string} value concatenated count times.
 */
function repeatString(value, count) {
    // Joining count + 1 empty slots yields count copies of the separator.
    var slots = new Array(count + 1);
    return slots.join(value);
}
91
+
92
/**
 * Create a writer that renders open/close/text calls as Markdown.
 *
 * Tags are looked up in htmlToMarkdown; tags without an entry produce no
 * markers. An element with an id attribute also emits an `<a id="…"></a>`
 * anchor.
 *
 * The writer exposes the same method set as the HTML writers, including
 * raw(), so callers that emit pre-rendered markup work with either output
 * format.
 *
 * @returns {{asString: Function, open: Function, close: Function,
 *     text: Function, selfClosing: Function, raw: Function}}
 */
function markdownWriter() {
    var fragments = [];
    var elementStack = [];
    // State of the innermost open list, or null outside any list.
    var list = null;
    var listItem = {};

    function open(tagName, attributes) {
        attributes = attributes || {};

        var createElement = htmlToMarkdown[tagName] || function() {
            return {};
        };
        var element = createElement(attributes, list, listItem);
        // Remember the list that was active before this element so close()
        // can restore it.
        elementStack.push({end: element.end, list: list});

        if (element.list) {
            list = element.list;
        }

        var anchorBeforeStart = element.anchorPosition === "before";
        if (anchorBeforeStart) {
            writeAnchor(attributes);
        }

        fragments.push(element.start || "");
        if (!anchorBeforeStart) {
            writeAnchor(attributes);
        }
    }

    function writeAnchor(attributes) {
        if (attributes.id) {
            fragments.push('<a id="' + attributes.id + '"></a>');
        }
    }

    function close(tagName) {
        var element = elementStack.pop();
        list = element.list;
        // `end` is either a string or a function computing the closing text.
        var end = typeof element.end === "function" ? element.end() : element.end;
        fragments.push(end || "");
    }

    function selfClosing(tagName, attributes) {
        open(tagName, attributes);
        close(tagName);
    }

    function text(value) {
        fragments.push(escapeMarkdown(value));
    }

    // Append pre-rendered markup unchanged, i.e. without Markdown escaping.
    function raw(value) {
        fragments.push(value);
    }

    function asString() {
        return fragments.join("");
    }

    return {
        asString: asString,
        open: open,
        close: close,
        text: text,
        selfClosing: selfClosing,
        raw: raw
    };
}
156
+
157
+ exports.writer = markdownWriter;
158
+
159
/**
 * Backslash-escape characters that have a special meaning in Markdown.
 *
 * @param {string} value - Plain text.
 * @returns {string} The text with backslashes doubled and each of
 *     ` * _ { } [ ] ( ) # + - . ! preceded by a backslash.
 */
function escapeMarkdown(value) {
    // Backslashes are doubled first so the escapes added next stay intact.
    var withEscapedBackslashes = value.replace(/\\/g, '\\\\');
    return withEscapedBackslashes.replace(/([\`\*_\{\}\[\]\(\)\#\+\-\.\!])/g, '\\$1');
}
@@ -0,0 +1,8 @@
1
+ var nodes = require("./nodes");
2
+
3
+ exports.Element = nodes.Element;
4
+ exports.element = nodes.element;
5
+ exports.emptyElement = nodes.emptyElement;
6
+ exports.text = nodes.text;
7
+ exports.readString = require("./reader").readString;
8
+ exports.writeString = require("./writer").writeString;
@@ -0,0 +1,70 @@
1
+ var _ = require("underscore");
2
+
3
+
4
+ exports.Element = Element;
5
+ exports.element = function(name, attributes, children) {
6
+ return new Element(name, attributes, children);
7
+ };
8
+ exports.text = function(value) {
9
+ return {
10
+ type: "text",
11
+ value: value
12
+ };
13
+ };
14
+
15
+
16
// Placeholder returned by firstOrEmpty() when no child matches. It has no
// attributes or children and yields itself from firstOrEmpty(), so lookups
// can be chained without null checks.
var emptyElement = {
    first: function() {
        return null;
    },
    firstOrEmpty: function() {
        return emptyElement;
    },
    attributes: {},
    children: []
};
exports.emptyElement = emptyElement;
26
+
27
/**
 * An XML element node.
 *
 * @param {string} name - Element name.
 * @param {Object} [attributes] - Attribute map; defaults to {}.
 * @param {Array} [children] - Child nodes; defaults to [].
 */
function Element(name, attributes, children) {
    this.type = "element";
    this.name = name;
    this.attributes = attributes || {};
    this.children = children || [];
}

/**
 * @param {string} name - Name to look for.
 * @returns {Object|undefined} The first direct child with that name, or
 *     undefined when there is none.
 */
Element.prototype.first = function(name) {
    for (var index = 0; index < this.children.length; index++) {
        var child = this.children[index];
        if (child.name === name) {
            return child;
        }
    }
    return undefined;
};

/**
 * @param {string} name - Name to look for.
 * @returns {Object} The first direct child with that name, or emptyElement.
 */
Element.prototype.firstOrEmpty = function(name) {
    return this.first(name) || emptyElement;
};

/**
 * @param {string} name - Name to look for.
 * @returns {Array} Element list of the direct children with that name. Only
 *     direct children are searched, not deeper descendants.
 */
Element.prototype.getElementsByTagName = function(name) {
    var elements = this.children.filter(function(child) {
        return child.name === name;
    });
    return toElementList(elements);
};

/**
 * @returns {string} "" for an element without children, or the value of its
 *     only child when that child is a text node.
 * @throws {Error} "Not implemented" for any other content.
 */
Element.prototype.text = function() {
    if (this.children.length === 0) {
        return "";
    } else if (this.children.length !== 1 || this.children[0].type !== "text") {
        throw new Error("Not implemented");
    }
    return this.children[0].value;
};

// Methods copied onto arrays returned by getElementsByTagName so that
// lookups can be chained across a list of elements.
var elementListPrototype = {
    getElementsByTagName: function(name) {
        // Merge the per-element results into one flat list (one level deep;
        // the previous code passed the shallow flag to map() by mistake).
        var matches = [];
        this.forEach(function(element) {
            Array.prototype.push.apply(matches, element.getElementsByTagName(name));
        });
        return toElementList(matches);
    }
};

// Copy the element-list methods onto the given array and return that array.
function toElementList(array) {
    for (var key in elementListPrototype) {
        array[key] = elementListPrototype[key];
    }
    return array;
}
@@ -0,0 +1,75 @@
1
+ var promises = require("../promises");
2
+ var _ = require("underscore");
3
+
4
+ var xmldom = require("./xmldom");
5
+ var nodes = require("./nodes");
6
+ var Element = nodes.Element;
7
+
8
+ exports.readString = readString;
9
+
10
+ var Node = xmldom.Node;
11
+
12
/**
 * Parse an XML string into a tree of Element and text nodes.
 *
 * Element and attribute names that have a namespace are written as
 * "prefix:local" when the namespace URI has an entry in namespaceMap, and as
 * "{uri}local" otherwise. Nodes other than elements and text nodes are
 * dropped. Each converted element also carries `_rawXml`, the serialised
 * form of the DOM element it was built from.
 *
 * @param {string} xmlString - XML source.
 * @param {Object} [namespaceMap] - Map from namespace URI to prefix.
 * @returns {Promise} Resolves with the converted root node; rejects when the
 *     parser throws or reports a "parsererror" document.
 */
function readString(xmlString, namespaceMap) {
    namespaceMap = namespaceMap || {};

    var document;
    try {
        document = xmldom.parseFromString(xmlString, "text/xml");
    } catch (error) {
        return promises.reject(error);
    }

    if (document.documentElement.tagName === "parsererror") {
        // A parser error is a failure. Resolving with an Error instance would
        // hand callers an Error where they expect an element.
        return promises.reject(new Error(document.documentElement.textContent));
    }

    function convertNode(node) {
        switch (node.nodeType) {
        case Node.ELEMENT_NODE:
            return convertElement(node);
        case Node.TEXT_NODE:
            return nodes.text(node.nodeValue);
        }
        // Any other node type yields undefined and is skipped by the caller.
    }

    function convertElement(element) {
        var convertedName = convertName(element);

        var convertedChildren = [];
        _.forEach(element.childNodes, function (childNode) {
            var convertedNode = convertNode(childNode);
            if (convertedNode) {
                convertedChildren.push(convertedNode);
            }
        });

        var convertedAttributes = {};
        _.forEach(element.attributes, function (attribute) {
            convertedAttributes[convertName(attribute)] = attribute.value;
        });

        var el = new Element(convertedName, convertedAttributes, convertedChildren);

        // Keep the original raw XML of this element.
        el._rawXml = element.toString();

        return el;
    }

    function convertName(node) {
        if (node.namespaceURI) {
            var mappedPrefix = namespaceMap[node.namespaceURI];
            var prefix;
            if (mappedPrefix) {
                prefix = mappedPrefix + ":";
            } else {
                prefix = "{" + node.namespaceURI + "}";
            }
            return prefix + node.localName;
        } else {
            return node.localName;
        }
    }

    return promises.resolve(convertNode(document.documentElement));
}
@@ -0,0 +1,61 @@
1
+ var _ = require("underscore");
2
+ var xmlbuilder = require("xmlbuilder");
3
+
4
+
5
+ exports.writeString = writeString;
6
+
7
+
8
/**
 * Serialise a node tree to an XML string using xmlbuilder.
 *
 * Element names of the form "{uri}local" are rewritten to "prefix:local",
 * or to "local" when the prefix is the empty string, using the inverse of
 * `namespaces`. Other names are written unchanged. Every entry of
 * `namespaces` is declared on the root element.
 *
 * @param {Object} root - Root element node (name, children).
 * @param {Object} namespaces - Map from prefix ("" for the default
 *     namespace) to namespace URI.
 * @returns {string} The value returned by the builder's end().
 */
function writeString(root, namespaces) {
    var prefixByUri = _.invert(namespaces);

    // Dispatch table keyed by node.type.
    var writersByType = {
        element: writeElement,
        text: writeTextNode
    };

    function qualifiedName(name) {
        var parts = /^\{(.*)\}(.*)$/.exec(name);
        if (!parts) {
            return name;
        }
        var prefix = prefixByUri[parts[1]];
        var separator = prefix === "" ? "" : ":";
        return prefix + separator + parts[2];
    }

    function writeNode(builder, node) {
        var write = writersByType[node.type];
        return write(builder, node);
    }

    function writeElement(builder, element) {
        var childBuilder = builder.element(qualifiedName(element.name), element.attributes);
        element.children.forEach(function(child) {
            writeNode(childBuilder, child);
        });
    }

    var documentBuilder = xmlbuilder.create(qualifiedName(root.name), {
        version: '1.0',
        encoding: 'UTF-8',
        standalone: true
    });

    _.forEach(namespaces, function(uri, prefix) {
        var declaration = "xmlns" + (prefix === "" ? "" : ":" + prefix);
        documentBuilder.attribute(declaration, uri);
    });

    root.children.forEach(function(child) {
        writeNode(documentBuilder, child);
    });

    return documentBuilder.end();
}
58
+
59
/**
 * Write a text node to the given builder.
 *
 * @param {Object} builder - Builder exposing text(value).
 * @param {{value: string}} node - Text node to write.
 */
function writeTextNode(builder, node) {
    var content = node.value;
    builder.text(content);
}
@@ -0,0 +1,23 @@
1
+ var xmldom = require("@xmldom/xmldom");
2
+ var dom = require("@xmldom/xmldom/lib/dom");
3
+
4
/**
 * Parse a string with xmldom's DOMParser, turning reported problems into an
 * exception.
 *
 * @param {string} string - Source text.
 * @param {string} [mimeType="text/xml"] - MIME type forwarded to
 *     DOMParser.parseFromString. Callers already pass "text/xml"; it was
 *     previously dropped here.
 * @returns {Document} The parsed document.
 * @throws {Error} "<level>: <message>" for the last problem the parser
 *     reported through its error handler, whatever its level.
 */
function parseFromString(string, mimeType) {
    var error = null;

    var domParser = new xmldom.DOMParser({
        // Only the most recent report is kept.
        errorHandler: function(level, message) {
            error = {level: level, message: message};
        }
    });

    var document = domParser.parseFromString(string, mimeType || "text/xml");

    if (error === null) {
        return document;
    } else {
        throw new Error(error.level + ": " + error.message);
    }
}
21
+
22
+ exports.parseFromString = parseFromString;
23
+ exports.Node = dom.Node;
package/lib/zipfile.js ADDED
@@ -0,0 +1,72 @@
1
+ var base64js = require("base64-js");
2
+ var JSZip = require("jszip");
3
+
4
+ exports.openArrayBuffer = openArrayBuffer;
5
+ exports.splitPath = splitPath;
6
+ exports.joinPath = joinPath;
7
+
8
/**
 * Open a zip archive held in memory.
 *
 * @param {ArrayBuffer} arrayBuffer - Archive contents, passed to
 *     JSZip.loadAsync.
 * @returns {Promise<{exists: Function, read: Function, write: Function,
 *     toArrayBuffer: Function}>} Resolves with an accessor object:
 *     exists(name) tells whether an entry is present;
 *     read(name, encoding) resolves with a base64 string for "base64", a
 *     decoded string for any other truthy encoding, or the raw Uint8Array
 *     when no encoding is given;
 *     write(name, contents) adds or replaces an entry;
 *     toArrayBuffer() resolves with the archive generated as an arraybuffer.
 */
function openArrayBuffer(arrayBuffer) {
    function wrap(zipFile) {
        return {
            exists: function(name) {
                return zipFile.file(name) !== null;
            },
            read: function(name, encoding) {
                var pendingBytes = zipFile.file(name).async("uint8array");
                return pendingBytes.then(function(array) {
                    if (!encoding) {
                        return array;
                    }
                    if (encoding === "base64") {
                        return base64js.fromByteArray(array);
                    }
                    return new TextDecoder(encoding).decode(array);
                });
            },
            write: function(name, contents) {
                zipFile.file(name, contents);
            },
            toArrayBuffer: function() {
                return zipFile.generateAsync({type: "arraybuffer"});
            }
        };
    }

    return JSZip.loadAsync(arrayBuffer).then(wrap);
}
43
+
44
/**
 * Split a "/"-separated path at its last slash.
 *
 * @param {string} path - Path to split.
 * @returns {{dirname: string, basename: string}} dirname is everything
 *     before the last "/" ("" when there is none); basename is everything
 *     after it.
 */
function splitPath(path) {
    var slashIndex = path.lastIndexOf("/");
    var hasDirectory = slashIndex !== -1;
    return {
        dirname: hasDirectory ? path.substring(0, slashIndex) : "",
        basename: hasDirectory ? path.substring(slashIndex + 1) : path
    };
}
55
+
56
/**
 * Join path segments with "/".
 *
 * Falsy segments are ignored. A segment starting with "/" is absolute and
 * discards every segment before it.
 *
 * @param {...string} paths - Segments to join.
 * @returns {string} The joined path; "" when no segment remains.
 */
function joinPath() {
    var segments = [];
    for (var index = 0; index < arguments.length; index++) {
        var path = arguments[index];
        if (!path) {
            continue;
        }
        if (/^\//.test(path)) {
            segments = [path];
        } else {
            segments.push(path);
        }
    }
    return segments.join("/");
}