@hyperlex/mammoth 1.4.9-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +77 -0
- package/.github/ISSUE_TEMPLATE.md +12 -0
- package/.idea/mammoth.js.iml +12 -0
- package/.idea/modules.xml +8 -0
- package/.idea/vcs.xml +6 -0
- package/.travis.yml +10 -0
- package/LICENSE +22 -0
- package/NEWS +373 -0
- package/README.md +883 -0
- package/bin/mammoth +38 -0
- package/browser/docx/files.js +14 -0
- package/browser/unzip.js +12 -0
- package/lib/document-to-html.js +453 -0
- package/lib/documents.js +238 -0
- package/lib/docx/body-reader.js +636 -0
- package/lib/docx/comments-reader.js +31 -0
- package/lib/docx/content-types-reader.js +58 -0
- package/lib/docx/document-xml-reader.js +26 -0
- package/lib/docx/docx-reader.js +222 -0
- package/lib/docx/files.js +67 -0
- package/lib/docx/notes-reader.js +28 -0
- package/lib/docx/numbering-xml.js +69 -0
- package/lib/docx/office-xml-reader.js +58 -0
- package/lib/docx/relationships-reader.js +43 -0
- package/lib/docx/style-map.js +75 -0
- package/lib/docx/styles-reader.js +70 -0
- package/lib/docx/uris.js +21 -0
- package/lib/html/ast.js +50 -0
- package/lib/html/index.js +41 -0
- package/lib/html/simplify.js +88 -0
- package/lib/images.js +29 -0
- package/lib/index.js +115 -0
- package/lib/main.js +63 -0
- package/lib/options-reader.js +98 -0
- package/lib/promises.js +42 -0
- package/lib/results.js +72 -0
- package/lib/style-reader.js +321 -0
- package/lib/styles/document-matchers.js +74 -0
- package/lib/styles/html-paths.js +81 -0
- package/lib/styles/parser/tokeniser.js +30 -0
- package/lib/transforms.js +61 -0
- package/lib/underline.js +11 -0
- package/lib/unzip.js +22 -0
- package/lib/writers/html-writer.js +160 -0
- package/lib/writers/index.js +14 -0
- package/lib/writers/markdown-writer.js +163 -0
- package/lib/xml/index.js +7 -0
- package/lib/xml/nodes.js +69 -0
- package/lib/xml/reader.js +83 -0
- package/lib/xml/writer.js +61 -0
- package/lib/zipfile.js +77 -0
- package/mammoth.browser.js +32950 -0
- package/mammoth.browser.min.js +18 -0
- package/package.json +65 -0
- package/test/.eslintrc.json +7 -0
- package/test/document-to-html.tests.js +834 -0
- package/test/docx/body-reader.tests.js +1342 -0
- package/test/docx/comments-reader.tests.js +52 -0
- package/test/docx/content-types-reader.tests.js +45 -0
- package/test/docx/document-matchers.js +37 -0
- package/test/docx/docx-reader.tests.js +179 -0
- package/test/docx/files.tests.js +94 -0
- package/test/docx/notes-reader.tests.js +35 -0
- package/test/docx/numbering-xml.tests.js +65 -0
- package/test/docx/office-xml-reader.tests.js +24 -0
- package/test/docx/relationships-reader.tests.js +65 -0
- package/test/docx/style-map.tests.js +112 -0
- package/test/docx/styles-reader.tests.js +133 -0
- package/test/docx/uris.tests.js +22 -0
- package/test/html/simplify.tests.js +134 -0
- package/test/html/write.tests.js +42 -0
- package/test/images.tests.js +34 -0
- package/test/main.tests.js +89 -0
- package/test/mammoth.tests.js +429 -0
- package/test/mocha.opts +1 -0
- package/test/options-reader.tests.js +63 -0
- package/test/results.tests.js +15 -0
- package/test/style-reader.tests.js +256 -0
- package/test/styles/document-matchers.tests.js +71 -0
- package/test/styles/html-paths.tests.js +20 -0
- package/test/styles/parser/tokeniser.tests.js +104 -0
- package/test/test-data/comments.docx +0 -0
- package/test/test-data/embedded-style-map.docx +0 -0
- package/test/test-data/empty.docx +0 -0
- package/test/test-data/empty.zip +0 -0
- package/test/test-data/endnotes.docx +0 -0
- package/test/test-data/external-picture.docx +0 -0
- package/test/test-data/footnote-hyperlink.docx +0 -0
- package/test/test-data/footnotes.docx +0 -0
- package/test/test-data/hello.zip +0 -0
- package/test/test-data/hyperlinks/word/_rels/document.xml.rels +10 -0
- package/test/test-data/hyperlinks/word/document.xml +18 -0
- package/test/test-data/simple/word/document.xml +18 -0
- package/test/test-data/simple-list.docx +0 -0
- package/test/test-data/single-paragraph.docx +0 -0
- package/test/test-data/strikethrough.docx +0 -0
- package/test/test-data/tables.docx +0 -0
- package/test/test-data/text-box.docx +0 -0
- package/test/test-data/tiny-picture-target-base-relative.docx +0 -0
- package/test/test-data/tiny-picture.docx +0 -0
- package/test/test-data/tiny-picture.png +0 -0
- package/test/test-data/underline.docx +0 -0
- package/test/test-data/utf8-bom.docx +0 -0
- package/test/test.js +11 -0
- package/test/testing.js +55 -0
- package/test/transforms.tests.js +125 -0
- package/test/unzip.tests.js +38 -0
- package/test/writers/html-writer.tests.js +133 -0
- package/test/writers/markdown-writer.tests.js +304 -0
- package/test/xml/reader.tests.js +85 -0
- package/test/xml/writer.tests.js +81 -0
- package/test/zipfile.tests.js +59 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
exports.readContentTypesFromXml = readContentTypesFromXml;
|
|
2
|
+
|
|
3
|
+
// Last-resort image types, keyed by lower-case file extension. Each value is
// the subtype that findContentType appends to "image/" when neither an
// override nor an extension default matches the path.
var fallbackContentTypes = {
    png: "png",
    gif: "gif",
    jpeg: "jpeg",
    jpg: "jpeg",
    tif: "tiff",
    tiff: "tiff",
    bmp: "bmp"
};
|
|
12
|
+
|
|
13
|
+
exports.defaultContentTypes = contentTypes({}, {});
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
/**
 * Builds a content-type lookup from a parsed content-types element.
 *
 * "content-types:Default" children contribute Extension -> ContentType
 * entries; "content-types:Override" children contribute
 * PartName -> ContentType entries, with one leading "/" removed from the
 * part name. All other children are ignored.
 *
 * @param {Object} element - parsed XML element exposing a `children` array.
 * @returns {Object} whatever contentTypes(overrides, extensionDefaults) returns.
 */
function readContentTypesFromXml(element) {
    var defaultsByExtension = {};
    var overridesByPartName = {};

    element.children.forEach(function(entry) {
        switch (entry.name) {
        case "content-types:Default":
            defaultsByExtension[entry.attributes.Extension] = entry.attributes.ContentType;
            break;
        case "content-types:Override":
            var partName = entry.attributes.PartName;
            // Overrides are stored relative to the package root.
            var relativeName = partName.charAt(0) === "/" ? partName.substring(1) : partName;
            overridesByPartName[relativeName] = entry.attributes.ContentType;
            break;
        default:
            break;
        }
    });

    return contentTypes(overridesByPartName, defaultsByExtension);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Creates a content-type lookup.
 *
 * @param {Object} overrides - content types keyed by part path.
 * @param {Object} extensionDefaults - content types keyed by file extension
 *     (matched exactly as written).
 * @returns {{findContentType: function(string): (string|null)}}
 */
function contentTypes(overrides, extensionDefaults) {
    // Keys in both maps (and the looked-up path) come from the document, so
    // only own properties may count as matches. A plain lookup would also
    // find inherited members such as "constructor", and a key named
    // "hasOwnProperty" would shadow the method if it were called on the map.
    function hasOwn(object, key) {
        return Object.prototype.hasOwnProperty.call(object, key);
    }

    return {
        /**
         * Resolves the content type for a part path: a truthy override wins,
         * then the extension default, then the built-in image fallback
         * (matched on the lower-cased extension); otherwise null.
         */
        findContentType: function(path) {
            if (hasOwn(overrides, path) && overrides[path]) {
                return overrides[path];
            }

            var pathParts = path.split(".");
            var extension = pathParts[pathParts.length - 1];
            if (hasOwn(extensionDefaults, extension)) {
                return extensionDefaults[extension];
            }

            var fallbackKey = extension.toLowerCase();
            if (hasOwn(fallbackContentTypes, fallbackKey) && fallbackContentTypes[fallbackKey]) {
                return "image/" + fallbackContentTypes[fallbackKey];
            }

            return null;
        }
    };
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
exports.DocumentXmlReader = DocumentXmlReader;
|
|
2
|
+
|
|
3
|
+
var documents = require("../documents");
|
|
4
|
+
var Result = require("../results").Result;
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
/**
 * Creates a reader that turns the parsed main document XML into a Document.
 *
 * @param {Object} options
 * @param {Object} options.bodyReader - must provide readXmlElements(children).
 * @param {*} options.notes - passed through to documents.Document.
 * @param {*} options.comments - passed through to documents.Document.
 * @returns {{convertXmlToDocument: function(Object): Result}}
 */
function DocumentXmlReader(options) {
    var bodyReader = options.bodyReader;

    /**
     * @param {Object} element - root element of the main document part.
     * @returns {Result} Result wrapping the Document and any reader messages.
     * @throws {Error} when the element has no w:body child.
     */
    function convertXmlToDocument(element) {
        var body = element.first("w:body");

        // Without this guard a part that is not WordprocessingML fails on
        // `body.children` with an uninformative TypeError.
        if (body == null) {
            throw new Error("Could not find the body element: are you sure this is a docx file?");
        }

        var result = bodyReader.readXmlElements(body.children)
            .map(function(children) {
                return new documents.Document(children, {
                    notes: options.notes,
                    comments: options.comments
                });
            });
        return new Result(result.value, result.messages);
    }

    return {
        convertXmlToDocument: convertXmlToDocument
    };
}
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
exports.read = read;
|
|
2
|
+
exports._findPartPaths = findPartPaths;
|
|
3
|
+
|
|
4
|
+
var path = require("path");
|
|
5
|
+
|
|
6
|
+
var promises = require("../promises");
|
|
7
|
+
var documents = require("../documents");
|
|
8
|
+
var Result = require("../results").Result;
|
|
9
|
+
var zipfile = require("../zipfile");
|
|
10
|
+
|
|
11
|
+
var readXmlFromZipFile = require("./office-xml-reader").readXmlFromZipFile;
|
|
12
|
+
var createBodyReader = require("./body-reader").createBodyReader;
|
|
13
|
+
var DocumentXmlReader = require("./document-xml-reader").DocumentXmlReader;
|
|
14
|
+
var relationshipsReader = require("./relationships-reader");
|
|
15
|
+
var contentTypesReader = require("./content-types-reader");
|
|
16
|
+
var numberingXml = require("./numbering-xml");
|
|
17
|
+
var stylesReader = require("./styles-reader");
|
|
18
|
+
var notesReader = require("./notes-reader");
|
|
19
|
+
var commentsReader = require("./comments-reader");
|
|
20
|
+
var Files = require("./files").Files;
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
/**
 * Reads a docx archive into a document.
 *
 * Steps run in order: content types and part paths, then styles, then
 * numbering (which needs styles), then footnotes/endnotes/comments, then the
 * combined notes, and finally the main document part.
 *
 * @param {Object} docxFile - opened zip archive of the document.
 * @param {Object} [input] - optional; `input.path` (path of the source
 *     document) sets the base directory handed to Files.
 * @returns {Promise} promise of the value produced for the main document part.
 */
function read(docxFile, input) {
    var settings = input || {};

    // Builds the callback given to readXmlFileWithBody for an optional part:
    // when no XML was read, yield an empty Result; otherwise parse it with
    // the reader created by readerModule[factoryName](bodyReader).
    function optionalPartReader(readerModule, factoryName) {
        return function(bodyReader, xml) {
            if (!xml) {
                return new Result([]);
            }
            return readerModule[factoryName](bodyReader)(xml);
        };
    }

    function readMainDocument(context) {
        return readXmlFileWithBody(context.partPaths.mainDocument, context, function(bodyReader, xml) {
            return context.notes.flatMap(function(notes) {
                return context.comments.flatMap(function(comments) {
                    var documentReader = new DocumentXmlReader({
                        bodyReader: bodyReader,
                        notes: notes,
                        comments: comments
                    });
                    return documentReader.convertXmlToDocument(xml);
                });
            });
        });
    }

    // NOTE(review): `.also` comes from ../promises; it appears to add the
    // returned properties to the accumulated result -- confirm there.
    return promises.props({
        contentTypes: readContentTypesFromZipFile(docxFile),
        partPaths: findPartPaths(docxFile),
        docxFile: docxFile,
        files: new Files(settings.path ? path.dirname(settings.path) : null)
    }).also(function(context) {
        return {
            styles: readStylesFromZipFile(docxFile, context.partPaths.styles)
        };
    }).also(function(context) {
        return {
            numbering: readNumberingFromZipFile(docxFile, context.partPaths.numbering, context.styles)
        };
    }).also(function(context) {
        return {
            footnotes: readXmlFileWithBody(
                context.partPaths.footnotes,
                context,
                optionalPartReader(notesReader, "createFootnotesReader")
            ),
            endnotes: readXmlFileWithBody(
                context.partPaths.endnotes,
                context,
                optionalPartReader(notesReader, "createEndnotesReader")
            ),
            comments: readXmlFileWithBody(
                context.partPaths.comments,
                context,
                optionalPartReader(commentsReader, "createCommentsReader")
            )
        };
    }).also(function(context) {
        return {
            notes: context.footnotes.flatMap(function(footnotes) {
                return context.endnotes.map(function(endnotes) {
                    return new documents.Notes(footnotes.concat(endnotes));
                });
            })
        };
    }).then(readMainDocument);
}
|
|
86
|
+
|
|
87
|
+
function findPartPaths(docxFile) {
|
|
88
|
+
return readPackageRelationships(docxFile).then(function(packageRelationships) {
|
|
89
|
+
var mainDocumentPath = findPartPath({
|
|
90
|
+
docxFile: docxFile,
|
|
91
|
+
relationships: packageRelationships,
|
|
92
|
+
relationshipType: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
|
93
|
+
basePath: "",
|
|
94
|
+
fallbackPath: "word/document.xml"
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
if (!docxFile.exists(mainDocumentPath)) {
|
|
98
|
+
throw new Error("Could not find main document part. Are you sure this is a valid .docx file?");
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
return xmlFileReader({
|
|
102
|
+
filename: relationshipsFilename(mainDocumentPath),
|
|
103
|
+
readElement: relationshipsReader.readRelationships,
|
|
104
|
+
defaultValue: relationshipsReader.defaultValue
|
|
105
|
+
})(docxFile).then(function(documentRelationships) {
|
|
106
|
+
function findPartRelatedToMainDocument(name) {
|
|
107
|
+
return findPartPath({
|
|
108
|
+
docxFile: docxFile,
|
|
109
|
+
relationships: documentRelationships,
|
|
110
|
+
relationshipType: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/" + name,
|
|
111
|
+
basePath: zipfile.splitPath(mainDocumentPath).dirname,
|
|
112
|
+
fallbackPath: "word/" + name + ".xml"
|
|
113
|
+
});
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return {
|
|
117
|
+
mainDocument: mainDocumentPath,
|
|
118
|
+
comments: findPartRelatedToMainDocument("comments"),
|
|
119
|
+
endnotes: findPartRelatedToMainDocument("endnotes"),
|
|
120
|
+
footnotes: findPartRelatedToMainDocument("footnotes"),
|
|
121
|
+
numbering: findPartRelatedToMainDocument("numbering"),
|
|
122
|
+
styles: findPartRelatedToMainDocument("styles")
|
|
123
|
+
};
|
|
124
|
+
});
|
|
125
|
+
});
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/**
 * Picks the path of a part from the relationships of a given type.
 *
 * Each target is joined onto `basePath`, has one leading "/" stripped, and is
 * kept only if it exists in the archive. The first surviving target wins;
 * when none survive, `fallbackPath` is returned without checking it exists.
 *
 * @param {Object} options - docxFile, relationships, relationshipType,
 *     basePath and fallbackPath.
 * @returns {string} path of the part inside the archive.
 */
function findPartPath(options) {
    var docxFile = options.docxFile;
    var basePath = options.basePath;

    var existingTargets = options.relationships
        .findTargetsByType(options.relationshipType)
        .map(function(target) {
            return stripPrefix(zipfile.joinPath(basePath, target), "/");
        })
        .filter(function(candidate) {
            return docxFile.exists(candidate);
        });

    return existingTargets.length === 0 ? options.fallbackPath : existingTargets[0];
}
|
|
148
|
+
|
|
149
|
+
/**
 * Removes `prefix` from the start of `value` when present (at most once);
 * otherwise returns `value` unchanged.
 */
function stripPrefix(value, prefix) {
    var startsWithPrefix = value.slice(0, prefix.length) === prefix;
    return startsWithPrefix ? value.slice(prefix.length) : value;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Creates a function that reads one XML file from a zip archive.
 *
 * @param {Object} options
 * @param {string} options.filename - path of the file inside the archive.
 * @param {function} options.readElement - converts the parsed root element.
 * @param {*} options.defaultValue - result when no element was read.
 * @returns {function(Object): Promise} reader taking the zip archive.
 */
function xmlFileReader(options) {
    function toValue(element) {
        if (element) {
            return options.readElement(element);
        }
        return options.defaultValue;
    }

    return function(zipFile) {
        return readXmlFromZipFile(zipFile, options.filename).then(toValue);
    };
}
|
|
165
|
+
|
|
166
|
+
/**
 * Reads an XML part together with the relationships file that belongs to it,
 * then hands a body reader and the parsed XML to `func`.
 *
 * @param {string} filename - path of the part inside the archive.
 * @param {Object} options - supplies docxFile, contentTypes, numbering,
 *     styles and files for the body reader.
 * @param {function(Object, Object)} func - called with (bodyReader, xml).
 * @returns {Promise} promise of whatever `func` returns.
 */
function readXmlFileWithBody(filename, options, func) {
    function bodyReaderFor(relationships) {
        return new createBodyReader({
            relationships: relationships,
            contentTypes: options.contentTypes,
            docxFile: options.docxFile,
            numbering: options.numbering,
            styles: options.styles,
            files: options.files
        });
    }

    var readPartRelationships = xmlFileReader({
        filename: relationshipsFilename(filename),
        readElement: relationshipsReader.readRelationships,
        defaultValue: relationshipsReader.defaultValue
    });

    return readPartRelationships(options.docxFile).then(function(relationships) {
        var bodyReader = bodyReaderFor(relationships);
        return readXmlFromZipFile(options.docxFile, filename).then(function(xml) {
            return func(bodyReader, xml);
        });
    });
}
|
|
188
|
+
|
|
189
|
+
/**
 * Returns the path of the relationships file for a part:
 * "<dirname>/_rels/<basename>.rels".
 */
function relationshipsFilename(filename) {
    var parts = zipfile.splitPath(filename);
    var relsBasename = parts.basename + ".rels";
    return zipfile.joinPath(parts.dirname, "_rels", relsBasename);
}
|
|
193
|
+
|
|
194
|
+
// Reader for the package-level "[Content_Types].xml"; yields
// contentTypesReader.defaultContentTypes when no element was read.
var contentTypesFileOptions = {
    filename: "[Content_Types].xml",
    readElement: contentTypesReader.readContentTypesFromXml,
    defaultValue: contentTypesReader.defaultContentTypes
};
var readContentTypesFromZipFile = xmlFileReader(contentTypesFileOptions);
|
|
199
|
+
|
|
200
|
+
/**
 * Reads the numbering part at `path`, resolving style links against `styles`.
 * Yields numberingXml.defaultNumbering when no element was read.
 *
 * @returns {Promise} promise of the numbering definitions.
 */
function readNumberingFromZipFile(zipFile, path, styles) {
    function readElement(element) {
        return numberingXml.readNumberingXml(element, {styles: styles});
    }

    var readNumbering = xmlFileReader({
        filename: path,
        readElement: readElement,
        defaultValue: numberingXml.defaultNumbering
    });
    return readNumbering(zipFile);
}
|
|
209
|
+
|
|
210
|
+
/**
 * Reads the styles part at `path`; yields stylesReader.defaultStyles when no
 * element was read.
 *
 * @returns {Promise} promise of the styles.
 */
function readStylesFromZipFile(zipFile, path) {
    var readStyles = xmlFileReader({
        filename: path,
        readElement: stylesReader.readStylesXml,
        defaultValue: stylesReader.defaultStyles
    });
    return readStyles(zipFile);
}
|
|
217
|
+
|
|
218
|
+
// Reader for the package-level relationships file "_rels/.rels"; yields
// relationshipsReader.defaultValue when no element was read.
var packageRelationshipsOptions = {
    filename: "_rels/.rels",
    readElement: relationshipsReader.readRelationships,
    defaultValue: relationshipsReader.defaultValue
};
var readPackageRelationships = xmlFileReader(packageRelationshipsOptions);
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
var fs = require("fs");
|
|
2
|
+
var url = require("url");
|
|
3
|
+
var os = require("os");
|
|
4
|
+
var resolvePath = require("path").resolve;
|
|
5
|
+
var isAbsolutePath = require('path-is-absolute');
|
|
6
|
+
|
|
7
|
+
var promises = require("../promises");
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
exports.Files = Files;
|
|
11
|
+
exports.uriToPath = uriToPath;
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
/**
 * Access to files referenced from a document (external images).
 *
 * @param {?string} base - directory used to resolve relative URIs; when
 *     falsy, only absolute paths can be read.
 * @returns {{read: function(string, string): Promise}}
 */
function Files(base) {
    // Turns a URI into a filesystem path: absolute paths are used as they
    // are, relative ones are resolved against `base`.
    function resolveUri(uri) {
        var path = uriToPath(uri);
        if (isAbsolutePath(path)) {
            return promises.resolve(path);
        }
        if (base) {
            return promises.resolve(resolvePath(base, path));
        }
        return promises.reject(new Error("could not find external image '" + uri + "', path of input document is unknown"));
    }

    // Reads the file behind `uri`; read failures are re-reported with the
    // URI and document directory prepended to the original message.
    function read(uri, encoding) {
        function describeFailure(error) {
            var message = "could not open external image: '" + uri +
                "' (document directory: '" + base + "')\n" +
                error.message;
            return promises.reject(new Error(message));
        }

        return resolveUri(uri).then(function(path) {
            return readFile(path, encoding).caught(describeFailure);
        });
    }

    return {
        read: read
    };
}
|
|
39
|
+
|
|
40
|
+
var readFile = promises.promisify(fs.readFile.bind(fs));
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
/**
 * Converts a local file URI or a relative URI into a filesystem path.
 *
 * @param {string} uriString - URI to convert.
 * @param {string} [platform] - platform name; defaults to os.platform().
 * @returns {string} the URI-decoded path. On "win32" the leading "/" before
 *     a drive letter ("/c:/...") is dropped.
 * @throws {Error} when the URI is neither a local file URI nor relative.
 */
function uriToPath(uriString, platform) {
    var targetPlatform = platform || os.platform();

    var uri = url.parse(uriString);
    if (!isLocalFileUri(uri) && !isRelativeUri(uri)) {
        throw new Error("Could not convert URI to path: " + uriString);
    }

    var path = decodeURIComponent(uri.path);
    var hasDriveLetter = targetPlatform === "win32" && /^\/[a-z]:/i.test(path);
    return hasDriveLetter ? path.slice(1) : path;
}
|
|
60
|
+
|
|
61
|
+
// True for "file:" URIs that have no host or whose host is "localhost".
function isLocalFileUri(uri) {
    if (uri.protocol !== "file:") {
        return false;
    }
    return !uri.host || uri.host === "localhost";
}
|
|
64
|
+
|
|
65
|
+
// True when the parsed URI has neither a protocol nor a host.
function isRelativeUri(uri) {
    return !(uri.protocol || uri.host);
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
var documents = require("../documents");
|
|
2
|
+
var Result = require("../results").Result;
|
|
3
|
+
|
|
4
|
+
exports.createFootnotesReader = createReader.bind(this, "footnote");
|
|
5
|
+
exports.createEndnotesReader = createReader.bind(this, "endnote");
|
|
6
|
+
|
|
7
|
+
/**
 * Creates a reader for a notes part.
 *
 * @param {string} noteType - "footnote" or "endnote"; selects the
 *     "w:<noteType>" elements and is recorded on each note.
 * @param {Object} bodyReader - must provide readXmlElements(children).
 * @returns {function(Object)} function taking the root element of the part
 *     and returning Result.combine(...) over one result per note.
 */
function createReader(noteType, bodyReader) {
    // Separator entries are layout markers rather than notes.
    var separatorTypes = ["continuationSeparator", "separator"];

    function isNote(element) {
        return separatorTypes.indexOf(element.attributes["w:type"]) === -1;
    }

    function readNote(noteElement) {
        var noteId = noteElement.attributes["w:id"];
        var bodyResult = bodyReader.readXmlElements(noteElement.children);
        return bodyResult.map(function(body) {
            return documents.Note({noteType: noteType, noteId: noteId, body: body});
        });
    }

    return function readNotesXml(element) {
        var noteResults = element.getElementsByTagName("w:" + noteType)
            .filter(function(candidate) {
                return isNote(candidate);
            })
            .map(function(noteElement) {
                return readNote(noteElement);
            });
        return Result.combine(noteResults);
    };
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
exports.readNumberingXml = readNumberingXml;
|
|
2
|
+
exports.Numbering = Numbering;
|
|
3
|
+
exports.defaultNumbering = new Numbering({});
|
|
4
|
+
|
|
5
|
+
/**
 * Lookup of numbering levels.
 *
 * @param {Object} nums - numbering instances keyed by numId; each entry has
 *     an `abstractNumId`.
 * @param {Object} [abstractNums] - abstract definitions keyed by
 *     abstractNumId; each has `levels` and an optional `numStyleLink`.
 * @param {Object} [styles] - must provide findNumberingStyleById(styleId)
 *     when any abstract definition carries a numStyleLink.
 * @returns {{findLevel: function(string, string): (Object|null|undefined)}}
 */
function Numbering(nums, abstractNums, styles) {
    /**
     * Finds the level definition for a numId, following numStyleLink
     * indirections through the numbering styles.
     *
     * Returns null when the numbering cannot be resolved: unknown numId,
     * missing abstract definition, unknown linked style, or a cycle of
     * style links. Previously the middle two threw a TypeError and a cycle
     * recursed without bound. When the definition is found but has no entry
     * for `level`, the result is undefined, as before.
     */
    function findLevel(numId, level) {
        var visitedNumIds = [];
        var currentNumId = numId;

        while (visitedNumIds.indexOf(currentNumId) === -1) {
            visitedNumIds.push(currentNumId);

            var num = nums[currentNumId];
            if (!num) {
                return null;
            }

            var abstractNum = abstractNums[num.abstractNumId];
            if (!abstractNum) {
                return null;
            }

            if (abstractNum.numStyleLink == null) {
                return abstractNum.levels[level];
            }

            // The real levels live on the numbering that the linked style
            // points to.
            var style = styles.findNumberingStyleById(abstractNum.numStyleLink);
            if (!style) {
                return null;
            }
            currentNumId = style.numId;
        }

        // The style links lead back to a numId that was already visited.
        return null;
    }

    return {
        findLevel: findLevel
    };
}
|
|
25
|
+
|
|
26
|
+
/**
 * Reads a parsed numbering part into a Numbering lookup.
 *
 * @param {Object} root - root element of the numbering part.
 * @param {Object} options - must carry a truthy `styles`.
 * @returns {Object} the Numbering built from the part.
 * @throws {Error} "styles is missing" when options or options.styles is falsy.
 */
function readNumberingXml(root, options) {
    var styles = options ? options.styles : null;
    if (!styles) {
        throw new Error("styles is missing");
    }

    var abstractNums = readAbstractNums(root);
    return new Numbering(readNums(root, abstractNums), abstractNums, styles);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Collects every "w:abstractNum" element under `root`, keyed by its
 * "w:abstractNumId" attribute.
 */
function readAbstractNums(root) {
    return root.getElementsByTagName("w:abstractNum").reduce(function(byId, abstractNumElement) {
        var abstractNumId = abstractNumElement.attributes["w:abstractNumId"];
        byId[abstractNumId] = readAbstractNum(abstractNumElement);
        return byId;
    }, {});
}
|
|
44
|
+
|
|
45
|
+
/**
 * Reads one "w:abstractNum" element.
 *
 * @param {Object} element - the abstract numbering element.
 * @returns {{levels: Object, numStyleLink: (string|undefined)}} `levels` is
 *     keyed by each level's "w:ilvl" attribute; `numStyleLink` is the
 *     "w:val" of the optional "w:numStyleLink" child.
 */
function readAbstractNum(element) {
    var levels = {};
    element.getElementsByTagName("w:lvl").forEach(function(levelElement) {
        var levelIndex = levelElement.attributes["w:ilvl"];
        // firstOrEmpty (as already used for w:numStyleLink below) keeps a
        // level that omits w:numFmt from throwing. Such a level has no
        // "bullet" format, so it is reported as ordered.
        var numFmt = levelElement.firstOrEmpty("w:numFmt").attributes["w:val"];
        levels[levelIndex] = {
            isOrdered: numFmt !== "bullet",
            level: levelIndex
        };
    });

    var numStyleLink = element.firstOrEmpty("w:numStyleLink").attributes["w:val"];

    return {levels: levels, numStyleLink: numStyleLink};
}
|
|
60
|
+
|
|
61
|
+
/**
 * Collects every "w:num" element under `root`, keyed by its "w:numId"
 * attribute; each entry records the "w:val" of its "w:abstractNumId" child.
 */
function readNums(root) {
    return root.getElementsByTagName("w:num").reduce(function(numsById, numElement) {
        var numId = numElement.attributes["w:numId"];
        var abstractNumId = numElement.first("w:abstractNumId").attributes["w:val"];
        numsById[numId] = {abstractNumId: abstractNumId};
        return numsById;
    }, {});
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
var _ = require("underscore");
|
|
2
|
+
|
|
3
|
+
var promises = require("../promises");
|
|
4
|
+
var xml = require("../xml");
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
exports.read = read;
|
|
8
|
+
exports.readXmlFromZipFile = readXmlFromZipFile;
|
|
9
|
+
|
|
10
|
+
// Namespace URI -> prefix table passed to xml.readString by read(). Other
// readers in this package match element names such as "w:body" and
// "mc:AlternateContent" that use these prefixes.
var xmlNamespaceMap = {
    // WordprocessingML and relationships
    "http://schemas.openxmlformats.org/wordprocessingml/2006/main": "w",
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships": "r",
    // DrawingML
    "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing": "wp",
    "http://schemas.openxmlformats.org/drawingml/2006/main": "a",
    "http://schemas.openxmlformats.org/drawingml/2006/picture": "pic",
    // Packaging
    "http://schemas.openxmlformats.org/package/2006/content-types": "content-types",
    // VML, markup compatibility and legacy Word
    "urn:schemas-microsoft-com:vml": "v",
    "http://schemas.openxmlformats.org/markup-compatibility/2006": "mc",
    "urn:schemas-microsoft-com:office:word": "office-word"
};
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
/**
 * Parses an Office XML string using the namespace prefixes in
 * xmlNamespaceMap and collapses mc:AlternateContent elements.
 *
 * @param {string} xmlString - XML source.
 * @returns {Promise<Object>} promise of the first node produced by
 *     collapseAlternateContent for the parsed document.
 */
function read(xmlString) {
    function firstCollapsedNode(document) {
        var collapsed = collapseAlternateContent(document);
        return collapsed[0];
    }

    return xml.readString(xmlString, xmlNamespaceMap).then(firstCollapsedNode);
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
/**
 * Reads and parses an XML file from the archive.
 *
 * @param {Object} docxFile - archive providing exists(path) and
 *     read(path, encoding).
 * @param {string} path - path of the file inside the archive.
 * @returns {Promise} promise of the parsed root, or of null when the archive
 *     has no file at `path`.
 */
function readXmlFromZipFile(docxFile, path) {
    if (!docxFile.exists(path)) {
        return promises.resolve(null);
    }

    var xmlText = docxFile.read(path, "utf-8");
    return xmlText.then(stripUtf8Bom).then(read);
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
// Drops a single leading byte-order mark (U+FEFF) from the string, if present.
function stripUtf8Bom(xmlString) {
    var BYTE_ORDER_MARK = 0xFEFF;
    return xmlString.charCodeAt(0) === BYTE_ORDER_MARK ? xmlString.slice(1) : xmlString;
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
/**
 * Replaces every mc:AlternateContent element in a tree with the children of
 * its mc:Fallback.
 *
 * @param {Object} node - XML node; element nodes are updated in place.
 * @returns {Array} the nodes that stand in for `node`: the fallback's
 *     children for an mc:AlternateContent (an empty array when it has no
 *     mc:Fallback), otherwise a one-element array holding `node`.
 */
function collapseAlternateContent(node) {
    if (node.type !== "element") {
        return [node];
    }

    if (node.name === "mc:AlternateContent") {
        // An AlternateContent without a Fallback contributes nothing;
        // previously this dereferenced a missing element and threw.
        var fallback = node.first("mc:Fallback");
        return fallback ? fallback.children : [];
    }

    var replacements = node.children.map(function(child) {
        return collapseAlternateContent(child);
    });
    // Each child maps to an array of nodes, so exactly one level needs
    // flattening. The shallow flag belongs to _.flatten; the original passed
    // it to Array.prototype.map, where it was ignored as `thisArg`.
    node.children = _.flatten(replacements, true);
    return [node];
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
exports.readRelationships = readRelationships;
|
|
2
|
+
exports.defaultValue = new Relationships([]);
|
|
3
|
+
exports.Relationships = Relationships;
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
/**
 * Reads a parsed relationships file.
 *
 * Only children named
 * "{http://schemas.openxmlformats.org/package/2006/relationships}Relationship"
 * are used; their Id, Target and Type attributes become relationshipId,
 * target and type.
 *
 * @param {Object} element - root element of the relationships file.
 * @returns {Object} a Relationships lookup.
 */
function readRelationships(element) {
    var relationshipName = "{http://schemas.openxmlformats.org/package/2006/relationships}Relationship";

    var relationships = element.children
        .filter(function(child) {
            return child.name === relationshipName;
        })
        .map(function(child) {
            return {
                relationshipId: child.attributes.Id,
                target: child.attributes.Target,
                type: child.attributes.Type
            };
        });

    return new Relationships(relationships);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Lookup over a list of relationships.
 *
 * @param {Array<{relationshipId: string, target: string, type: string}>} relationships
 * @returns {{findTargetByRelationshipId: function(string): (string|undefined),
 *            findTargetsByType: function(string): Array<string>}}
 */
function Relationships(relationships) {
    // Ids and types are read from the document, so the tables are
    // prototype-less. With plain objects, a Type of "toString" would find
    // the inherited function and crash on `.push`, and looking up
    // "constructor" would return that function instead of undefined or [].
    var targetsByRelationshipId = Object.create(null);
    var targetsByType = Object.create(null);

    relationships.forEach(function(relationship) {
        // A later relationship with the same id replaces the earlier one.
        targetsByRelationshipId[relationship.relationshipId] = relationship.target;

        if (!targetsByType[relationship.type]) {
            targetsByType[relationship.type] = [];
        }
        targetsByType[relationship.type].push(relationship.target);
    });

    return {
        // Target of the relationship with this id, or undefined when unknown.
        findTargetByRelationshipId: function(relationshipId) {
            return targetsByRelationshipId[relationshipId];
        },
        // Targets of every relationship of this type, in document order.
        findTargetsByType: function(type) {
            return targetsByType[type] || [];
        }
    };
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
var _ = require("underscore");
|
|
2
|
+
|
|
3
|
+
var promises = require("../promises");
|
|
4
|
+
var xml = require("../xml");
|
|
5
|
+
|
|
6
|
+
exports.writeStyleMap = writeStyleMap;
|
|
7
|
+
exports.readStyleMap = readStyleMap;
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
// Relationship type under which the embedded style map is registered.
var schema = "http://schemas.zwobble.org/mammoth/style-map";
// Location of the embedded style map inside the archive, and the same
// location written as an absolute part name.
var styleMapPath = "mammoth/style-map";
var styleMapAbsolutePath = "/" + styleMapPath;
|
|
13
|
+
|
|
14
|
+
/**
 * Embeds a style map in the archive: writes the style map file, then updates
 * the document relationships, then the content types.
 *
 * @param {Object} docxFile - archive providing read and write.
 * @param {string} styleMap - style map text to embed.
 * @returns {Promise} settles once the content types have been updated.
 */
function writeStyleMap(docxFile, styleMap) {
    docxFile.write(styleMapPath, styleMap);

    function thenUpdateContentTypes() {
        return updateContentTypes(docxFile);
    }

    return updateRelationships(docxFile).then(thenUpdateContentTypes);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Adds (or refreshes) the style map relationship, Id "rMammothStyleMap", in
 * "word/_rels/document.xml.rels" and writes the file back.
 *
 * @param {Object} docxFile - archive providing read and write.
 * @returns {Promise} promise of whatever docxFile.write returns.
 */
function updateRelationships(docxFile) {
    var relsPath = "word/_rels/document.xml.rels";
    var relationshipsUri = "http://schemas.openxmlformats.org/package/2006/relationships";

    function upsertStyleMapRelationship(relationshipsContainer) {
        addOrUpdateElement(
            relationshipsContainer.children,
            "{" + relationshipsUri + "}Relationship",
            "Id",
            {
                "Id": "rMammothStyleMap",
                "Type": schema,
                "Target": styleMapAbsolutePath
            }
        );

        // The relationships namespace is written as the default namespace.
        var serialised = xml.writeString(relationshipsContainer, {"": relationshipsUri});
        return docxFile.write(relsPath, serialised);
    }

    return docxFile.read(relsPath, "utf8")
        .then(xml.readString)
        .then(upsertStyleMapRelationship);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Adds (or refreshes) the Override entry that declares the style map part as
 * "text/prs.mammoth.style-map" in "[Content_Types].xml" and writes the file
 * back.
 *
 * @param {Object} docxFile - archive providing read and write.
 * @returns {Promise} promise of whatever docxFile.write returns.
 */
function updateContentTypes(docxFile) {
    var contentTypesPath = "[Content_Types].xml";
    var contentTypesUri = "http://schemas.openxmlformats.org/package/2006/content-types";

    function upsertStyleMapOverride(typesElement) {
        addOrUpdateElement(
            typesElement.children,
            "{" + contentTypesUri + "}Override",
            "PartName",
            {
                "PartName": styleMapAbsolutePath,
                "ContentType": "text/prs.mammoth.style-map"
            }
        );

        // The content-types namespace is written as the default namespace.
        var serialised = xml.writeString(typesElement, {"": contentTypesUri});
        return docxFile.write(contentTypesPath, serialised);
    }

    return docxFile.read(contentTypesPath, "utf8")
        .then(xml.readString)
        .then(upsertStyleMapOverride);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Ensures `elements` holds an element called `name` whose
 * `identifyingAttribute` equals the one in `attributes`.
 *
 * The first matching element has its attributes replaced wholesale by
 * `attributes`; when nothing matches, a new element is appended. `elements`
 * is modified in place.
 */
function addOrUpdateElement(elements, name, identifyingAttribute, attributes) {
    var wantedValue = attributes[identifyingAttribute];

    for (var index = 0; index < elements.length; index++) {
        var candidate = elements[index];
        if (candidate.name === name && candidate.attributes[identifyingAttribute] === wantedValue) {
            candidate.attributes = attributes;
            return;
        }
    }

    elements.push(xml.element(name, attributes));
}
|
|
68
|
+
|
|
69
|
+
/**
 * Reads the style map embedded in the archive.
 *
 * @param {Object} docxFile - archive providing exists and read.
 * @returns {Promise} promise of the style map text, or of null when the
 *     archive has no embedded style map.
 */
function readStyleMap(docxFile) {
    if (!docxFile.exists(styleMapPath)) {
        return promises.resolve(null);
    }
    return docxFile.read(styleMapPath, "utf8");
}
|