@hyperlex/mammoth 1.4.9-beta → 1.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +0 -1
- package/.idea/compiler.xml +6 -0
- package/.idea/inspectionProfiles/Project_Default.xml +6 -0
- package/.idea/mammoth.js.iml +1 -5
- package/.idea/vcs.xml +1 -1
- package/.idea/workspace.xml +173 -0
- package/NEWS +55 -0
- package/README.md +39 -18
- package/lib/document-to-html.js +3 -0
- package/lib/documents.js +2 -0
- package/lib/docx/body-reader.js +74 -17
- package/lib/docx/numbering-xml.js +27 -4
- package/lib/index.d.ts +78 -0
- package/lib/index.js +7 -10
- package/lib/raw-text.js +14 -0
- package/lib/style-reader.js +15 -13
- package/lib/styles/document-matchers.js +1 -0
- package/lib/zipfile.js +26 -26
- package/mammoth.browser.js +10436 -19087
- package/mammoth.browser.min.js +21 -18
- package/package-lock.json +2654 -0
- package/package.json +11 -12
- package/test/document-to-html.tests.js +24 -0
- package/test/docx/body-reader.tests.js +170 -13
- package/test/docx/numbering-xml.tests.js +38 -0
- package/test/docx/style-map.tests.js +45 -44
- package/test/raw-text.tests.js +61 -0
- package/test/style-reader.tests.js +32 -25
- package/test/test-data/comments.docx +0 -0
- package/test/test-data/footnote-hyperlink.docx +0 -0
- package/test/test-data/footnotes.docx +0 -0
- package/test/test-data/simple-list.docx +0 -0
- package/test/test-data/single-paragraph.docx +0 -0
- package/test/test-data/strikethrough.docx +0 -0
- package/test/test-data/tables.docx +0 -0
- package/test/test-data/text-box.docx +0 -0
- package/test/test-data/tiny-picture.docx +0 -0
- package/test/test-data/underline.docx +0 -0
- package/test/zipfile.tests.js +12 -10
- package/.github/ISSUE_TEMPLATE.md +0 -12
- package/.travis.yml +0 -10
|
package/lib/docx/numbering-xml.js
CHANGED
|
@@ -1,13 +1,28 @@
|
|
|
1
|
+
var _ = require("underscore");
|
|
2
|
+
|
|
1
3
|
exports.readNumberingXml = readNumberingXml;
|
|
2
4
|
exports.Numbering = Numbering;
|
|
3
|
-
exports.defaultNumbering = new Numbering({});
|
|
5
|
+
exports.defaultNumbering = new Numbering({}, {});
|
|
4
6
|
|
|
5
7
|
function Numbering(nums, abstractNums, styles) {
|
|
8
|
+
var allLevels = _.flatten(_.values(abstractNums).map(function(abstractNum) {
|
|
9
|
+
return _.values(abstractNum.levels);
|
|
10
|
+
}));
|
|
11
|
+
|
|
12
|
+
var levelsByParagraphStyleId = _.indexBy(
|
|
13
|
+
allLevels.filter(function(level) {
|
|
14
|
+
return level.paragraphStyleId != null;
|
|
15
|
+
}),
|
|
16
|
+
"paragraphStyleId"
|
|
17
|
+
);
|
|
18
|
+
|
|
6
19
|
function findLevel(numId, level) {
|
|
7
20
|
var num = nums[numId];
|
|
8
21
|
if (num) {
|
|
9
22
|
var abstractNum = abstractNums[num.abstractNumId];
|
|
10
|
-
if (abstractNum.numStyleLink == null) {
|
|
23
|
+
if (!abstractNum) {
|
|
24
|
+
return null;
|
|
25
|
+
} else if (abstractNum.numStyleLink == null) {
|
|
11
26
|
return abstractNums[num.abstractNumId].levels[level];
|
|
12
27
|
} else {
|
|
13
28
|
var style = styles.findNumberingStyleById(abstractNum.numStyleLink);
|
|
@@ -18,8 +33,13 @@ function Numbering(nums, abstractNums, styles) {
|
|
|
18
33
|
}
|
|
19
34
|
}
|
|
20
35
|
|
|
36
|
+
function findLevelByParagraphStyleId(styleId) {
|
|
37
|
+
return levelsByParagraphStyleId[styleId] || null;
|
|
38
|
+
}
|
|
39
|
+
|
|
21
40
|
return {
|
|
22
|
-
findLevel: findLevel
|
|
41
|
+
findLevel: findLevel,
|
|
42
|
+
findLevelByParagraphStyleId: findLevelByParagraphStyleId
|
|
23
43
|
};
|
|
24
44
|
}
|
|
25
45
|
|
|
@@ -47,9 +67,12 @@ function readAbstractNum(element) {
|
|
|
47
67
|
element.getElementsByTagName("w:lvl").forEach(function(levelElement) {
|
|
48
68
|
var levelIndex = levelElement.attributes["w:ilvl"];
|
|
49
69
|
var numFmt = levelElement.first("w:numFmt").attributes["w:val"];
|
|
70
|
+
var paragraphStyleId = levelElement.firstOrEmpty("w:pStyle").attributes["w:val"];
|
|
71
|
+
|
|
50
72
|
levels[levelIndex] = {
|
|
51
73
|
isOrdered: numFmt !== "bullet",
|
|
52
|
-
level: levelIndex
|
|
74
|
+
level: levelIndex,
|
|
75
|
+
paragraphStyleId: paragraphStyleId
|
|
53
76
|
};
|
|
54
77
|
});
|
|
55
78
|
|
package/lib/index.d.ts
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
interface Mammoth {
|
|
2
|
+
convertToHtml: (input: Input, options?: Options) => Promise<Result>;
|
|
3
|
+
extractRawText: (input: Input) => Promise<Result>;
|
|
4
|
+
embedStyleMap: (input: Input, styleMap: string) => Promise<{toBuffer: () => Buffer}>;
|
|
5
|
+
images: Images;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
type Input = NodeJsInput | BrowserInput;
|
|
9
|
+
|
|
10
|
+
type NodeJsInput = PathInput | BufferInput;
|
|
11
|
+
|
|
12
|
+
interface PathInput {
|
|
13
|
+
path: string;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
interface BufferInput {
|
|
17
|
+
buffer: Buffer;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
type BrowserInput = ArrayBufferInput;
|
|
21
|
+
|
|
22
|
+
interface ArrayBufferInput {
|
|
23
|
+
arrayBuffer: ArrayBuffer;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
interface Options {
|
|
27
|
+
styleMap?: string | Array<string>;
|
|
28
|
+
includeEmbeddedStyleMap?: boolean;
|
|
29
|
+
includeDefaultStyleMap?: boolean;
|
|
30
|
+
convertImage?: ImageConverter;
|
|
31
|
+
ignoreEmptyParagraphs?: boolean;
|
|
32
|
+
idPrefix?: string;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
interface ImageConverter {
|
|
36
|
+
__mammothBrand: "ImageConverter";
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
interface Image {
|
|
40
|
+
contentType: string;
|
|
41
|
+
read: ImageRead;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
interface ImageRead {
|
|
45
|
+
(): Promise<Buffer>;
|
|
46
|
+
(encoding: string): Promise<string>;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
interface ImageAttributes {
|
|
50
|
+
src: string;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
interface Images {
|
|
54
|
+
dataUri: ImageConverter;
|
|
55
|
+
imgElement: (f: (image: Image) => Promise<ImageAttributes>) => ImageConverter;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
interface Result {
|
|
59
|
+
value: string;
|
|
60
|
+
messages: Array<Message>;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
type Message = Warning | Error;
|
|
64
|
+
|
|
65
|
+
interface Warning {
|
|
66
|
+
type: "warning";
|
|
67
|
+
message: string;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
interface Error {
|
|
71
|
+
type: "error";
|
|
72
|
+
message: string;
|
|
73
|
+
error: unknown;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
declare const mammoth: Mammoth;
|
|
77
|
+
|
|
78
|
+
export = mammoth;
|
package/lib/index.js
CHANGED
|
@@ -3,6 +3,7 @@ var _ = require("underscore");
|
|
|
3
3
|
var docxReader = require("./docx/docx-reader");
|
|
4
4
|
var docxStyleMap = require("./docx/style-map");
|
|
5
5
|
var DocumentConverter = require("./document-to-html").DocumentConverter;
|
|
6
|
+
var convertElementToRawText = require("./raw-text").convertElementToRawText;
|
|
6
7
|
var readStyle = require("./style-reader").readStyle;
|
|
7
8
|
var readOptions = require("./options-reader").readOptions;
|
|
8
9
|
var unzip = require("./unzip");
|
|
@@ -89,23 +90,19 @@ function extractRawText(input) {
|
|
|
89
90
|
});
|
|
90
91
|
}
|
|
91
92
|
|
|
92
|
-
function convertElementToRawText(element) {
|
|
93
|
-
if (element.type === "text") {
|
|
94
|
-
return element.value;
|
|
95
|
-
} else {
|
|
96
|
-
var tail = element.type === "paragraph" ? "\n\n" : "";
|
|
97
|
-
return (element.children || []).map(convertElementToRawText).join("") + tail;
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
|
|
101
93
|
function embedStyleMap(input, styleMap) {
|
|
102
94
|
return unzip.openZip(input)
|
|
103
95
|
.tap(function(docxFile) {
|
|
104
96
|
return docxStyleMap.writeStyleMap(docxFile, styleMap);
|
|
105
97
|
})
|
|
106
98
|
.then(function(docxFile) {
|
|
99
|
+
return docxFile.toBuffer();
|
|
100
|
+
})
|
|
101
|
+
.then(function(buffer) {
|
|
107
102
|
return {
|
|
108
|
-
toBuffer: docxFile.toBuffer
|
|
103
|
+
toBuffer: function() {
|
|
104
|
+
return buffer;
|
|
105
|
+
}
|
|
109
106
|
};
|
|
110
107
|
});
|
|
111
108
|
}
|
package/lib/raw-text.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
var documents = require("./documents");
|
|
2
|
+
|
|
3
|
+
function convertElementToRawText(element) {
|
|
4
|
+
if (element.type === "text") {
|
|
5
|
+
return element.value;
|
|
6
|
+
} else if (element.type === documents.types.tab) {
|
|
7
|
+
return "\t";
|
|
8
|
+
} else {
|
|
9
|
+
var tail = element.type === "paragraph" ? "\n\n" : "";
|
|
10
|
+
return (element.children || []).map(convertElementToRawText).join("") + tail;
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
exports.convertElementToRawText = convertElementToRawText;
|
package/lib/style-reader.js
CHANGED
|
@@ -39,7 +39,7 @@ function readDocumentMatcher(string) {
|
|
|
39
39
|
|
|
40
40
|
function documentMatcherRule() {
|
|
41
41
|
var sequence = lop.rules.sequence;
|
|
42
|
-
|
|
42
|
+
|
|
43
43
|
var identifierToConstant = function(identifier, constant) {
|
|
44
44
|
return lop.rules.then(
|
|
45
45
|
lop.rules.token("identifier", identifier),
|
|
@@ -48,15 +48,15 @@ function documentMatcherRule() {
|
|
|
48
48
|
}
|
|
49
49
|
);
|
|
50
50
|
};
|
|
51
|
-
|
|
51
|
+
|
|
52
52
|
var paragraphRule = identifierToConstant("p", documentMatchers.paragraph);
|
|
53
53
|
var runRule = identifierToConstant("r", documentMatchers.run);
|
|
54
|
-
|
|
54
|
+
|
|
55
55
|
var elementTypeRule = lop.rules.firstOf("p or r or table",
|
|
56
56
|
paragraphRule,
|
|
57
57
|
runRule
|
|
58
58
|
);
|
|
59
|
-
|
|
59
|
+
|
|
60
60
|
var styleIdRule = lop.rules.then(
|
|
61
61
|
classRule,
|
|
62
62
|
function(styleId) {
|
|
@@ -86,7 +86,7 @@ function documentMatcherRule() {
|
|
|
86
86
|
}
|
|
87
87
|
)
|
|
88
88
|
);
|
|
89
|
-
|
|
89
|
+
|
|
90
90
|
var styleNameRule = lop.rules.sequence(
|
|
91
91
|
lop.rules.tokenOfType("open-square-bracket"),
|
|
92
92
|
lop.rules.sequence.cut(),
|
|
@@ -95,7 +95,7 @@ function documentMatcherRule() {
|
|
|
95
95
|
lop.rules.tokenOfType("close-square-bracket")
|
|
96
96
|
).head();
|
|
97
97
|
|
|
98
|
-
|
|
98
|
+
|
|
99
99
|
var listTypeRule = lop.rules.firstOf("list type",
|
|
100
100
|
identifierToConstant("ordered-list", {isOrdered: true}),
|
|
101
101
|
identifierToConstant("unordered-list", {isOrdered: false})
|
|
@@ -130,7 +130,7 @@ function documentMatcherRule() {
|
|
|
130
130
|
return matcherOptions;
|
|
131
131
|
});
|
|
132
132
|
}
|
|
133
|
-
|
|
133
|
+
|
|
134
134
|
var paragraphOrRun = sequence(
|
|
135
135
|
sequence.capture(elementTypeRule),
|
|
136
136
|
sequence.capture(createMatcherSuffixesRule([
|
|
@@ -141,7 +141,7 @@ function documentMatcherRule() {
|
|
|
141
141
|
).map(function(createMatcher, matcherOptions) {
|
|
142
142
|
return createMatcher(matcherOptions);
|
|
143
143
|
});
|
|
144
|
-
|
|
144
|
+
|
|
145
145
|
var table = sequence(
|
|
146
146
|
lop.rules.token("identifier", "table"),
|
|
147
147
|
sequence.capture(createMatcherSuffixesRule([
|
|
@@ -156,9 +156,10 @@ function documentMatcherRule() {
|
|
|
156
156
|
var italic = identifierToConstant("i", documentMatchers.italic);
|
|
157
157
|
var underline = identifierToConstant("u", documentMatchers.underline);
|
|
158
158
|
var strikethrough = identifierToConstant("strike", documentMatchers.strikethrough);
|
|
159
|
+
var allCaps = identifierToConstant("all-caps", documentMatchers.allCaps);
|
|
159
160
|
var smallCaps = identifierToConstant("small-caps", documentMatchers.smallCaps);
|
|
160
161
|
var commentReference = identifierToConstant("comment-reference", documentMatchers.commentReference);
|
|
161
|
-
|
|
162
|
+
|
|
162
163
|
var breakMatcher = sequence(
|
|
163
164
|
lop.rules.token("identifier", "br"),
|
|
164
165
|
sequence.cut(),
|
|
@@ -187,6 +188,7 @@ function documentMatcherRule() {
|
|
|
187
188
|
italic,
|
|
188
189
|
underline,
|
|
189
190
|
strikethrough,
|
|
191
|
+
allCaps,
|
|
190
192
|
smallCaps,
|
|
191
193
|
commentReference,
|
|
192
194
|
breakMatcher
|
|
@@ -211,7 +213,7 @@ function htmlPathRule() {
|
|
|
211
213
|
}).valueOrElse(false);
|
|
212
214
|
}
|
|
213
215
|
);
|
|
214
|
-
|
|
216
|
+
|
|
215
217
|
var separatorRule = lop.rules.then(
|
|
216
218
|
lop.rules.optional(lop.rules.sequence(
|
|
217
219
|
lop.rules.tokenOfType("colon"),
|
|
@@ -229,7 +231,7 @@ function htmlPathRule() {
|
|
|
229
231
|
identifierRule,
|
|
230
232
|
lop.rules.tokenOfType("choice")
|
|
231
233
|
);
|
|
232
|
-
|
|
234
|
+
|
|
233
235
|
var styleElementRule = lop.rules.sequence(
|
|
234
236
|
capture(tagNamesRule),
|
|
235
237
|
capture(lop.rules.zeroOrMore(classRule)),
|
|
@@ -249,7 +251,7 @@ function htmlPathRule() {
|
|
|
249
251
|
}
|
|
250
252
|
return htmlPaths.element(tagName, attributes, options);
|
|
251
253
|
});
|
|
252
|
-
|
|
254
|
+
|
|
253
255
|
return lop.rules.firstOf("html path",
|
|
254
256
|
lop.rules.then(lop.rules.tokenOfType("bang"), function() {
|
|
255
257
|
return htmlPaths.ignore;
|
|
@@ -267,7 +269,7 @@ function htmlPathRule() {
|
|
|
267
269
|
)
|
|
268
270
|
);
|
|
269
271
|
}
|
|
270
|
-
|
|
272
|
+
|
|
271
273
|
var identifierRule = lop.rules.then(
|
|
272
274
|
lop.rules.tokenOfType("identifier"),
|
|
273
275
|
decodeEscapeSequences
|
|
package/lib/styles/document-matchers.js
CHANGED
|
@@ -5,6 +5,7 @@ exports.bold = new Matcher("bold");
|
|
|
5
5
|
exports.italic = new Matcher("italic");
|
|
6
6
|
exports.underline = new Matcher("underline");
|
|
7
7
|
exports.strikethrough = new Matcher("strikethrough");
|
|
8
|
+
exports.allCaps = new Matcher("allCaps");
|
|
8
9
|
exports.smallCaps = new Matcher("smallCaps");
|
|
9
10
|
exports.commentReference = new Matcher("commentReference");
|
|
10
11
|
exports.lineBreak = new Matcher("break", {breakType: "line"});
|
package/lib/zipfile.js
CHANGED
|
@@ -1,41 +1,41 @@
|
|
|
1
1
|
var JSZip = require("jszip");
|
|
2
2
|
|
|
3
|
-
var promises = require("./promises");
|
|
4
|
-
|
|
5
3
|
exports.openArrayBuffer = openArrayBuffer;
|
|
6
4
|
exports.splitPath = splitPath;
|
|
7
5
|
exports.joinPath = joinPath;
|
|
8
6
|
|
|
9
7
|
function openArrayBuffer(arrayBuffer) {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
8
|
+
return JSZip.loadAsync(arrayBuffer).then(function(zipFile) {
|
|
9
|
+
function exists(name) {
|
|
10
|
+
return zipFile.file(name) !== null;
|
|
11
|
+
}
|
|
14
12
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
13
|
+
function read(name, encoding) {
|
|
14
|
+
return zipFile.file(name).async("uint8array").then(function(array) {
|
|
15
|
+
var buffer = uint8ArrayToBuffer(array);
|
|
16
|
+
if (encoding) {
|
|
17
|
+
return buffer.toString(encoding);
|
|
18
|
+
} else {
|
|
19
|
+
return buffer;
|
|
20
|
+
}
|
|
21
|
+
});
|
|
22
22
|
}
|
|
23
|
-
}
|
|
24
23
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
function write(name, contents) {
|
|
25
|
+
zipFile.file(name, contents);
|
|
26
|
+
}
|
|
28
27
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
28
|
+
function toBuffer() {
|
|
29
|
+
return zipFile.generateAsync({type: "nodebuffer"});
|
|
30
|
+
}
|
|
32
31
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
32
|
+
return {
|
|
33
|
+
exists: exists,
|
|
34
|
+
read: read,
|
|
35
|
+
write: write,
|
|
36
|
+
toBuffer: toBuffer
|
|
37
|
+
};
|
|
38
|
+
});
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
function uint8ArrayToBuffer(array) {
|