@hyperlex/mammoth 1.4.9-beta → 1.4.21

This diff compares the contents of two publicly available versions of this package as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (41)
  1. package/.eslintrc.json +0 -1
  2. package/.idea/compiler.xml +6 -0
  3. package/.idea/inspectionProfiles/Project_Default.xml +6 -0
  4. package/.idea/mammoth.js.iml +1 -5
  5. package/.idea/vcs.xml +1 -1
  6. package/.idea/workspace.xml +173 -0
  7. package/NEWS +55 -0
  8. package/README.md +39 -18
  9. package/lib/document-to-html.js +3 -0
  10. package/lib/documents.js +2 -0
  11. package/lib/docx/body-reader.js +74 -17
  12. package/lib/docx/numbering-xml.js +27 -4
  13. package/lib/index.d.ts +78 -0
  14. package/lib/index.js +7 -10
  15. package/lib/raw-text.js +14 -0
  16. package/lib/style-reader.js +15 -13
  17. package/lib/styles/document-matchers.js +1 -0
  18. package/lib/zipfile.js +26 -26
  19. package/mammoth.browser.js +10436 -19087
  20. package/mammoth.browser.min.js +21 -18
  21. package/package-lock.json +2654 -0
  22. package/package.json +11 -12
  23. package/test/document-to-html.tests.js +24 -0
  24. package/test/docx/body-reader.tests.js +170 -13
  25. package/test/docx/numbering-xml.tests.js +38 -0
  26. package/test/docx/style-map.tests.js +45 -44
  27. package/test/raw-text.tests.js +61 -0
  28. package/test/style-reader.tests.js +32 -25
  29. package/test/test-data/comments.docx +0 -0
  30. package/test/test-data/footnote-hyperlink.docx +0 -0
  31. package/test/test-data/footnotes.docx +0 -0
  32. package/test/test-data/simple-list.docx +0 -0
  33. package/test/test-data/single-paragraph.docx +0 -0
  34. package/test/test-data/strikethrough.docx +0 -0
  35. package/test/test-data/tables.docx +0 -0
  36. package/test/test-data/text-box.docx +0 -0
  37. package/test/test-data/tiny-picture.docx +0 -0
  38. package/test/test-data/underline.docx +0 -0
  39. package/test/zipfile.tests.js +12 -10
  40. package/.github/ISSUE_TEMPLATE.md +0 -12
  41. package/.travis.yml +0 -10
package/lib/docx/numbering-xml.js CHANGED
@@ -1,13 +1,28 @@
1
+ var _ = require("underscore");
2
+
1
3
  exports.readNumberingXml = readNumberingXml;
2
4
  exports.Numbering = Numbering;
3
- exports.defaultNumbering = new Numbering({});
5
+ exports.defaultNumbering = new Numbering({}, {});
4
6
 
5
7
  function Numbering(nums, abstractNums, styles) {
8
+ var allLevels = _.flatten(_.values(abstractNums).map(function(abstractNum) {
9
+ return _.values(abstractNum.levels);
10
+ }));
11
+
12
+ var levelsByParagraphStyleId = _.indexBy(
13
+ allLevels.filter(function(level) {
14
+ return level.paragraphStyleId != null;
15
+ }),
16
+ "paragraphStyleId"
17
+ );
18
+
6
19
  function findLevel(numId, level) {
7
20
  var num = nums[numId];
8
21
  if (num) {
9
22
  var abstractNum = abstractNums[num.abstractNumId];
10
- if (abstractNum.numStyleLink == null) {
23
+ if (!abstractNum) {
24
+ return null;
25
+ } else if (abstractNum.numStyleLink == null) {
11
26
  return abstractNums[num.abstractNumId].levels[level];
12
27
  } else {
13
28
  var style = styles.findNumberingStyleById(abstractNum.numStyleLink);
@@ -18,8 +33,13 @@ function Numbering(nums, abstractNums, styles) {
18
33
  }
19
34
  }
20
35
 
36
+ function findLevelByParagraphStyleId(styleId) {
37
+ return levelsByParagraphStyleId[styleId] || null;
38
+ }
39
+
21
40
  return {
22
- findLevel: findLevel
41
+ findLevel: findLevel,
42
+ findLevelByParagraphStyleId: findLevelByParagraphStyleId
23
43
  };
24
44
  }
25
45
 
@@ -47,9 +67,12 @@ function readAbstractNum(element) {
47
67
  element.getElementsByTagName("w:lvl").forEach(function(levelElement) {
48
68
  var levelIndex = levelElement.attributes["w:ilvl"];
49
69
  var numFmt = levelElement.first("w:numFmt").attributes["w:val"];
70
+ var paragraphStyleId = levelElement.firstOrEmpty("w:pStyle").attributes["w:val"];
71
+
50
72
  levels[levelIndex] = {
51
73
  isOrdered: numFmt !== "bullet",
52
- level: levelIndex
74
+ level: levelIndex,
75
+ paragraphStyleId: paragraphStyleId
53
76
  };
54
77
  });
55
78
 
package/lib/index.d.ts ADDED
@@ -0,0 +1,78 @@
1
+ interface Mammoth {
2
+ convertToHtml: (input: Input, options?: Options) => Promise<Result>;
3
+ extractRawText: (input: Input) => Promise<Result>;
4
+ embedStyleMap: (input: Input, styleMap: string) => Promise<{toBuffer: () => Buffer}>;
5
+ images: Images;
6
+ }
7
+
8
+ type Input = NodeJsInput | BrowserInput;
9
+
10
+ type NodeJsInput = PathInput | BufferInput;
11
+
12
+ interface PathInput {
13
+ path: string;
14
+ }
15
+
16
+ interface BufferInput {
17
+ buffer: Buffer;
18
+ }
19
+
20
+ type BrowserInput = ArrayBufferInput;
21
+
22
+ interface ArrayBufferInput {
23
+ arrayBuffer: ArrayBuffer;
24
+ }
25
+
26
+ interface Options {
27
+ styleMap?: string | Array<string>;
28
+ includeEmbeddedStyleMap?: boolean;
29
+ includeDefaultStyleMap?: boolean;
30
+ convertImage?: ImageConverter;
31
+ ignoreEmptyParagraphs?: boolean;
32
+ idPrefix?: string;
33
+ }
34
+
35
+ interface ImageConverter {
36
+ __mammothBrand: "ImageConverter";
37
+ }
38
+
39
+ interface Image {
40
+ contentType: string;
41
+ read: ImageRead;
42
+ }
43
+
44
+ interface ImageRead {
45
+ (): Promise<Buffer>;
46
+ (encoding: string): Promise<string>;
47
+ }
48
+
49
+ interface ImageAttributes {
50
+ src: string;
51
+ }
52
+
53
+ interface Images {
54
+ dataUri: ImageConverter;
55
+ imgElement: (f: (image: Image) => Promise<ImageAttributes>) => ImageConverter;
56
+ }
57
+
58
+ interface Result {
59
+ value: string;
60
+ messages: Array<Message>;
61
+ }
62
+
63
+ type Message = Warning | Error;
64
+
65
+ interface Warning {
66
+ type: "warning";
67
+ message: string;
68
+ }
69
+
70
+ interface Error {
71
+ type: "error";
72
+ message: string;
73
+ error: unknown;
74
+ }
75
+
76
+ declare const mammoth: Mammoth;
77
+
78
+ export = mammoth;
package/lib/index.js CHANGED
@@ -3,6 +3,7 @@ var _ = require("underscore");
3
3
  var docxReader = require("./docx/docx-reader");
4
4
  var docxStyleMap = require("./docx/style-map");
5
5
  var DocumentConverter = require("./document-to-html").DocumentConverter;
6
+ var convertElementToRawText = require("./raw-text").convertElementToRawText;
6
7
  var readStyle = require("./style-reader").readStyle;
7
8
  var readOptions = require("./options-reader").readOptions;
8
9
  var unzip = require("./unzip");
@@ -89,23 +90,19 @@ function extractRawText(input) {
89
90
  });
90
91
  }
91
92
 
92
- function convertElementToRawText(element) {
93
- if (element.type === "text") {
94
- return element.value;
95
- } else {
96
- var tail = element.type === "paragraph" ? "\n\n" : "";
97
- return (element.children || []).map(convertElementToRawText).join("") + tail;
98
- }
99
- }
100
-
101
93
  function embedStyleMap(input, styleMap) {
102
94
  return unzip.openZip(input)
103
95
  .tap(function(docxFile) {
104
96
  return docxStyleMap.writeStyleMap(docxFile, styleMap);
105
97
  })
106
98
  .then(function(docxFile) {
99
+ return docxFile.toBuffer();
100
+ })
101
+ .then(function(buffer) {
107
102
  return {
108
- toBuffer: docxFile.toBuffer
103
+ toBuffer: function() {
104
+ return buffer;
105
+ }
109
106
  };
110
107
  });
111
108
  }
package/lib/raw-text.js ADDED
@@ -0,0 +1,14 @@
1
+ var documents = require("./documents");
2
+
3
+ function convertElementToRawText(element) {
4
+ if (element.type === "text") {
5
+ return element.value;
6
+ } else if (element.type === documents.types.tab) {
7
+ return "\t";
8
+ } else {
9
+ var tail = element.type === "paragraph" ? "\n\n" : "";
10
+ return (element.children || []).map(convertElementToRawText).join("") + tail;
11
+ }
12
+ }
13
+
14
+ exports.convertElementToRawText = convertElementToRawText;
package/lib/style-reader.js CHANGED
@@ -39,7 +39,7 @@ function readDocumentMatcher(string) {
39
39
 
40
40
  function documentMatcherRule() {
41
41
  var sequence = lop.rules.sequence;
42
-
42
+
43
43
  var identifierToConstant = function(identifier, constant) {
44
44
  return lop.rules.then(
45
45
  lop.rules.token("identifier", identifier),
@@ -48,15 +48,15 @@ function documentMatcherRule() {
48
48
  }
49
49
  );
50
50
  };
51
-
51
+
52
52
  var paragraphRule = identifierToConstant("p", documentMatchers.paragraph);
53
53
  var runRule = identifierToConstant("r", documentMatchers.run);
54
-
54
+
55
55
  var elementTypeRule = lop.rules.firstOf("p or r or table",
56
56
  paragraphRule,
57
57
  runRule
58
58
  );
59
-
59
+
60
60
  var styleIdRule = lop.rules.then(
61
61
  classRule,
62
62
  function(styleId) {
@@ -86,7 +86,7 @@ function documentMatcherRule() {
86
86
  }
87
87
  )
88
88
  );
89
-
89
+
90
90
  var styleNameRule = lop.rules.sequence(
91
91
  lop.rules.tokenOfType("open-square-bracket"),
92
92
  lop.rules.sequence.cut(),
@@ -95,7 +95,7 @@ function documentMatcherRule() {
95
95
  lop.rules.tokenOfType("close-square-bracket")
96
96
  ).head();
97
97
 
98
-
98
+
99
99
  var listTypeRule = lop.rules.firstOf("list type",
100
100
  identifierToConstant("ordered-list", {isOrdered: true}),
101
101
  identifierToConstant("unordered-list", {isOrdered: false})
@@ -130,7 +130,7 @@ function documentMatcherRule() {
130
130
  return matcherOptions;
131
131
  });
132
132
  }
133
-
133
+
134
134
  var paragraphOrRun = sequence(
135
135
  sequence.capture(elementTypeRule),
136
136
  sequence.capture(createMatcherSuffixesRule([
@@ -141,7 +141,7 @@ function documentMatcherRule() {
141
141
  ).map(function(createMatcher, matcherOptions) {
142
142
  return createMatcher(matcherOptions);
143
143
  });
144
-
144
+
145
145
  var table = sequence(
146
146
  lop.rules.token("identifier", "table"),
147
147
  sequence.capture(createMatcherSuffixesRule([
@@ -156,9 +156,10 @@ function documentMatcherRule() {
156
156
  var italic = identifierToConstant("i", documentMatchers.italic);
157
157
  var underline = identifierToConstant("u", documentMatchers.underline);
158
158
  var strikethrough = identifierToConstant("strike", documentMatchers.strikethrough);
159
+ var allCaps = identifierToConstant("all-caps", documentMatchers.allCaps);
159
160
  var smallCaps = identifierToConstant("small-caps", documentMatchers.smallCaps);
160
161
  var commentReference = identifierToConstant("comment-reference", documentMatchers.commentReference);
161
-
162
+
162
163
  var breakMatcher = sequence(
163
164
  lop.rules.token("identifier", "br"),
164
165
  sequence.cut(),
@@ -187,6 +188,7 @@ function documentMatcherRule() {
187
188
  italic,
188
189
  underline,
189
190
  strikethrough,
191
+ allCaps,
190
192
  smallCaps,
191
193
  commentReference,
192
194
  breakMatcher
@@ -211,7 +213,7 @@ function htmlPathRule() {
211
213
  }).valueOrElse(false);
212
214
  }
213
215
  );
214
-
216
+
215
217
  var separatorRule = lop.rules.then(
216
218
  lop.rules.optional(lop.rules.sequence(
217
219
  lop.rules.tokenOfType("colon"),
@@ -229,7 +231,7 @@ function htmlPathRule() {
229
231
  identifierRule,
230
232
  lop.rules.tokenOfType("choice")
231
233
  );
232
-
234
+
233
235
  var styleElementRule = lop.rules.sequence(
234
236
  capture(tagNamesRule),
235
237
  capture(lop.rules.zeroOrMore(classRule)),
@@ -249,7 +251,7 @@ function htmlPathRule() {
249
251
  }
250
252
  return htmlPaths.element(tagName, attributes, options);
251
253
  });
252
-
254
+
253
255
  return lop.rules.firstOf("html path",
254
256
  lop.rules.then(lop.rules.tokenOfType("bang"), function() {
255
257
  return htmlPaths.ignore;
@@ -267,7 +269,7 @@ function htmlPathRule() {
267
269
  )
268
270
  );
269
271
  }
270
-
272
+
271
273
  var identifierRule = lop.rules.then(
272
274
  lop.rules.tokenOfType("identifier"),
273
275
  decodeEscapeSequences
package/lib/styles/document-matchers.js CHANGED
@@ -5,6 +5,7 @@ exports.bold = new Matcher("bold");
5
5
  exports.italic = new Matcher("italic");
6
6
  exports.underline = new Matcher("underline");
7
7
  exports.strikethrough = new Matcher("strikethrough");
8
+ exports.allCaps = new Matcher("allCaps");
8
9
  exports.smallCaps = new Matcher("smallCaps");
9
10
  exports.commentReference = new Matcher("commentReference");
10
11
  exports.lineBreak = new Matcher("break", {breakType: "line"});
package/lib/zipfile.js CHANGED
@@ -1,41 +1,41 @@
1
1
  var JSZip = require("jszip");
2
2
 
3
- var promises = require("./promises");
4
-
5
3
  exports.openArrayBuffer = openArrayBuffer;
6
4
  exports.splitPath = splitPath;
7
5
  exports.joinPath = joinPath;
8
6
 
9
7
  function openArrayBuffer(arrayBuffer) {
10
- var zipFile = new JSZip(arrayBuffer);
11
- function exists(name) {
12
- return zipFile.file(name) !== null;
13
- }
8
+ return JSZip.loadAsync(arrayBuffer).then(function(zipFile) {
9
+ function exists(name) {
10
+ return zipFile.file(name) !== null;
11
+ }
14
12
 
15
- function read(name, encoding) {
16
- var array = zipFile.file(name).asUint8Array();
17
- var buffer = uint8ArrayToBuffer(array);
18
- if (encoding) {
19
- return promises.when(buffer.toString(encoding));
20
- } else {
21
- return promises.when(buffer);
13
+ function read(name, encoding) {
14
+ return zipFile.file(name).async("uint8array").then(function(array) {
15
+ var buffer = uint8ArrayToBuffer(array);
16
+ if (encoding) {
17
+ return buffer.toString(encoding);
18
+ } else {
19
+ return buffer;
20
+ }
21
+ });
22
22
  }
23
- }
24
23
 
25
- function write(name, contents) {
26
- zipFile.file(name, contents);
27
- }
24
+ function write(name, contents) {
25
+ zipFile.file(name, contents);
26
+ }
28
27
 
29
- function toBuffer() {
30
- return zipFile.generate({type: "nodebuffer"});
31
- }
28
+ function toBuffer() {
29
+ return zipFile.generateAsync({type: "nodebuffer"});
30
+ }
32
31
 
33
- return {
34
- exists: exists,
35
- read: read,
36
- write: write,
37
- toBuffer: toBuffer
38
- };
32
+ return {
33
+ exists: exists,
34
+ read: read,
35
+ write: write,
36
+ toBuffer: toBuffer
37
+ };
38
+ });
39
39
  }
40
40
 
41
41
  function uint8ArrayToBuffer(array) {