@hyperlex/mammoth 1.4.9-beta → 1.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +0 -1
- package/.idea/compiler.xml +6 -0
- package/.idea/inspectionProfiles/Project_Default.xml +6 -0
- package/.idea/mammoth.js.iml +1 -5
- package/.idea/vcs.xml +1 -1
- package/.idea/workspace.xml +173 -0
- package/NEWS +55 -0
- package/README.md +39 -18
- package/lib/document-to-html.js +3 -0
- package/lib/documents.js +2 -0
- package/lib/docx/body-reader.js +74 -17
- package/lib/docx/numbering-xml.js +27 -4
- package/lib/index.d.ts +78 -0
- package/lib/index.js +7 -10
- package/lib/raw-text.js +14 -0
- package/lib/style-reader.js +15 -13
- package/lib/styles/document-matchers.js +1 -0
- package/lib/zipfile.js +26 -26
- package/mammoth.browser.js +10436 -19087
- package/mammoth.browser.min.js +21 -18
- package/package-lock.json +2654 -0
- package/package.json +11 -12
- package/test/document-to-html.tests.js +24 -0
- package/test/docx/body-reader.tests.js +170 -13
- package/test/docx/numbering-xml.tests.js +38 -0
- package/test/docx/style-map.tests.js +45 -44
- package/test/raw-text.tests.js +61 -0
- package/test/style-reader.tests.js +32 -25
- package/test/test-data/comments.docx +0 -0
- package/test/test-data/footnote-hyperlink.docx +0 -0
- package/test/test-data/footnotes.docx +0 -0
- package/test/test-data/simple-list.docx +0 -0
- package/test/test-data/single-paragraph.docx +0 -0
- package/test/test-data/strikethrough.docx +0 -0
- package/test/test-data/tables.docx +0 -0
- package/test/test-data/text-box.docx +0 -0
- package/test/test-data/tiny-picture.docx +0 -0
- package/test/test-data/underline.docx +0 -0
- package/test/zipfile.tests.js +12 -10
- package/.github/ISSUE_TEMPLATE.md +0 -12
- package/.travis.yml +0 -10
package/.eslintrc.json
CHANGED
|
@@ -52,7 +52,6 @@
|
|
|
52
52
|
"no-spaced-func": ["error"],
|
|
53
53
|
"no-trailing-spaces": ["error", {"skipBlankLines": true}],
|
|
54
54
|
"no-whitespace-before-property": ["error"],
|
|
55
|
-
"object-curly-spacing": ["error", "never"],
|
|
56
55
|
"one-var": ["error", "never"],
|
|
57
56
|
"semi": ["error", "always"],
|
|
58
57
|
"semi-spacing": ["error", {"before": false}],
|
package/.idea/mammoth.js.iml
CHANGED
|
@@ -1,11 +1,7 @@
|
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
2
|
<module type="WEB_MODULE" version="4">
|
|
3
3
|
<component name="NewModuleRootManager">
|
|
4
|
-
<content url="file://$MODULE_DIR$"
|
|
5
|
-
<excludeFolder url="file://$MODULE_DIR$/temp" />
|
|
6
|
-
<excludeFolder url="file://$MODULE_DIR$/.tmp" />
|
|
7
|
-
<excludeFolder url="file://$MODULE_DIR$/tmp" />
|
|
8
|
-
</content>
|
|
4
|
+
<content url="file://$MODULE_DIR$" />
|
|
9
5
|
<orderEntry type="inheritedJdk" />
|
|
10
6
|
<orderEntry type="sourceFolder" forTests="false" />
|
|
11
7
|
</component>
|
package/.idea/workspace.xml
CHANGED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<project version="4">
|
|
3
|
+
<component name="AutoImportSettings">
|
|
4
|
+
<option name="autoReloadType" value="SELECTIVE" />
|
|
5
|
+
</component>
|
|
6
|
+
<component name="ChangeListManager">
|
|
7
|
+
<list default="true" id="fb2a5579-0030-43eb-b358-b723f29ab6e1" name="Changes" comment="Don't ignore browser build and commit yarn.lock">
|
|
8
|
+
<change beforePath="$PROJECT_DIR$/package.json" beforeDir="false" afterPath="$PROJECT_DIR$/package.json" afterDir="false" />
|
|
9
|
+
<change beforePath="$PROJECT_DIR$/yarn.lock" beforeDir="false" afterPath="$PROJECT_DIR$/yarn.lock" afterDir="false" />
|
|
10
|
+
</list>
|
|
11
|
+
<option name="SHOW_DIALOG" value="false" />
|
|
12
|
+
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
|
13
|
+
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
|
14
|
+
<option name="LAST_RESOLUTION" value="IGNORE" />
|
|
15
|
+
</component>
|
|
16
|
+
<component name="EmbeddingIndexingInfo">
|
|
17
|
+
<option name="cachedIndexableFilesCount" value="119" />
|
|
18
|
+
<option name="fileBasedEmbeddingIndicesEnabled" value="true" />
|
|
19
|
+
</component>
|
|
20
|
+
<component name="Git.Settings">
|
|
21
|
+
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
|
|
22
|
+
</component>
|
|
23
|
+
<component name="McpProjectServerCommands">
|
|
24
|
+
<commands />
|
|
25
|
+
<urls />
|
|
26
|
+
</component>
|
|
27
|
+
<component name="ProjectColorInfo"><![CDATA[{
|
|
28
|
+
"associatedIndex": 8
|
|
29
|
+
}]]></component>
|
|
30
|
+
<component name="ProjectId" id="39nIvtlpBCXmNpPviWMaHuovjPb" />
|
|
31
|
+
<component name="ProjectViewState">
|
|
32
|
+
<option name="autoscrollFromSource" value="true" />
|
|
33
|
+
<option name="hideEmptyMiddlePackages" value="true" />
|
|
34
|
+
<option name="showLibraryContents" value="true" />
|
|
35
|
+
</component>
|
|
36
|
+
<component name="PropertiesComponent"><![CDATA[{
|
|
37
|
+
"keyToString": {
|
|
38
|
+
"ModuleVcsDetector.initialDetectionPerformed": "true",
|
|
39
|
+
"RunOnceActivity.MCP Project settings loaded": "true",
|
|
40
|
+
"RunOnceActivity.ShowReadmeOnStart": "true",
|
|
41
|
+
"RunOnceActivity.git.unshallow": "true",
|
|
42
|
+
"RunOnceActivity.typescript.service.memoryLimit.init": "true",
|
|
43
|
+
"com.intellij.ml.llm.matterhorn.ej.ui.settings.DefaultModelSelectionForGA.v1": "true",
|
|
44
|
+
"git-widget-placeholder": "Rebasing master",
|
|
45
|
+
"javascript.preferred.runtime.type.id": "node",
|
|
46
|
+
"junie.onboarding.icon.badge.shown": "true",
|
|
47
|
+
"last_opened_file_path": "/Users/sdiaz/work/mammoth.js",
|
|
48
|
+
"node.js.detected.package.eslint": "true",
|
|
49
|
+
"node.js.detected.package.tslint": "true",
|
|
50
|
+
"node.js.selected.package.eslint": "(autodetect)",
|
|
51
|
+
"node.js.selected.package.tslint": "(autodetect)",
|
|
52
|
+
"nodejs_package_manager_path": "yarn",
|
|
53
|
+
"to.speed.mode.migration.done": "true",
|
|
54
|
+
"vue.rearranger.settings.migration": "true"
|
|
55
|
+
}
|
|
56
|
+
}]]></component>
|
|
57
|
+
<component name="SharedIndexes">
|
|
58
|
+
<attachedChunks>
|
|
59
|
+
<set>
|
|
60
|
+
<option value="bundled-js-predefined-d6986cc7102b-9b0f141eb926-JavaScript-WS-253.30387.83" />
|
|
61
|
+
</set>
|
|
62
|
+
</attachedChunks>
|
|
63
|
+
</component>
|
|
64
|
+
<component name="TaskManager">
|
|
65
|
+
<task active="true" id="Default" summary="Default task">
|
|
66
|
+
<changelist id="fb2a5579-0030-43eb-b358-b723f29ab6e1" name="Changes" comment="" />
|
|
67
|
+
<created>1771325642160</created>
|
|
68
|
+
<option name="number" value="Default" />
|
|
69
|
+
<option name="presentableId" value="Default" />
|
|
70
|
+
<updated>1771325642160</updated>
|
|
71
|
+
<workItem from="1771325643308" duration="3144000" />
|
|
72
|
+
</task>
|
|
73
|
+
<task id="LOCAL-00001" summary="Don't ignore browser build and commit yarn.lock">
|
|
74
|
+
<option name="closed" value="true" />
|
|
75
|
+
<created>1771336874338</created>
|
|
76
|
+
<option name="number" value="00001" />
|
|
77
|
+
<option name="presentableId" value="LOCAL-00001" />
|
|
78
|
+
<option name="project" value="LOCAL" />
|
|
79
|
+
<updated>1771336874338</updated>
|
|
80
|
+
</task>
|
|
81
|
+
<task id="LOCAL-00002" summary="Publish package to npm registry">
|
|
82
|
+
<option name="closed" value="true" />
|
|
83
|
+
<created>1771337011188</created>
|
|
84
|
+
<option name="number" value="00002" />
|
|
85
|
+
<option name="presentableId" value="LOCAL-00002" />
|
|
86
|
+
<option name="project" value="LOCAL" />
|
|
87
|
+
<updated>1771337011188</updated>
|
|
88
|
+
</task>
|
|
89
|
+
<task id="LOCAL-00003" summary="Publish package to npm registry">
|
|
90
|
+
<option name="closed" value="true" />
|
|
91
|
+
<created>1771337081623</created>
|
|
92
|
+
<option name="number" value="00003" />
|
|
93
|
+
<option name="presentableId" value="LOCAL-00003" />
|
|
94
|
+
<option name="project" value="LOCAL" />
|
|
95
|
+
<updated>1771337081623</updated>
|
|
96
|
+
</task>
|
|
97
|
+
<task id="LOCAL-00004" summary="Don't ignore browser build and commit yarn.lock">
|
|
98
|
+
<option name="closed" value="true" />
|
|
99
|
+
<created>1771337138560</created>
|
|
100
|
+
<option name="number" value="00004" />
|
|
101
|
+
<option name="presentableId" value="LOCAL-00004" />
|
|
102
|
+
<option name="project" value="LOCAL" />
|
|
103
|
+
<updated>1771337138560</updated>
|
|
104
|
+
</task>
|
|
105
|
+
<task id="LOCAL-00005" summary="Don't ignore browser build and commit yarn.lock">
|
|
106
|
+
<option name="closed" value="true" />
|
|
107
|
+
<created>1771337146350</created>
|
|
108
|
+
<option name="number" value="00005" />
|
|
109
|
+
<option name="presentableId" value="LOCAL-00005" />
|
|
110
|
+
<option name="project" value="LOCAL" />
|
|
111
|
+
<updated>1771337146350</updated>
|
|
112
|
+
</task>
|
|
113
|
+
<task id="LOCAL-00006" summary="Don't ignore browser build and commit yarn.lock">
|
|
114
|
+
<option name="closed" value="true" />
|
|
115
|
+
<created>1771337192606</created>
|
|
116
|
+
<option name="number" value="00006" />
|
|
117
|
+
<option name="presentableId" value="LOCAL-00006" />
|
|
118
|
+
<option name="project" value="LOCAL" />
|
|
119
|
+
<updated>1771337192606</updated>
|
|
120
|
+
</task>
|
|
121
|
+
<option name="localTasksCounter" value="7" />
|
|
122
|
+
<servers />
|
|
123
|
+
</component>
|
|
124
|
+
<component name="TypeScriptGeneratedFilesManager">
|
|
125
|
+
<option name="version" value="3" />
|
|
126
|
+
</component>
|
|
127
|
+
<component name="Vcs.Log.Tabs.Properties">
|
|
128
|
+
<option name="RECENT_FILTERS">
|
|
129
|
+
<map>
|
|
130
|
+
<entry key="Branch">
|
|
131
|
+
<value>
|
|
132
|
+
<list>
|
|
133
|
+
<RecentGroup>
|
|
134
|
+
<option name="FILTER_VALUES">
|
|
135
|
+
<option value="HEAD" />
|
|
136
|
+
</option>
|
|
137
|
+
</RecentGroup>
|
|
138
|
+
</list>
|
|
139
|
+
</value>
|
|
140
|
+
</entry>
|
|
141
|
+
</map>
|
|
142
|
+
</option>
|
|
143
|
+
<option name="TAB_STATES">
|
|
144
|
+
<map>
|
|
145
|
+
<entry key="MAIN">
|
|
146
|
+
<value>
|
|
147
|
+
<State>
|
|
148
|
+
<option name="FILTERS">
|
|
149
|
+
<map>
|
|
150
|
+
<entry key="branch">
|
|
151
|
+
<value>
|
|
152
|
+
<list>
|
|
153
|
+
<option value="HEAD" />
|
|
154
|
+
</list>
|
|
155
|
+
</value>
|
|
156
|
+
</entry>
|
|
157
|
+
</map>
|
|
158
|
+
</option>
|
|
159
|
+
</State>
|
|
160
|
+
</value>
|
|
161
|
+
</entry>
|
|
162
|
+
</map>
|
|
163
|
+
</option>
|
|
164
|
+
</component>
|
|
165
|
+
<component name="VcsManagerConfiguration">
|
|
166
|
+
<option name="CHECK_CODE_SMELLS_BEFORE_PROJECT_COMMIT" value="false" />
|
|
167
|
+
<option name="CHECK_NEW_TODO" value="false" />
|
|
168
|
+
<MESSAGE value="Publish package to npm registry" />
|
|
169
|
+
<MESSAGE value="Dynamic styleMaps for paragraphs # Conflicts: #	mammoth.browser.js #	mammoth.browser.min.js" />
|
|
170
|
+
<MESSAGE value="Don't ignore browser build and commit yarn.lock" />
|
|
171
|
+
<option name="LAST_COMMIT_MESSAGE" value="Don't ignore browser build and commit yarn.lock" />
|
|
172
|
+
</component>
|
|
173
|
+
</project>
|
package/NEWS
CHANGED
|
@@ -1,3 +1,58 @@
|
|
|
1
|
+
# 1.4.21
|
|
2
|
+
|
|
3
|
+
* Ignore w:u elements when w:val is missing.
|
|
4
|
+
|
|
5
|
+
# 1.4.20
|
|
6
|
+
|
|
7
|
+
* Emit warning instead of throwing exception when image file cannot be found for
|
|
8
|
+
a:blip elements.
|
|
9
|
+
|
|
10
|
+
# 1.4.19
|
|
11
|
+
|
|
12
|
+
* Add TypeScript declarations.
|
|
13
|
+
|
|
14
|
+
# 1.4.18
|
|
15
|
+
|
|
16
|
+
* When extracting raw text, convert tab elements to tab characters.
|
|
17
|
+
|
|
18
|
+
* Handle internal hyperlinks created with complex fields.
|
|
19
|
+
|
|
20
|
+
* Update JSZip to 3.2.0. This addresses CVE-2021-23413 in JSZip.
|
|
21
|
+
|
|
22
|
+
# 1.4.17
|
|
23
|
+
|
|
24
|
+
* Handle w:num with invalid w:abstractNumId.
|
|
25
|
+
* Update underscore to 1.13.1.
|
|
26
|
+
|
|
27
|
+
# 1.4.16
|
|
28
|
+
|
|
29
|
+
* Convert symbols in supported fonts to corresponding Unicode characters.
|
|
30
|
+
|
|
31
|
+
# 1.4.15
|
|
32
|
+
|
|
33
|
+
* Support numbering defined by paragraph style.
|
|
34
|
+
|
|
35
|
+
# 1.4.14
|
|
36
|
+
|
|
37
|
+
* Add style mapping for all caps.
|
|
38
|
+
|
|
39
|
+
# 1.4.13
|
|
40
|
+
|
|
41
|
+
* Use package-lock.json instead of npm-shrinkwrap.json.
|
|
42
|
+
|
|
43
|
+
# 1.4.12
|
|
44
|
+
|
|
45
|
+
* Handle underline elements where w:val is "none".
|
|
46
|
+
|
|
47
|
+
# 1.4.11
|
|
48
|
+
|
|
49
|
+
* Re-publishing to remove superfluous files.
|
|
50
|
+
|
|
51
|
+
# 1.4.10
|
|
52
|
+
|
|
53
|
+
* Read font size for runs.
|
|
54
|
+
* Support soft hyphens.
|
|
55
|
+
|
|
1
56
|
# 1.4.9
|
|
2
57
|
|
|
3
58
|
* Allow hyperlinks to be collapsed.
|
package/README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Mammoth .docx to HTML converter
|
|
2
2
|
|
|
3
3
|
Mammoth is designed to convert .docx documents,
|
|
4
|
-
such as those created by Microsoft Word,
|
|
4
|
+
such as those created by Microsoft Word, Google Docs and LibreOffice,
|
|
5
5
|
and convert them to HTML.
|
|
6
6
|
Mammoth aims to produce simple and clean HTML by using semantic information in the document,
|
|
7
7
|
and ignoring other details.
|
|
@@ -104,18 +104,19 @@ Where `custom-style-map` looks something like:
|
|
|
104
104
|
p[style-name='Aside Heading'] => div.aside > h2:fresh
|
|
105
105
|
p[style-name='Aside Text'] => div.aside > p:fresh
|
|
106
106
|
|
|
107
|
-
|
|
107
|
+
A description of the syntax for style maps can be found in the section ["Writing style maps"](#writing-style-maps).
|
|
108
108
|
|
|
109
109
|
#### Markdown
|
|
110
110
|
|
|
111
|
+
Markdown support is deprecated.
|
|
112
|
+
Generating HTML and using a separate library to convert the HTML to Markdown is recommended,
|
|
113
|
+
and is likely to produce better results.
|
|
114
|
+
|
|
111
115
|
Using `--output-format=markdown` will cause Markdown to be generated.
|
|
112
116
|
For instance:
|
|
113
117
|
|
|
114
118
|
mammoth document.docx --output-format=markdown
|
|
115
119
|
|
|
116
|
-
Markdown support is still in its early stages,
|
|
117
|
-
so you may find some features are unsupported.
|
|
118
|
-
|
|
119
120
|
### Library
|
|
120
121
|
|
|
121
122
|
In node.js, mammoth can be required in the usual way:
|
|
@@ -167,7 +168,7 @@ Mammoth maps some common .docx styles to HTML elements.
|
|
|
167
168
|
For instance,
|
|
168
169
|
a paragraph with the style name `Heading 1` is converted to a `h1` element.
|
|
169
170
|
You can pass in a custom map for styles by passing an options object with a `styleMap` property as a second argument to `convertToHtml`.
|
|
170
|
-
A description of the syntax for style maps can be found in the section "Writing style maps".
|
|
171
|
+
A description of the syntax for style maps can be found in the section ["Writing style maps"](#writing-style-maps).
|
|
171
172
|
For instance, if paragraphs with the style name `Section Title` should be converted to `h1` elements,
|
|
172
173
|
and paragraphs with the style name `Subsection Title` should be converted to `h2` elements:
|
|
173
174
|
|
|
@@ -343,7 +344,7 @@ Converts the source document to HTML.
|
|
|
343
344
|
ignoring blank lines and lines starting with `#`:
|
|
344
345
|
If `options.styleMap` is an array,
|
|
345
346
|
each element is expected to be a string representing a single style mapping.
|
|
346
|
-
See "Writing style maps" for a reference to the syntax for style maps.
|
|
347
|
+
See ["Writing style maps"](#writing-style-maps) for a reference to the syntax for style maps.
|
|
347
348
|
|
|
348
349
|
* `includeEmbeddedStyleMap`: by default,
|
|
349
350
|
if the document contains an embedded style map, then it is combined with the default style map.
|
|
@@ -543,10 +544,12 @@ var options = {
|
|
|
543
544
|
Or if you want paragraphs that have been explicitly set to use monospace fonts to represent code:
|
|
544
545
|
|
|
545
546
|
```javascript
|
|
547
|
+
const monospaceFonts = ["consolas", "courier", "courier new"];
|
|
548
|
+
|
|
546
549
|
function transformParagraph(paragraph) {
|
|
547
550
|
var runs = mammoth.transforms.getDescendantsOfType(paragraph, "run");
|
|
548
551
|
var isMatch = runs.length > 0 && runs.every(function(run) {
|
|
549
|
-
return run.font &&
|
|
552
|
+
return run.font && monospaceFonts.indexOf(run.font.toLowerCase()) !== -1;
|
|
550
553
|
});
|
|
551
554
|
if (isMatch) {
|
|
552
555
|
return {
|
|
@@ -624,7 +627,7 @@ You can specify this by using the `:fresh` modifier:
|
|
|
624
627
|
|
|
625
628
|
`p[style-name='Heading 1'] => h1:fresh`
|
|
626
629
|
|
|
627
|
-
The two
|
|
630
|
+
The two consecutive `Heading 1` .docx paragraphs will then be converted to two separate `h1` elements.
|
|
628
631
|
|
|
629
632
|
Reusing elements is useful in generating more complicated HTML structures.
|
|
630
633
|
For instance, suppose your .docx contains asides.
|
|
@@ -725,6 +728,17 @@ strike
|
|
|
725
728
|
Note that this matches text that has had strikethrough explicitly applied to it.
|
|
726
729
|
It will not match any text that is struckthrough because of its paragraph or run style.
|
|
727
730
|
|
|
731
|
+
#### All caps
|
|
732
|
+
|
|
733
|
+
Match explicitly all caps text:
|
|
734
|
+
|
|
735
|
+
```
|
|
736
|
+
all-caps
|
|
737
|
+
```
|
|
738
|
+
|
|
739
|
+
Note that this matches text that has had all caps explicitly applied to it.
|
|
740
|
+
It will not match any text that is all caps because of its paragraph or run style.
|
|
741
|
+
|
|
728
742
|
#### Small caps
|
|
729
743
|
|
|
730
744
|
Match explicitly small caps text:
|
|
@@ -736,6 +750,15 @@ small-caps
|
|
|
736
750
|
Note that this matches text that has had small caps explicitly applied to it.
|
|
737
751
|
It will not match any text that is small caps because of its paragraph or run style.
|
|
738
752
|
|
|
753
|
+
#### Ignoring document elements
|
|
754
|
+
|
|
755
|
+
Use `!` to ignore a document element.
|
|
756
|
+
For instance, to ignore any paragraph with the style `Comment`:
|
|
757
|
+
|
|
758
|
+
```
|
|
759
|
+
p[style-name='Comment'] => !
|
|
760
|
+
```
|
|
761
|
+
|
|
739
762
|
### HTML paths
|
|
740
763
|
|
|
741
764
|
#### Single elements
|
|
@@ -798,15 +821,6 @@ div.aside > h2
|
|
|
798
821
|
|
|
799
822
|
You can nest elements to any depth.
|
|
800
823
|
|
|
801
|
-
#### Ignoring document elements
|
|
802
|
-
|
|
803
|
-
Use `!` to ignore a document element.
|
|
804
|
-
For instance, to ignore any paragraph with the style `Comment`:
|
|
805
|
-
|
|
806
|
-
```
|
|
807
|
-
p[style-name='Comment'] => !
|
|
808
|
-
```
|
|
809
|
-
|
|
810
824
|
## Upgrading to later versions
|
|
811
825
|
|
|
812
826
|
### 1.0.0
|
|
@@ -881,3 +895,10 @@ Thanks to the following people for their contributions to Mammoth:
|
|
|
881
895
|
* [Jacob Wang](https://github.com/jaceyshome)
|
|
882
896
|
|
|
883
897
|
* Supporting styles defined without names
|
|
898
|
+
|
|
899
|
+
## Donations
|
|
900
|
+
|
|
901
|
+
If you'd like to say thanks, feel free to [make a donation through Ko-fi](https://ko-fi.com/S6S01MG20).
|
|
902
|
+
|
|
903
|
+
If you use Mammoth as part of your business,
|
|
904
|
+
please consider supporting the ongoing maintenance of Mammoth by [making a weekly donation through Liberapay](https://liberapay.com/mwilliamson/donate).
|
package/lib/document-to-html.js
CHANGED
|
@@ -130,6 +130,9 @@ function DocumentConversion(options, comments) {
|
|
|
130
130
|
if (run.isSmallCaps) {
|
|
131
131
|
paths.push(findHtmlPathForRunProperty("smallCaps"));
|
|
132
132
|
}
|
|
133
|
+
if (run.isAllCaps) {
|
|
134
|
+
paths.push(findHtmlPathForRunProperty("allCaps"));
|
|
135
|
+
}
|
|
133
136
|
if (run.isStrikethrough) {
|
|
134
137
|
paths.push(findHtmlPathForRunProperty("strikethrough", "s"));
|
|
135
138
|
}
|
package/lib/documents.js
CHANGED
|
@@ -73,9 +73,11 @@ function Run(children, properties) {
|
|
|
73
73
|
isUnderline: properties.isUnderline,
|
|
74
74
|
isItalic: properties.isItalic,
|
|
75
75
|
isStrikethrough: properties.isStrikethrough,
|
|
76
|
+
isAllCaps: properties.isAllCaps,
|
|
76
77
|
isSmallCaps: properties.isSmallCaps,
|
|
77
78
|
verticalAlignment: properties.verticalAlignment || verticalAlignment.baseline,
|
|
78
79
|
font: properties.font || null,
|
|
80
|
+
fontSize: properties.fontSize || null,
|
|
79
81
|
size: properties.size || null,
|
|
80
82
|
color: properties.color || null,
|
|
81
83
|
highlight: properties.highlight || null
|
package/lib/docx/body-reader.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
exports.createBodyReader = createBodyReader;
|
|
2
2
|
exports._readNumberingProperties = readNumberingProperties;
|
|
3
3
|
|
|
4
|
+
var dingbatToUnicode = require("dingbat-to-unicode");
|
|
4
5
|
var _ = require("underscore");
|
|
5
6
|
|
|
6
7
|
var documents = require("../documents");
|
|
@@ -85,17 +86,23 @@ function BodyReader(options) {
|
|
|
85
86
|
|
|
86
87
|
function readRunProperties(element) {
|
|
87
88
|
return readRunStyle(element).map(function(style) {
|
|
89
|
+
var fontSizeString = element.firstOrEmpty("w:sz").attributes["w:val"];
|
|
90
|
+
// w:sz gives the font size in half points, so halve the value to get the size in points
|
|
91
|
+
var fontSize = /^[0-9]+$/.test(fontSizeString) ? parseInt(fontSizeString, 10) / 2 : null;
|
|
92
|
+
|
|
88
93
|
return {
|
|
89
94
|
type: "runProperties",
|
|
90
95
|
styleId: style.styleId,
|
|
91
96
|
styleName: style.name,
|
|
92
97
|
verticalAlignment: element.firstOrEmpty("w:vertAlign").attributes["w:val"],
|
|
93
98
|
font: element.firstOrEmpty("w:rFonts").attributes["w:ascii"],
|
|
99
|
+
fontSize: fontSize,
|
|
94
100
|
size: element.firstOrEmpty("w:sz").attributes["w:val"],
|
|
95
101
|
isBold: readBooleanElement(element.first("w:b")),
|
|
96
|
-
isUnderline:
|
|
102
|
+
isUnderline: readUnderline(element.first("w:u")),
|
|
97
103
|
isItalic: readBooleanElement(element.first("w:i")),
|
|
98
104
|
isStrikethrough: readBooleanElement(element.first("w:strike")),
|
|
105
|
+
isAllCaps: readBooleanElement(element.first("w:caps")),
|
|
99
106
|
isSmallCaps: readBooleanElement(element.first("w:smallCaps")),
|
|
100
107
|
color: element.firstOrEmpty("w:color").attributes["w:val"],
|
|
101
108
|
highlight: element.firstOrEmpty("w:highlight").attributes["w:val"]
|
|
@@ -103,6 +110,15 @@ function BodyReader(options) {
|
|
|
103
110
|
});
|
|
104
111
|
}
|
|
105
112
|
|
|
113
|
+
function readUnderline(element) {
|
|
114
|
+
if (element) {
|
|
115
|
+
var value = element.attributes["w:val"];
|
|
116
|
+
return value !== undefined && value !== "false" && value !== "0" && value !== "none";
|
|
117
|
+
} else {
|
|
118
|
+
return false;
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
|
|
106
122
|
function readBooleanElement(element) {
|
|
107
123
|
if (element) {
|
|
108
124
|
var value = element.attributes["w:val"];
|
|
@@ -153,28 +169,33 @@ function BodyReader(options) {
|
|
|
153
169
|
} else if (type === "end") {
|
|
154
170
|
complexFieldStack.pop();
|
|
155
171
|
} else if (type === "separate") {
|
|
156
|
-
var
|
|
157
|
-
var complexField =
|
|
172
|
+
var hyperlinkOptions = parseHyperlinkFieldCode(currentInstrText.join(''));
|
|
173
|
+
var complexField = hyperlinkOptions === null ? unknownComplexField : {type: "hyperlink", options: hyperlinkOptions};
|
|
158
174
|
complexFieldStack.pop();
|
|
159
175
|
complexFieldStack.push(complexField);
|
|
160
176
|
}
|
|
161
177
|
return emptyResult();
|
|
162
178
|
}
|
|
163
179
|
|
|
164
|
-
function
|
|
180
|
+
function currentHyperlinkOptions() {
|
|
165
181
|
var topHyperlink = _.last(complexFieldStack.filter(function(complexField) {
|
|
166
182
|
return complexField.type === "hyperlink";
|
|
167
183
|
}));
|
|
168
|
-
return topHyperlink ? topHyperlink.
|
|
184
|
+
return topHyperlink ? topHyperlink.options : null;
|
|
169
185
|
}
|
|
170
186
|
|
|
171
187
|
function parseHyperlinkFieldCode(code) {
|
|
172
|
-
var
|
|
173
|
-
if (
|
|
174
|
-
return
|
|
175
|
-
}
|
|
176
|
-
|
|
188
|
+
var externalLinkResult = /\s*HYPERLINK "(.*)"/.exec(code);
|
|
189
|
+
if (externalLinkResult) {
|
|
190
|
+
return {href: externalLinkResult[1]};
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
var internalLinkResult = /\s*HYPERLINK\s+\\l\s+"(.*)"/.exec(code);
|
|
194
|
+
if (internalLinkResult) {
|
|
195
|
+
return {anchor: internalLinkResult[1]};
|
|
177
196
|
}
|
|
197
|
+
|
|
198
|
+
return null;
|
|
178
199
|
}
|
|
179
200
|
|
|
180
201
|
function readInstrText(element) {
|
|
@@ -182,6 +203,24 @@ function BodyReader(options) {
|
|
|
182
203
|
return emptyResult();
|
|
183
204
|
}
|
|
184
205
|
|
|
206
|
+
function readSymbol(element) {
|
|
207
|
+
// See 17.3.3.30 sym (Symbol Character) of ECMA-376 4th edition Part 1
|
|
208
|
+
var font = element.attributes["w:font"];
|
|
209
|
+
var char = element.attributes["w:char"];
|
|
210
|
+
var unicodeCharacter = dingbatToUnicode.hex(font, char);
|
|
211
|
+
if (unicodeCharacter == null && /^F0..$/.test(char)) {
|
|
212
|
+
unicodeCharacter = dingbatToUnicode.hex(font, char.substring(2));
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
if (unicodeCharacter == null) {
|
|
216
|
+
return emptyResultWithMessages([warning(
|
|
217
|
+
"A w:sym element with an unsupported character was ignored: char " + char + " in font " + font
|
|
218
|
+
)]);
|
|
219
|
+
} else {
|
|
220
|
+
return elementResult(new documents.Text(unicodeCharacter.string));
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
|
|
185
224
|
function noteReferenceReader(noteType) {
|
|
186
225
|
return function(element) {
|
|
187
226
|
var noteId = element.attributes["w:id"];
|
|
@@ -221,7 +260,7 @@ function BodyReader(options) {
|
|
|
221
260
|
styleId: style.styleId,
|
|
222
261
|
styleName: style.name,
|
|
223
262
|
alignment: element.firstOrEmpty("w:jc").attributes["w:val"],
|
|
224
|
-
numbering: readNumberingProperties(element.firstOrEmpty("w:numPr"), numbering),
|
|
263
|
+
numbering: readNumberingProperties(style.styleId, element.firstOrEmpty("w:numPr"), numbering),
|
|
225
264
|
indent: readParagraphIndent(element.firstOrEmpty("w:ind")),
|
|
226
265
|
spacing: readParagraphSpacing(element.firstOrEmpty("w:spacing")),
|
|
227
266
|
border: readParagraphBorders(element.firstOrEmpty("w:pBdr"))
|
|
@@ -234,9 +273,9 @@ function BodyReader(options) {
|
|
|
234
273
|
var properties = _.find(children, isRunProperties);
|
|
235
274
|
children = children.filter(negate(isRunProperties));
|
|
236
275
|
|
|
237
|
-
var
|
|
238
|
-
if (
|
|
239
|
-
children = [new documents.Hyperlink(children,
|
|
276
|
+
var hyperlinkOptions = currentHyperlinkOptions();
|
|
277
|
+
if (hyperlinkOptions !== null) {
|
|
278
|
+
children = [new documents.Hyperlink(children, hyperlinkOptions)];
|
|
240
279
|
}
|
|
241
280
|
|
|
242
281
|
return new documents.Run(children, properties);
|
|
@@ -254,6 +293,10 @@ function BodyReader(options) {
|
|
|
254
293
|
"w:noBreakHyphen": function() {
|
|
255
294
|
return elementResult(new documents.Text("\u2011"));
|
|
256
295
|
},
|
|
296
|
+
"w:softHyphen": function(element) {
|
|
297
|
+
return elementResult(new documents.Text("\u00AD"));
|
|
298
|
+
},
|
|
299
|
+
"w:sym": readSymbol,
|
|
257
300
|
"w:hyperlink": function(element) {
|
|
258
301
|
var relationshipId = element.attributes["r:id"];
|
|
259
302
|
var anchor = element.attributes["w:anchor"];
|
|
@@ -451,7 +494,12 @@ function BodyReader(options) {
|
|
|
451
494
|
function readBlip(element, blip) {
|
|
452
495
|
var properties = element.first("wp:docPr").attributes;
|
|
453
496
|
var altText = isBlank(properties.descr) ? properties.title : properties.descr;
|
|
454
|
-
|
|
497
|
+
var blipImageFile = findBlipImageFile(blip);
|
|
498
|
+
if (blipImageFile === null) {
|
|
499
|
+
return emptyResultWithMessages([warning("Could not find image file for a:blip element")]);
|
|
500
|
+
} else {
|
|
501
|
+
return readImage(blipImageFile, altText);
|
|
502
|
+
}
|
|
455
503
|
}
|
|
456
504
|
|
|
457
505
|
function isBlank(value) {
|
|
@@ -463,12 +511,14 @@ function BodyReader(options) {
|
|
|
463
511
|
var linkRelationshipId = blip.attributes["r:link"];
|
|
464
512
|
if (embedRelationshipId) {
|
|
465
513
|
return findEmbeddedImageFile(embedRelationshipId);
|
|
466
|
-
} else {
|
|
514
|
+
} else if (linkRelationshipId) {
|
|
467
515
|
var imagePath = relationships.findTargetByRelationshipId(linkRelationshipId);
|
|
468
516
|
return {
|
|
469
517
|
path: imagePath,
|
|
470
518
|
read: files.read.bind(files, imagePath)
|
|
471
519
|
};
|
|
520
|
+
} else {
|
|
521
|
+
return null;
|
|
472
522
|
}
|
|
473
523
|
}
|
|
474
524
|
|
|
@@ -512,7 +562,14 @@ function BodyReader(options) {
|
|
|
512
562
|
}
|
|
513
563
|
|
|
514
564
|
|
|
515
|
-
function readNumberingProperties(element, numbering) {
|
|
565
|
+
function readNumberingProperties(styleId, element, numbering) {
|
|
566
|
+
if (styleId != null) {
|
|
567
|
+
var levelByStyleId = numbering.findLevelByParagraphStyleId(styleId);
|
|
568
|
+
if (levelByStyleId != null) {
|
|
569
|
+
return levelByStyleId;
|
|
570
|
+
}
|
|
571
|
+
}
|
|
572
|
+
|
|
516
573
|
var level = element.firstOrEmpty("w:ilvl").attributes["w:val"];
|
|
517
574
|
var numId = element.firstOrEmpty("w:numId").attributes["w:val"];
|
|
518
575
|
if (level === undefined || numId === undefined) {
|