html2doc 1.4.2.1 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,878 +0,0 @@
1
- require "base64"
2
-
3
- def html_input(xml)
4
- <<~HTML
5
- <html><head><title>blank</title>
6
- <meta name="Originator" content="Me"/>
7
- </head>
8
- <body>
9
- #{xml}
10
- </body></html>
11
- HTML
12
- end
13
-
14
- def html_input_no_title(xml)
15
- <<~HTML
16
- <html><head>
17
- <meta name="Originator" content="Me"/>
18
- </head>
19
- <body>
20
- #{xml}
21
- </body></html>
22
- HTML
23
- end
24
-
25
- def html_input_empty_head(xml)
26
- <<~HTML
27
- <html><head></head>
28
- <body>
29
- #{xml}
30
- </body></html>
31
- HTML
32
- end
33
-
34
- WORD_HDR = <<~HDR.freeze
35
- MIME-Version: 1.0
36
- Content-Type: multipart/related; boundary="----=_NextPart_"
37
-
38
- ------=_NextPart_
39
- Content-ID: <test.htm>
40
- Content-Disposition: inline; filename="test.htm"
41
- Content-Type: text/html; charset="utf-8"
42
-
43
- <?xml version="1.0"?>
44
- <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head>
45
- <xml>
46
- <w:WordDocument>
47
- <w:View>Print</w:View>
48
- <w:Zoom>100</w:Zoom>
49
- <w:DoNotOptimizeForBrowser/>
50
- </w:WordDocument>
51
- </xml>
52
- <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
53
-
54
- <link rel=File-List href="cid:filelist.xml"/>
55
- <title>blank</title><style><![CDATA[
56
- <!--
57
- HDR
58
-
59
- WORD_HDR_END = <<~HDR.freeze
60
- -->
61
- ]]></style>
62
- <meta name="Originator" content="Me"/>
63
- </head>
64
- HDR
65
-
66
- def word_body(xml, footnote)
67
- <<~BODY
68
- <body>
69
- #{xml}
70
- #{footnote}</body></html>
71
- BODY
72
- end
73
-
74
- WORD_FTR1 = <<~FTR.freeze
75
- ------=_NextPart_
76
- Content-ID: <filelist.xml>
77
- Content-Disposition: inline; filename="filelist.xml"
78
- Content-Transfer-Encoding: base64
79
- Content-Type: #{Html2Doc.new({}).mime_type('filelist.xml')}
80
-
81
- PHhtbCB4bWxuczpvPSJ1cm46c2NoZW1hcy1taWNyb3NvZnQtY29tOm9mZmljZTpvZmZpY2UiPgog
82
- ICAgICAgIDxvOk1haW5GaWxlIEhSZWY9Ii4uL3Rlc3QuaHRtIi8+ICA8bzpGaWxlIEhSZWY9ImZp
83
- bGVsaXN0LnhtbCIvPgo8L3htbD4K
84
-
85
- ------=_NextPart_--
86
- FTR
87
-
88
- WORD_FTR2 = <<~FTR.freeze
89
- ------=_NextPart_
90
- Content-ID: <filelist.xml>
91
- Content-Disposition: inline; filename="filelist.xml"
92
- Content-Transfer-Encoding: base64
93
- Content-Type: #{Html2Doc.new({}).mime_type('filelist.xml')}
94
- PHhtbCB4bWxuczpvPSJ1cm46c2NoZW1hcy1taWNyb3NvZnQtY29tOm9mZmljZTpvZmZpY2UiPgog
95
- ICAgICAgIDxvOk1haW5GaWxlIEhSZWY9Ii4uL3Rlc3QuaHRtIi8+ICA8bzpGaWxlIEhSZWY9ImZp
96
- bGVsaXN0LnhtbCIvPgogIDxvOkZpbGUgSFJlZj0iaGVhZGVyLmh0bWwiLz4KPC94bWw+Cg==
97
- ------=_NextPart_
98
- FTR
99
-
100
- WORD_FTR3 = <<~FTR.freeze
101
- ------=_NextPart_
102
- Content-ID: <filelist.xml>
103
- Content-Disposition: inline; filename="filelist.xml"
104
- Content-Transfer-Encoding: base64
105
- Content-Type: #{Html2Doc.new({}).mime_type('filelist.xml')}
106
-
107
- PHhtbCB4bWxuczpvPSJ1cm46c2NoZW1hcy1taWNyb3NvZnQtY29tOm9mZmljZTpvZmZpY2UiPgog
108
- ICAgICAgIDxvOk1haW5GaWxlIEhSZWY9Ii4uL3Rlc3QuaHRtIi8+ICA8bzpGaWxlIEhSZWY9IjFh
109
- YzIwNjVmLTAzZjAtNGM3YS1iOWE2LTkyZTgyMDU5MWJmMC5wbmciLz4KICA8bzpGaWxlIEhSZWY9
110
- ImZpbGVsaXN0LnhtbCIvPgo8L3htbD4K
111
- ------=_NextPart_
112
- Content-ID: <cb7b0d19-891e-4634-815a-570d019d454c.png>
113
- Content-Disposition: inline; filename="cb7b0d19-891e-4634-815a-570d019d454c.png"
114
- Content-Transfer-Encoding: base64
115
- Content-Type: image/png
116
- ------=_NextPart_--
117
- FTR
118
-
119
- HEADERHTML = <<~FTR.freeze
120
- <html xmlns:v="urn:schemas-microsoft-com:vml"
121
- xmlns:o="urn:schemas-microsoft-com:office:office"
122
- xmlns:w="urn:schemas-microsoft-com:office:word"
123
- xmlns:m="http://schemas.microsoft.com/office/2004/12/omml"
124
- xmlns:mv="http://macVmlSchemaUri" xmlns="http://www.w3.org/TR/REC-html40">
125
- <head>
126
- <meta name=Title content="">
127
- <meta name=Keywords content="">
128
- <meta http-equiv=Content-Type content="text/html; charset=utf-8">
129
- <meta name=ProgId content=Word.Document>
130
- <meta name=Generator content="Microsoft Word 15">
131
- <meta name=Originator content="Microsoft Word 15">
132
- <link id=Main-File rel=Main-File href="FILENAME.html">
133
- <!--[if gte mso 9]><xml>
134
- <o:shapedefaults v:ext="edit" spidmax="2049"/>
135
- </xml><![endif]-->
136
- </head>
137
- <body lang=EN link=blue vlink="#954F72">
138
- <div style='mso-element:footnote-separator' id=fs>
139
- <p class=MsoNormal style='margin-bottom:0cm;margin-bottom:.0001pt;line-height:
140
- normal'><span lang=EN-GB><span style='mso-special-character:footnote-separator'><![if !supportFootnotes]>
141
- <hr align=left size=1 width="33%">
142
- <![endif]></span></span></p>
143
- </div>
144
- <div style='mso-element:footnote-continuation-separator' id=fcs>
145
- <p class=MsoNormal style='margin-bottom:0cm;margin-bottom:.0001pt;line-height:
146
- normal'><span lang=EN-GB><span style='mso-special-character:footnote-continuation-separator'><![if !supportFootnotes]>
147
- <hr align=left size=1>
148
- <![endif]></span></span></p>
149
- </div>
150
- <div style='mso-element:endnote-separator' id=es>
151
- <p class=MsoNormal style='margin-bottom:0cm;margin-bottom:.0001pt;line-height:
152
- normal'><span lang=EN-GB><span style='mso-special-character:footnote-separator'><![if !supportFootnotes]>
153
- <hr align=left size=1 width="33%">
154
- <![endif]></span></span></p>
155
- </div>
156
- <div style='mso-element:endnote-continuation-separator' id=ecs>
157
- <p class=MsoNormal style='margin-bottom:0cm;margin-bottom:.0001pt;line-height:
158
- normal'><span lang=EN-GB><span style='mso-special-character:footnote-continuation-separator'><![if !supportFootnotes]>
159
- <hr align=left size=1>
160
- <![endif]></span></span></p>
161
- </div>
162
- <div style='mso-element:header' id=eh1>
163
- <p class=MsoHeader align=left style='text-align:left;line-height:12.0pt;
164
- mso-line-height-rule:exactly'><span lang=EN-GB>ISO/IEC&#x26;nbsp;CD 17301-1:2016(E)</span></p>
165
- </div>
166
- <div style='mso-element:header' id=h1>
167
- <p class=MsoHeader style='margin-bottom:18.0pt'><span lang=EN-GB
168
- style='font-size:10.0pt;mso-bidi-font-size:11.0pt;font-weight:normal'>&#xa9;
169
- ISO/IEC&#x26;nbsp;2016&#x26;nbsp;&#x2013; All rights reserved</span><span lang=EN-GB
170
- style='font-weight:normal'><o:p></o:p></span></p>
171
- </div>
172
- <div style='mso-element:footer' id=ef1>
173
- <p class=MsoFooter style='margin-top:12.0pt;line-height:12.0pt;mso-line-height-rule:
174
- exactly'><!--[if supportFields]><b style='mso-bidi-font-weight:normal'><span
175
- lang=EN-GB style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><span
176
- style='mso-element:field-begin'></span><span
177
- style='mso-spacerun:yes'>&#xa0;</span>PAGE<span style='mso-spacerun:yes'>&#xa0;&#xa0;
178
- </span>\\* MERGEFORMAT <span style='mso-element:field-separator'></span></span></b><![endif]--><b
179
- style='mso-bidi-font-weight:normal'><span lang=EN-GB style='font-size:10.0pt;
180
- mso-bidi-font-size:11.0pt'><span style='mso-no-proof:yes'>2</span></span></b><!--[if supportFields]><b
181
- style='mso-bidi-font-weight:normal'><span lang=EN-GB style='font-size:10.0pt;
182
- mso-bidi-font-size:11.0pt'><span style='mso-element:field-end'></span></span></b><![endif]--><span
183
- lang=EN-GB style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><span
184
- style='mso-tab-count:1'>&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0; </span>&#xa9;
185
- ISO/IEC&#x26;nbsp;2016&#x26;nbsp;&#x2013; All rights reserved<o:p></o:p></span></p>
186
- </div>
187
- <div style='mso-element:header' id=eh2>
188
- <p class=MsoHeader align=left style='text-align:left;line-height:12.0pt;
189
- mso-line-height-rule:exactly'><span lang=EN-GB>ISO/IEC&#x26;nbsp;CD 17301-1:2016(E)</span></p>
190
- </div>
191
- <div style='mso-element:header' id=h2>
192
- <p class=MsoHeader align=right style='text-align:right;line-height:12.0pt;
193
- mso-line-height-rule:exactly'><span lang=EN-GB>ISO/IEC&#x26;nbsp;CD 17301-1:2016(E)</span></p>
194
- </div>
195
- <div style='mso-element:footer' id=ef2>
196
- <p class=MsoFooter style='line-height:12.0pt;mso-line-height-rule:exactly'><!--[if supportFields]><span
197
- lang=EN-GB style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><span
198
- style='mso-element:field-begin'></span><span
199
- style='mso-spacerun:yes'>&#xa0;</span>PAGE<span style='mso-spacerun:yes'>&#xa0;&#xa0;
200
- </span>\\* MERGEFORMAT <span style='mso-element:field-separator'></span></span><![endif]--><span
201
- lang=EN-GB style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><span
202
- style='mso-no-proof:yes'>ii</span></span><!--[if supportFields]><span
203
- lang=EN-GB style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><span
204
- style='mso-element:field-end'></span></span><![endif]--><span lang=EN-GB
205
- style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><span style='mso-tab-count:
206
- 1'>&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0; </span>&#xa9;
207
- ISO/IEC&#x26;nbsp;2016&#x26;nbsp;&#x2013; All rights reserved<o:p></o:p></span></p>
208
- </div>
209
- <div style='mso-element:footer' id=f2>
210
- <p class=MsoFooter style='line-height:12.0pt'><span lang=EN-GB
211
- style='font-size:10.0pt;mso-bidi-font-size:11.0pt'>&#xa9; ISO/IEC&#x26;nbsp;2016&#x26;nbsp;&#x2013; All
212
- rights reserved<span style='mso-tab-count:1'>&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0; </span></span><!--[if supportFields]><span
213
- lang=EN-GB style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><span
214
- style='mso-element:field-begin'></span> PAGE<span style='mso-spacerun:yes'>&#xa0;&#xa0;
215
- </span>\\* MERGEFORMAT <span style='mso-element:field-separator'></span></span><![endif]--><span
216
- lang=EN-GB style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><span
217
- style='mso-no-proof:yes'>iii</span></span><!--[if supportFields]><span
218
- lang=EN-GB style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><span
219
- style='mso-element:field-end'></span></span><![endif]--><span lang=EN-GB
220
- style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><o:p></o:p></span></p>
221
- </div>
222
- <div style='mso-element:footer' id=ef3>
223
- <p class=MsoFooter style='margin-top:12.0pt;line-height:12.0pt;mso-line-height-rule:
224
- exactly'><!--[if supportFields]><b style='mso-bidi-font-weight:normal'><span
225
- lang=EN-GB style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><span
226
- style='mso-element:field-begin'></span><span
227
- style='mso-spacerun:yes'>&#xa0;</span>PAGE<span style='mso-spacerun:yes'>&#xa0;&#xa0;
228
- </span>\\* MERGEFORMAT <span style='mso-element:field-separator'></span></span></b><![endif]--><b
229
- style='mso-bidi-font-weight:normal'><span lang=EN-GB style='font-size:10.0pt;
230
- mso-bidi-font-size:11.0pt'><span style='mso-no-proof:yes'>2</span></span></b><!--[if supportFields]><b
231
- style='mso-bidi-font-weight:normal'><span lang=EN-GB style='font-size:10.0pt;
232
- mso-bidi-font-size:11.0pt'><span style='mso-element:field-end'></span></span></b><![endif]--><span
233
- lang=EN-GB style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><span
234
- style='mso-tab-count:1'>&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0; </span>&#xa9;
235
- ISO/IEC&#x26;nbsp;2016&#x26;nbsp;&#x2013; All rights reserved<o:p></o:p></span></p>
236
- </div>
237
- <div style='mso-element:footer' id=f3>
238
- <p class=MsoFooter style='line-height:12.0pt'><span lang=EN-GB
239
- style='font-size:10.0pt;mso-bidi-font-size:11.0pt'>&#xa9; ISO/IEC&#x26;nbsp;2016&#x26;nbsp;&#x2013; All
240
- rights reserved<span style='mso-tab-count:1'>&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0; </span></span><!--[if supportFields]><b
241
- style='mso-bidi-font-weight:normal'><span lang=EN-GB style='font-size:10.0pt;
242
- mso-bidi-font-size:11.0pt'><span style='mso-element:field-begin'></span>
243
- PAGE<span style='mso-spacerun:yes'>&#xa0;&#xa0; </span>\\* MERGEFORMAT <span
244
- style='mso-element:field-separator'></span></span></b><![endif]--><b
245
- style='mso-bidi-font-weight:normal'><span lang=EN-GB style='font-size:10.0pt;
246
- mso-bidi-font-size:11.0pt'><span style='mso-no-proof:yes'>3</span></span></b><!--[if supportFields]><b
247
- style='mso-bidi-font-weight:normal'><span lang=EN-GB style='font-size:10.0pt;
248
- mso-bidi-font-size:11.0pt'><span style='mso-element:field-end'></span></span></b><![endif]--><span
249
- lang=EN-GB style='font-size:10.0pt;mso-bidi-font-size:11.0pt'><o:p></o:p></span></p>
250
- </div>
251
- </body>
252
- </html>
253
- FTR
254
-
255
- ASCII_MATH = '<m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><m:r><m:t>=</m:t></m:r><m:sSup><m:e><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:r><m:t>n+1</m:t></m:r></m:e></m:d></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f></m:e></m:d></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup>'.freeze
256
-
257
- DEFAULT_STYLESHEET = File.read("lib/html2doc/wordstyle.css",
258
- encoding: "utf-8").freeze
259
-
260
- def guid_clean(xml)
261
- xml.gsub(/NextPart_[0-9a-f.]+/, "NextPart_")
262
- end
263
-
264
- def image_clean(xml)
265
- xml.gsub(%r{[0-9a-f-]+\.png}, "image.png")
266
- .gsub(%r{[0-9a-f-]+\.gif}, "image.gif")
267
- .gsub(%r{[0-9a-f-]+\.(jpeg|jpg)}, "image.jpg")
268
- .gsub(%r{------=_NextPart_\s+Content-Location: file:///C:/Doc/test_files/image\.(png|gif).*?\s-----=_NextPart_}m, "------=_NextPart_")
269
- .gsub(%r{Content-Type: image/(png|gif|jpeg)[^-]*------=_NextPart_-?-?}m, "")
270
- .gsub(%r{ICAgICAg[^-]*-----}m, "-----")
271
- .gsub(%r{\s*</img>\s*}m, "</img>")
272
- .gsub(%r{</body>\s*</html>}m, "</body></html>")
273
- end
274
-
275
- RSpec.describe Html2Doc do
276
- it "has a version number" do
277
- expect(Html2Doc::VERSION).not_to be nil
278
- end
279
-
280
- it "preserves Word HTML directives" do
281
- Html2Doc.new(filename: "test").process(html_input(%[A<!--[if gte mso 9]>X<![endif]-->B]))
282
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
283
- .to match_fuzzy(<<~OUTPUT)
284
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
285
- #{word_body(%{A<!--[if gte mso 9]>X<![endif]-->B},
286
- '<div style="mso-element:footnote-list"/>')}
287
- #{WORD_FTR1}
288
- OUTPUT
289
- end
290
-
291
- it "processes a blank document" do
292
- Html2Doc.new(filename: "test").process(html_input(""))
293
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
294
- .to match_fuzzy(<<~OUTPUT)
295
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
296
- #{word_body('', '<div style="mso-element:footnote-list"/>')} #{WORD_FTR1}
297
- OUTPUT
298
- end
299
-
300
- it "removes any temp files" do
301
- File.delete("test.doc")
302
- Html2Doc.new(filename: "test").process(html_input(""))
303
- expect(File.exist?("test.doc")).to be true
304
- expect(File.exist?("test.htm")).to be false
305
- expect(File.exist?("test_files")).to be false
306
- end
307
-
308
- it "processes a stylesheet in an HTML document with a title" do
309
- Html2Doc.new(filename: "test", stylesheet: "lib/html2doc/wordstyle.css")
310
- .process(html_input(""))
311
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
312
- .to match_fuzzy(<<~OUTPUT)
313
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
314
- #{word_body('', '<div style="mso-element:footnote-list"/>')} #{WORD_FTR1}
315
- OUTPUT
316
- end
317
-
318
- it "processes a stylesheet in an HTML document without a title" do
319
- Html2Doc.new(filename: "test",
320
- stylesheet: "lib/html2doc/wordstyle.css")
321
- .process(html_input_no_title(""))
322
- expect(guid_clean(File.read("test.doc",
323
- encoding: "utf-8")))
324
- .to match_fuzzy(<<~OUTPUT)
325
- #{WORD_HDR.sub('<title>blank</title>', '')}
326
- #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
327
- #{word_body('', '<div style="mso-element:footnote-list"/>')} #{WORD_FTR1}
328
- OUTPUT
329
- end
330
-
331
- it "processes a stylesheet in an HTML document with an empty head" do
332
- Html2Doc.new(filename: "test",
333
- stylesheet: "lib/html2doc/wordstyle.css")
334
- .process(html_input_empty_head(""))
335
- word_hdr_end = WORD_HDR_END
336
- .sub(%(<meta name="Originator" content="Me"/>\n), "")
337
- .sub("</style>\n</head>", "</style></head>")
338
- expect(guid_clean(File.read("test.doc",
339
- encoding: "utf-8")))
340
- .to match_fuzzy(<<~OUTPUT)
341
- #{WORD_HDR.sub('<title>blank</title>', '')}
342
- #{DEFAULT_STYLESHEET}
343
- #{word_hdr_end}
344
- #{word_body('', '<div style="mso-element:footnote-list"/>')} #{WORD_FTR1}
345
- OUTPUT
346
- end
347
-
348
- it "processes a header" do
349
- Html2Doc.new(filename: "test",
350
- header_file: "spec/header.html")
351
- .process(html_input(""))
352
- html = guid_clean(File.read("test.doc", encoding: "utf-8"))
353
- hdr = Base64.decode64(
354
- html
355
- .sub(%r{^.*Content-Location: file:///C:/Doc/test_files/header.html}, "")
356
- .sub(%r{^.*Content-Type: text/html charset="utf-8"}m, "")
357
- .sub(%r{------=_NextPart_--.*$}m, ""),
358
- ).force_encoding("UTF-8")
359
- # expect(hdr.gsub(/\xa0/, " ")).to match_fuzzy(HEADERHTML)
360
- expect(HTMLEntities.new.encode(hdr, :hexadecimal)
361
- .gsub(/&#x3c;/, "<").gsub(/&#x3e;/, ">")
362
- .gsub(/&#x27;/, "'").gsub(/&#x22;/, '"')
363
- .gsub(/&#xd;/, "&#xa;").gsub(/&#xa;/, "\n"))
364
- .to match_fuzzy(HEADERHTML)
365
- expect(html.sub(%r{Content-ID: <header.html>.*$}m, ""))
366
- .to match_fuzzy(<<~OUTPUT)
367
- #{WORD_HDR} #{DEFAULT_STYLESHEET.gsub(/url\("[^"]+"\)/, 'url(cid:header.html)')}
368
- #{WORD_HDR_END} #{word_body('', '<div style="mso-element:footnote-list"/>')} #{WORD_FTR2}
369
- OUTPUT
370
- end
371
-
372
- it "processes a header with an image" do
373
- Html2Doc.new(filename: "test",
374
- header_file: "spec/header_img.html")
375
- .process(html_input(""))
376
- doc = guid_clean(File.read("test.doc", encoding: "utf-8"))
377
- expect(doc).to match(%r{Content-Type: image/png})
378
- expect(doc).to match(%r{iVBORw0KGgoAAAANSUhEUgAAA5cAAAN7CAYAAADRE24cAAAgAElEQVR4XuydB5gUxdaGC65gTogB})
379
- end
380
-
381
- it "processes a header with an image with absolute path" do
382
- doc = File.read("spec/header_img.html", encoding: "utf-8")
383
- File.open("spec/header_img1.html", "w:UTF-8") do |f|
384
- f.write(
385
- doc.sub(%r{spec/19160-6.png},
386
- File.expand_path(File.join(File.dirname(__FILE__),
387
- "19160-6.png"))),
388
- )
389
- end
390
- Html2Doc.new(filename: "test",
391
- header_file: "spec/header_img1.html")
392
- .process(html_input(""))
393
- doc = guid_clean(File.read("test.doc", encoding: "utf-8"))
394
- expect(doc).to match(%r{Content-Type: image/png})
395
- expect(doc).to match(%r{iVBORw0KGgoAAAANSUhEUgAAA5cAAAN7CAYAAADRE24cAAAgAElEQVR4XuydB5gUxdaGC65gTogB})
396
- end
397
-
398
- it "processes a populated document" do
399
- simple_body = "<h1>Hello word!</h1>
400
- <div>This is a very simple document</div>"
401
- Html2Doc.new(filename: "test").process(html_input(simple_body))
402
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
403
- .to match_fuzzy(<<~OUTPUT)
404
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
405
- #{word_body(simple_body, '<div style="mso-element:footnote-list"/>')}
406
- #{WORD_FTR1}
407
- OUTPUT
408
- end
409
-
410
- it "processes AsciiMath" do
411
- Html2Doc.new(filename: "test",
412
- asciimathdelims: ["{{", "}}"])
413
- .process(html_input(%[<div>{{sum_(i=1)^n i^3=((n(n+1))/2)^2 text("integer"))}}</div>]))
414
- expect(guid_clean(File.read("test.doc",
415
- encoding: "utf-8")))
416
- .to match_fuzzy(<<~OUTPUT)
417
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
418
- #{word_body(%{
419
- <div><m:oMath>
420
- <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>=</m:t></m:r></span><m:sSup><m:e><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:r><m:t>n+1</m:t></m:r></m:e></m:d></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f></m:e></m:d></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>"integer"</m:t></m:r><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>)</m:t></m:r></span>
421
- </m:oMath>
422
- </div>}, '<div style="mso-element:footnote-list"/>')}
423
- #{WORD_FTR1}
424
- OUTPUT
425
- end
426
-
427
- it "processes mstyle" do
428
- Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"])
429
- .process(html_input(%[<div>{{bb (-log_2 (p_u)) bb "BB" bbb "BBB" cc "CC" bcc "BCC" tt "TT" fr "FR" bfr "BFR" sf "SF" bsf "BSFα" sfi "SFI" sfbi "SFBIα" bii "BII" ii "II"}}</div>]))
430
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
431
- .to match_fuzzy(<<~OUTPUT)
432
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
433
- #{word_body(%{
434
- <div><m:oMath>
435
- <span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x2212;</m:t></m:r></span><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>log</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>2</m:t></m:r></span></m:sub></m:sSub><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>p</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>u</m:t></m:r></span></m:sub></m:sSub></m:e></m:d><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:nor></m:nor><m:sty m:val="b"></m:sty></m:rPr><m:t>BB</m:t></m:r></span><m:r><m:rPr><m:nor></m:nor><m:scr m:val="double-struck"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D539;&#x1D539;&#x1D539;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr></m:rPr><m:t>&#x1D49E;&#x1D49E;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D4D1;&#x1D4D2;&#x1D4D2;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="monospace"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D683;&#x1D683;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D509;&#x211C;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D56D;&#x1D571;&#x1D57D;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D5B2;&#x1D5A5;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D5D5;&#x1D5E6;&#x1D5D9;&#x1D770;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr></m:rPr><m:t>&#x1D5B2;&#x1D5A5;&#x1D5A8;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="bi"></m:sty></m:rPr><m:t>&#x1D64E;&#x1D641;&#x1D63D;&#x1D644;&#x1D7AA;</m:t></m:r><span class="nostem" style="font-weight:bold;"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="bi"></m:sty></m:rPr><m:t>BII</m:t></m:r></span><span class="nostem"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="i"></m:sty></m:rPr><m:t>II</m:t></m:r></span>
436
- </m:oMath>
437
- </div>}, '<div style="mso-element:footnote-list"/>')}
438
- #{WORD_FTR1}
439
- OUTPUT
440
- end
441
-
442
- it "processes spaces in AsciiMath" do
443
- Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"])
444
- .process(html_input(%[<div>{{text " integer ")}}</div>]))
445
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
446
- .to match_fuzzy(<<~OUTPUT)
447
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
448
- #{word_body('
449
- <div><m:oMath>
450
- <m:r><m:t>text</m:t></m:r><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>&#xA0;integer&#xA0;</m:t></m:r><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>)</m:t></m:r></span>
451
- </m:oMath>
452
- </div>', '<div style="mso-element:footnote-list"/>')}
453
- #{WORD_FTR1}
454
- OUTPUT
455
- end
456
-
457
- it "processes spaces in MathML mtext" do
458
- Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"])
459
- .process(html_input("<div><math xmlns='http://www.w3.org/1998/Math/MathML'>
460
- <mrow><mi>H</mi><mtext> original </mtext><mi>J</mi></mrow>
461
- </math></div>"))
462
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
463
- .to match_fuzzy(<<~OUTPUT)
464
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
465
- #{word_body('<div><m:oMath>
466
- <m:r><m:t>H</m:t></m:r><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>&#xA0;original&#xA0;</m:t></m:r><m:r><m:t>J</m:t></m:r>
467
- </m:oMath>
468
- </div>', '<div style="mso-element:footnote-list"/>')}
469
- #{WORD_FTR1}
470
- OUTPUT
471
- end
472
-
473
- it "unwraps and converts accent in MathML" do
474
- Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"])
475
- .process(html_input("<div><math xmlns='http://www.w3.org/1998/Math/MathML'>
476
- <mover accent='true'><mrow><mi>p</mi></mrow><mrow><mo>^</mo></mrow></mover>
477
- </math></div>"))
478
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
479
- .to match_fuzzy(<<~OUTPUT)
480
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
481
- #{word_body('<div><m:oMath>
482
- <m:acc><m:accPr><m:chr m:val="&#x302;"></m:chr></m:accPr><m:e><m:r><m:t>p</m:t></m:r></m:e></m:acc>
483
- </m:oMath>
484
- </div>', '<div style="mso-element:footnote-list"/>')}
485
- #{WORD_FTR1}
486
- OUTPUT
487
- end
488
-
489
- it "left-aligns AsciiMath" do
490
- Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"])
491
- .process(html_input("<div style='text-align:left;'>{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}</div>"))
492
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
493
- .to match_fuzzy(<<~OUTPUT)
494
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
495
- #{word_body(%{
496
- <div style="text-align:left;"><m:oMathPara><m:oMathParaPr><m:jc m:val="left"/></m:oMathParaPr><m:oMath>
497
- <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>=</m:t></m:r></span><m:sSup><m:e><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:r><m:t>n+1</m:t></m:r></m:e></m:d></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f></m:e></m:d></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup>
498
- </m:oMath>
499
- </m:oMathPara></div>}, '<div style="mso-element:footnote-list"/>')}
500
- #{WORD_FTR1}
501
- OUTPUT
502
- end
503
-
504
- it "right-aligns AsciiMath" do
505
- Html2Doc.new(filename: "test",
506
- asciimathdelims: ["{{", "}}"])
507
- .process(html_input("<div style='text-align:right;'>{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}</div>"))
508
- expect(guid_clean(File.read("test.doc",
509
- encoding: "utf-8")))
510
- .to match_fuzzy(<<~OUTPUT)
511
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
512
- #{word_body(%{
513
- <div style="text-align:right;"><m:oMathPara><m:oMathParaPr><m:jc m:val="right"/></m:oMathParaPr><m:oMath>
514
- <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>=</m:t></m:r></span><m:sSup><m:e><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:r><m:t>n+1</m:t></m:r></m:e></m:d></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f></m:e></m:d></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup>
515
- </m:oMath>
516
- </m:oMathPara></div>}, '<div style="mso-element:footnote-list"/>')}
517
- #{WORD_FTR1}
518
- OUTPUT
519
- end
520
-
521
- it "raises error in processing of broken AsciiMath" do
522
- begin
523
- expect do
524
- Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"])
525
- .process(html_input(%[<div style='text-align:right;'>{{u_c = 6.6"unitsml(kHz)}}</div>]))
526
- end.to output('parsing: u_c = 6.6"unitsml(kHz)').to_stderr
527
- rescue StandardError
528
- end
529
- expect do
530
- Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"])
531
- .process(html_input(%[<div style='text-align:right;'>{{u_c = 6.6"unitsml(kHz)}}</div>]))
532
- end.to raise_error(StandardError)
533
- end
534
-
535
- it "wraps msup after munderover in MathML" do
536
- Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"])
537
- .process(html_input("<div><math xmlns='http://www.w3.org/1998/Math/MathML'>
538
- <munderover><mo>&#x2211;</mo><mrow><mi>i</mi><mo>=</mo><mn>0</mn></mrow><mrow><mi>n</mi></mrow></munderover><msup><mn>2</mn><mrow><mi>i</mi></mrow></msup></math></div>"))
539
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
540
- .to match_fuzzy(<<~OUTPUT)
541
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
542
- #{word_body('<div><m:oMath>
543
- <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=0</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>2</m:t></m:r></m:e><m:sup><m:r><m:t>i</m:t></m:r></m:sup></m:sSup></m:e></m:nary></m:oMath>
544
- </div>', '<div style="mso-element:footnote-list"/>')}
545
- #{WORD_FTR1}
546
- OUTPUT
547
- end
548
-
549
- it "processes tabs" do
550
- simple_body = "<h1>Hello word!</h1>
551
- <div>This is a very &tab; simple document</div>"
552
- Html2Doc.new(filename: "test").process(html_input(simple_body))
553
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
554
- .to match_fuzzy(<<~OUTPUT)
555
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
556
- #{word_body(simple_body.gsub(/&tab;/, %[<span style="mso-tab-count:1">&#xA0; </span>]), '<div style="mso-element:footnote-list"/>')}
557
- #{WORD_FTR1}
558
- OUTPUT
559
- end
560
-
561
- it "makes unstyled paragraphs be MsoNormal" do
562
- simple_body = '<h1>Hello word!</h1>
563
- <p>This is a very simple document</p>
564
- <p class="x">This style stays</p>'
565
- Html2Doc.new(filename: "test").process(html_input(simple_body))
566
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
567
- .to match_fuzzy(<<~OUTPUT)
568
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
569
- #{word_body(simple_body.gsub(/<p>/, %[<p class="MsoNormal">]), '<div style="mso-element:footnote-list"/>')}
570
- #{WORD_FTR1}
571
- OUTPUT
572
- end
573
-
574
- it "makes unstyled list entries be MsoNormal" do
575
- simple_body = '<h1>Hello word!</h1>
576
- <ul>
577
- <li>This is a very simple document</li>
578
- <li class="x">This style stays</li>
579
- </ul>'
580
- Html2Doc.new(filename: "test").process(html_input(simple_body))
581
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
582
- .to match_fuzzy(<<~OUTPUT)
583
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
584
- #{word_body(simple_body.gsub(/<li>/, %[<li class="MsoNormal">]), '<div style="mso-element:footnote-list"/>')}
585
- #{WORD_FTR1}
586
- OUTPUT
587
- end
588
-
589
- it "resizes images for height, in a file in a subdirectory" do
590
- simple_body = '<img src="19160-6.png">'
591
- Html2Doc.new(filename: "spec/test", imagedir: "spec")
592
- .process(html_input(simple_body))
593
- testdoc = File.read("spec/test.doc", encoding: "utf-8")
594
- expect(testdoc).to match(%r{Content-Type: image/png})
595
- expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
596
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
597
- #{image_clean(word_body('<img src="cid:cb7b0d19-891e-4634-815a-570d019d454c.png" width="400" height="388"></img>', '<div style="mso-element:footnote-list"/>'))}
598
- #{image_clean(WORD_FTR3)}
599
- OUTPUT
600
- end
601
-
602
- it "resizes images for width" do
603
- simple_body = '<img src="spec/19160-7.gif">'
604
- Html2Doc.new(filename: "test", imagedir: ".")
605
- .process(html_input(simple_body))
606
- testdoc = File.read("test.doc", encoding: "utf-8")
607
- expect(testdoc).to match(%r{Content-Type: image/gif})
608
- expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
609
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
610
- #{image_clean(word_body('<img src="cid:cb7b0d19-891e-4634-815a-570d019d454c.gif" width="400" height="118"></img>', '<div style="mso-element:footnote-list"/>'))}
611
- #{image_clean(WORD_FTR3).gsub(/image\.png/, 'image.gif')}
612
- OUTPUT
613
- end
614
-
615
- it "resizes images for height" do
616
- simple_body = '<img src="spec/19160-8.jpg">'
617
- Html2Doc.new(filename: "test", imagedir: ".")
618
- .process(html_input(simple_body))
619
- testdoc = File.read("test.doc", encoding: "utf-8")
620
- expect(testdoc).to match(%r{Content-Type: image/jpeg})
621
- expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
622
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
623
- #{image_clean(word_body('<img src="cid:cb7b0d19-891e-4634-815a-570d019d454c.jpg" width="208" height="680"></img>', '<div style="mso-element:footnote-list"/>'))}
624
- #{image_clean(WORD_FTR3).gsub(/image\.png/, 'image.jpg')}
625
- OUTPUT
626
- end
627
-
628
- it "resizes images with missing or auto sizes" do
629
- image = { "src" => "spec/19160-8.jpg" }
630
- expect(Html2Doc.new({}).image_resize(image, "spec/19160-8.jpg", 100, 100))
631
- .to eq [30, 100]
632
- image["width"] = "20"
633
- expect(Html2Doc.new({}).image_resize(image, "spec/19160-8.jpg", 100, 100))
634
- .to eq [20, 65]
635
- image.delete("width")
636
- image["height"] = "50"
637
- expect(Html2Doc.new({}).image_resize(image, "spec/19160-8.jpg", 100, 100))
638
- .to eq [15, 50]
639
- image.delete("height")
640
- image["width"] = "500"
641
- expect(Html2Doc.new({}).image_resize(image, "spec/19160-8.jpg", 100, 100))
642
- .to eq [30, 100]
643
- image.delete("width")
644
- image["height"] = "500"
645
- expect(Html2Doc.new({}).image_resize(image, "spec/19160-8.jpg", 100, 100))
646
- .to eq [30, 100]
647
- image["width"] = "20"
648
- image["height"] = "auto"
649
- expect(Html2Doc.new({}).image_resize(image, "spec/19160-8.jpg", 100, 100))
650
- .to eq [20, 65]
651
- image["width"] = "auto"
652
- image["height"] = "50"
653
- expect(Html2Doc.new({}).image_resize(image, "spec/19160-8.jpg", 100, 100))
654
- .to eq [15, 50]
655
- image["width"] = "500"
656
- image["height"] = "auto"
657
- expect(Html2Doc.new({}).image_resize(image, "spec/19160-8.jpg", 100, 100))
658
- .to eq [30, 100]
659
- image["width"] = "auto"
660
- image["height"] = "500"
661
- expect(Html2Doc.new({}).image_resize(image, "spec/19160-8.jpg", 100, 100))
662
- .to eq [30, 100]
663
- image["width"] = "auto"
664
- image["height"] = "auto"
665
- expect(Html2Doc.new({}).image_resize(image, "spec/19160-8.jpg", 100, 100))
666
- .to eq [30, 100]
667
- end
668
-
669
- it "does not move images if they are external URLs" do
670
- simple_body = '<img src="https://example.com/19160-6.png">'
671
- Html2Doc.new(filename: "test", imagedir: ".")
672
- .process(html_input(simple_body))
673
- testdoc = File.read("test.doc", encoding: "utf-8")
674
- expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
675
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
676
- #{image_clean(word_body('<img src="https://example.com/19160-6.png"></img>', '<div style="mso-element:footnote-list"/>'))}
677
- #{image_clean(WORD_FTR1)}
678
- OUTPUT
679
- end
680
-
681
- it "deals with absolute image locations" do
682
- simple_body = %{<img src="#{__dir__}/19160-6.png">}
683
- Html2Doc.new(filename: "spec/test", imagedir: ".")
684
- .process(html_input(simple_body))
685
- testdoc = File.read("spec/test.doc", encoding: "utf-8")
686
- expect(testdoc).to match(%r{Content-Type: image/png})
687
- expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
688
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
689
- #{image_clean(word_body('<img src="cid:cb7b0d19-891e-4634-815a-570d019d454c.png" width="400" height="388"></img>', '<div style="mso-element:footnote-list"/>'))}
690
- #{image_clean(WORD_FTR3)}
691
- OUTPUT
692
- end
693
-
694
- # it "warns about SVG" do
695
- # simple_body = '<img src="https://example.com/19160-6.svg">'
696
- # expect{ Html2Doc.process(html_input(simple_body), filename: "test") }
697
- # .to output("https://example.com/19160-6.svg: SVG not supported\n").to_stderr
698
- # end
699
-
700
- it "processes epub:type footnotes" do
701
- simple_body = '<div>This is a very simple
702
- document<a epub:type="footnote" href="#a1">1</a> allegedly<a epub:type="footnote" href="#a2">2</a></div>
703
- <aside id="a1">Footnote</aside>
704
- <aside id="a2">Other Footnote</aside>'
705
- Html2Doc.new(filename: "test").process(html_input(simple_body))
706
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
707
- .to match_fuzzy(<<~OUTPUT)
708
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
709
- #{word_body('<div>This is a very simple
710
- document<a epub:type="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a> allegedly<a epub:type="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
711
- '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
712
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Footnote</p></div>
713
- <div style="mso-element:footnote" id="ftn2">
714
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
715
- </div>')}
716
- #{WORD_FTR1}
717
- OUTPUT
718
- end
719
-
720
- it "processes class footnotes" do
721
- simple_body = '<div>This is a very simple
722
- document<a class="footnote" href="#a1">1</a> allegedly<a class="footnote" href="#a2">2</a></div>
723
- <aside id="a1">Footnote</aside>
724
- <aside id="a2">Other Footnote</aside>'
725
- Html2Doc.new(filename: "test").process(html_input(simple_body))
726
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
727
- .to match_fuzzy(<<~OUTPUT)
728
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
729
- #{word_body('<div>This is a very simple
730
- document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
731
- '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
732
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Footnote</p></div>
733
- <div style="mso-element:footnote" id="ftn2">
734
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
735
- </div>')}
736
- #{WORD_FTR1}
737
- OUTPUT
738
- end
739
-
740
- it "processes footnotes with text wrapping the footnote reference" do
741
- simple_body = '<div>This is a very simple
742
- document<a class="footnote" href="#a1">(<span class="MsoFootnoteReference">1</span>)</a> allegedly<a class="footnote" href="#a2">2</a></div>
743
- <aside id="a1">Footnote</aside>
744
- <aside id="a2">Other Footnote</aside>'
745
- Html2Doc.new(filename: "test").process(html_input(simple_body))
746
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
747
- .to match_fuzzy(<<~OUTPUT)
748
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
749
- #{word_body('<div>This is a very simple
750
- document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
751
- '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
752
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a>Footnote</p></div>
753
- <div style="mso-element:footnote" id="ftn2">
754
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
755
- </div>')}
756
- #{WORD_FTR1}
757
- OUTPUT
758
- end
759
-
760
- it "extracts paragraphs from footnotes" do
761
- simple_body = '<div>This is a very simple
762
- document<a class="footnote" href="#a1">1</a> allegedly<a class="footnote" href="#a2">2</a></div>
763
- <aside id="a1"><p>Footnote</p></aside>
764
- <div id="a2"><p>Other Footnote</p></div>'
765
- Html2Doc.new(filename: "test").process(html_input(simple_body))
766
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
767
- .to match_fuzzy(<<~OUTPUT)
768
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
769
- #{word_body('<div>This is a very simple
770
- document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
771
- '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
772
- <p class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Footnote</p></div>
773
- <div style="mso-element:footnote" id="ftn2">
774
- <p class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
775
- </div>')}
776
- #{WORD_FTR1}
777
- OUTPUT
778
- end
779
-
780
- it "labels lists with list styles" do
781
- simple_body = <<~BODY
782
- <div><ul id="0">
783
- <li><div><p><ol id="1"><li><ul id="2"><li><p><ol id="3"><li><ol id="4"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li><div><ul id="5"><li>C</li></ul></div>
784
- BODY
785
- Html2Doc.new(filename: "test", liststyles: { ul: "l1", ol: "l2" })
786
- .process(html_input(simple_body))
787
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
788
- .to match_fuzzy(<<~OUTPUT)
789
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
790
- #{word_body('<div>
791
- <p style="mso-list:l1 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p><div><p style="mso-list:l1 level1 lfo2;" class="MsoListParagraphCxSpFirst">C</p></div>
792
- </div>',
793
- '<div style="mso-element:footnote-list"/>')}
794
- #{WORD_FTR1}
795
- OUTPUT
796
- end
797
-
798
- it "restarts numbering of lists with list styles" do
799
- simple_body = <<~BODY
800
- <div>
801
- <ol id="1"><li><div><p><ol id="2"><li><ul id="3"><li><p><ol id="4"><li><ol id="5"><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol>
802
- <ol id="6"><li><div><p><ol id="7"><li><ul id="8"><li><p><ol id="9"><li><ol id="10"><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol></div>
803
- BODY
804
- Html2Doc.new(filename: "test", liststyles: { ul: "l1", ol: "l2" })
805
- .process(html_input(simple_body))
806
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
807
- .to match_fuzzy(<<~OUTPUT)
808
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
809
- #{word_body('<div>
810
- <p style="mso-list:l2 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p></p></p></p></div></p>
811
- <p style="mso-list:l2 level1 lfo2;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo2;" class="MsoListParagraphCxSpFirst">A</p></p></p></p></div></p></div>',
812
- '<div style="mso-element:footnote-list"/>')}
813
- #{WORD_FTR1}
814
- OUTPUT
815
- end
816
-
817
- it "labels lists with multiple list styles" do
818
- simple_body = <<~BODY
819
- <div><ul class="steps" id="0">
820
- <li><div><p><ol id="1"><li><ul id="2"><li><p><ol id="3"><li><ol id="4"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
821
- <div><ul id="5">
822
- <li><div><p><ol id="6"><li><ul id="7"><li><p><ol id="8"><li><ol id="9"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
823
- <div><ul class="other" id="10">
824
- <li><div><p><ol id="11"><li><ul id="12"><li><p><ol id="13"><li><ol id="14"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
825
- BODY
826
- Html2Doc.new(filename: "test",
827
- liststyles: { ul: "l1", ol: "l2",
828
- steps: "l3" })
829
- .process(html_input(simple_body))
830
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
831
- .to match_fuzzy(<<~OUTPUT)
832
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
833
- #{word_body('<div>
834
- <p style="mso-list:l3 level1 lfo2;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l3 level2 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l3 level4 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l3 level5 lfo2;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l3 level5 lfo2;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l3 level5 lfo2;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p></div>
835
- <div>
836
- <p style="mso-list:l1 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p></div>
837
- <div>
838
- <p style="mso-list:l1 level1 lfo3;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo3;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo3;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo3;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo3;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo3;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p></div>',
839
- '<div style="mso-element:footnote-list"/>')}
840
- #{WORD_FTR1}
841
- OUTPUT
842
- end
843
-
844
- it "replaces id attributes with explicit a@name bookmarks" do
845
- simple_body = <<~BODY
846
- <div>
847
- <p id="a">Hello</p>
848
- <p id="b"/>
849
- </div>
850
- BODY
851
- Html2Doc.new(filename: "test", liststyles: { ul: "l1", ol: "l2" })
852
- .process(html_input(simple_body))
853
- expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
854
- .to match_fuzzy(<<~OUTPUT)
855
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
856
- #{word_body('<div>
857
- <p class="MsoNormal"><a name="a" id="a"></a>Hello</p>
858
- <p class="MsoNormal"><a name="b" id="b"></a></p>
859
- </div>',
860
- '<div style="mso-element:footnote-list"/>')}
861
- #{WORD_FTR1}
862
- OUTPUT
863
- end
864
-
865
- it "test image base64 image encoding" do
866
- simple_body = '<img src="19160-6.png">'
867
- Html2Doc.new(filename: "spec/test", debug: true, imagedir: "spec")
868
- .process(html_input(simple_body))
869
- testdoc = File.read("spec/test.doc", encoding: "utf-8")
870
- base64_image = testdoc[/image\/png\n\n(.*?)\n\n----/m, 1].gsub!("\n", "")
871
- base64_image_basename = testdoc[%r{Content-ID: <([0-9a-z\-]+)\.png}m, 1]
872
- doc_bin_image = Base64.strict_decode64(base64_image)
873
- file_bin_image = IO
874
- .read("spec/test_files/#{base64_image_basename}.png", mode: "rb")
875
- expect(doc_bin_image).to eq file_bin_image
876
- FileUtils.rm_rf %w[spec/test_files spec/test.doc spec/test.htm]
877
- end
878
- end