html2doc 0.9.4 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +4 -1
- data/lib/html2doc/notes.rb +19 -7
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +20 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6dacc967f64994c36c4566869a97848278dce24d5318417dc0e9a901bb4fa9ae
|
4
|
+
data.tar.gz: d96f332a360c7aa04b8c8d5931274737b7c31349ab28cc7e04e34785e0675169
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6e059a12f8d8bb192b31e4d573f6a661e9091a474a6da3c2c60e81e9ee99cf45251da0f5a5badf23ebd9cd913e0ce47e35fe3b7a3ac4e0610626af6c0eba27c6
|
7
|
+
data.tar.gz: dce3f36d0006eebc92e01a12e62dc826329487ebfd6186d425561991175cf6c7057f569bb1dc49ff14a9d471dedf64c236ae60da5cc265374329a2ec1f9dc302
|
data/README.adoc
CHANGED
@@ -25,7 +25,10 @@ The gem currently does the following:
|
|
25
25
|
|
26
26
|
* Convert any AsciiMath and MathML to Word's native mathematical formatting language, OOXML. Word supports copy-pasting MathML into Word and converting it into OOXML; however, the conversion is not infallible (we have found problems with `\sum`: Word claims parameters are missing, and inserts dotted squares to indicate as much), and you may need to post-edit the OOXML.
|
27
27
|
** The gem does attempt to repair the MathML input, to bring it in line with Word's OOXML's expectations. If you find any issues with AsciiMath or MathML input, please raise an issue.
|
28
|
-
* Identify any footnotes in the document (defined as hyperlinks with attributes `class = "Footnote"` or `epub:type = "footnote"`), and render them as Microsoft Word footnotes.
|
28
|
+
* Identify any footnotes in the document (defined as hyperlinks with attributes `class = "Footnote"` or `epub:type = "footnote"`), and render them as Microsoft Word footnotes.
|
29
|
+
** The corresponding footnote content is any `div` or `aside` element whose `@id` attribute matches the target of the footnote hyperlink; e.g. `<a href="#ftn3" epub:type="footnote"><sup>3</sup></a>` points to `<aside id="ftn3">`.
|
30
|
+
** By default, the footnote hyperlink contents are overwritten with the autonumbering element: `<a href="#ftn1" epub:type="footnote"><sup>1</sup></a>` is replaced with `<a style='mso-footnote-id:ftn1' href='#_ftn1' name='_ftnref1' title='' id='_ftnref1'><span class='MsoFootnoteReference'><span style='mso-special-character:footnote'/></span></a>`.
|
31
|
+
** If the footnote hyperlink already contains (as a child) an element marked up as `<span class='MsoFootnoteReference'>`, only that span is replaced by the Microsoft autonumber element; any text surrounding it is preserved in both the footnote reference and the footnote target. For example, `<a href="#ftn1" epub:type="footnote"><span class='MsoFootnoteReference'>1</span>)</a>` will render as the footnote _1)_, both in the link and the target.
|
29
32
|
* Resize any local images in the HTML file to fit within the maximum page size. (Word will otherwise crash on reading the document.)
|
30
33
|
* Optionally apply list styles with predefined bullet and numbering from a Word CSS to the unordered and ordered lists in the document, restarting numbering for each ordered list.
|
31
34
|
* Convert all lists to native Word HTML rendering (using paragraphs with `MsoListParagraphCxSpFirst, MsoListParagraphCxSpMiddle, MsoListParagraphCxSpLast` styles)
|
data/lib/html2doc/notes.rb
CHANGED
@@ -15,7 +15,7 @@ module Html2Doc
|
|
15
15
|
body = docxml.at("//body")
|
16
16
|
list = body.add_child("<div style='mso-element:footnote-list'/>")
|
17
17
|
footnotes.each_with_index do |f, i|
|
18
|
-
fn = list.first.add_child(footnote_container(i + 1))
|
18
|
+
fn = list.first.add_child(footnote_container(docxml, i + 1))
|
19
19
|
f.parent = fn.first
|
20
20
|
footnote_div_to_p(f)
|
21
21
|
end
|
@@ -33,13 +33,16 @@ module Html2Doc
|
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
|
-
|
36
|
+
FN = "<span class='MsoFootnoteReference'>"\
|
37
|
+
"<span style='mso-special-character:footnote'/></span>".freeze
|
38
|
+
|
39
|
+
def self.footnote_container(docxml, i)
|
40
|
+
ref = docxml&.at("//a[@href='#_ftn#{i}']")&.children&.to_xml(indent: 0).
|
41
|
+
gsub(/>\n</, "><") || FN
|
37
42
|
<<~DIV
|
38
43
|
<div style='mso-element:footnote' id='ftn#{i}'>
|
39
44
|
<a style='mso-footnote-id:ftn#{i}' href='#_ftn#{i}'
|
40
|
-
name='_ftnref#{i}' title='' id='_ftnref#{i}'
|
41
|
-
class='MsoFootnoteReference'><span
|
42
|
-
style='mso-special-character:footnote'></span></span></div>
|
45
|
+
name='_ftnref#{i}' title='' id='_ftnref#{i}'>#{ref.strip}</a></div>
|
43
46
|
DIV
|
44
47
|
end
|
45
48
|
|
@@ -49,8 +52,17 @@ module Html2Doc
|
|
49
52
|
note = docxml.at("//*[@name = '#{href}' or @id = '#{href}']")
|
50
53
|
return false if note.nil?
|
51
54
|
set_footnote_link_attrs(a, i)
|
52
|
-
a.
|
53
|
-
|
55
|
+
if a.at("./span[@class = 'MsoFootnoteReference']")
|
56
|
+
a.children.each do |c|
|
57
|
+
if c.name == "span" and c["class"] == "MsoFootnoteReference"
|
58
|
+
c.replace(FN)
|
59
|
+
else
|
60
|
+
c.wrap("<span class='MsoFootnoteReference'></span>")
|
61
|
+
end
|
62
|
+
end
|
63
|
+
else
|
64
|
+
a.children = FN
|
65
|
+
end
|
54
66
|
fn << transform_footnote_text(note)
|
55
67
|
end
|
56
68
|
|
data/lib/html2doc/version.rb
CHANGED
data/spec/html2doc_spec.rb
CHANGED
@@ -613,6 +613,26 @@ RSpec.describe Html2Doc do
|
|
613
613
|
OUTPUT
|
614
614
|
end
|
615
615
|
|
616
|
+
it "processes footnotes with text wrapping the footnote reference" do
|
617
|
+
simple_body = '<div>This is a very simple
|
618
|
+
document<a class="footnote" href="#a1">(<span class="MsoFootnoteReference">1</span>)</a> allegedly<a class="footnote" href="#a2">2</a></div>
|
619
|
+
<aside id="a1">Footnote</aside>
|
620
|
+
<aside id="a2">Other Footnote</aside>'
|
621
|
+
Html2Doc.process(html_input(simple_body), filename: "test")
|
622
|
+
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
623
|
+
to match_fuzzy(<<~OUTPUT)
|
624
|
+
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
625
|
+
#{word_body('<div>This is a very simple
|
626
|
+
document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
|
627
|
+
'<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
|
628
|
+
<p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a>Footnote</p></div>
|
629
|
+
<div style="mso-element:footnote" id="ftn2">
|
630
|
+
<p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
|
631
|
+
</div>')}
|
632
|
+
#{WORD_FTR1}
|
633
|
+
OUTPUT
|
634
|
+
end
|
635
|
+
|
616
636
|
it "extracts paragraphs from footnotes" do
|
617
637
|
simple_body = '<div>This is a very simple
|
618
638
|
document<a class="footnote" href="#a1">1</a> allegedly<a class="footnote" href="#a2">2</a></div>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.4
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|