html2doc 0.9.4 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dcc9db6cc57352a2e100bf1c1ff4127518d79fb67b958f52cd36580a4ab0b716
4
- data.tar.gz: 93979a73373bad8d3405ce8dd621455b54514d3e2e0017d9d91096b038405edb
3
+ metadata.gz: 6dacc967f64994c36c4566869a97848278dce24d5318417dc0e9a901bb4fa9ae
4
+ data.tar.gz: d96f332a360c7aa04b8c8d5931274737b7c31349ab28cc7e04e34785e0675169
5
5
  SHA512:
6
- metadata.gz: d5d29322e87a5ec048dfacd91fe3bb3ecc27478348062cae38cff7dfa3c30fb8fdca06a20cd248e87d50d48a483fa1e24e60bb3d195ab900fb80a65289090df4
7
- data.tar.gz: 2803cb07d404a133fd7debee942c79e95154e7eb703527c96a00b8f10a03219d36f99b414c0ef964bd3beec01aed178524b2068b8b6ce45771e5246556a769ee
6
+ metadata.gz: 6e059a12f8d8bb192b31e4d573f6a661e9091a474a6da3c2c60e81e9ee99cf45251da0f5a5badf23ebd9cd913e0ce47e35fe3b7a3ac4e0610626af6c0eba27c6
7
+ data.tar.gz: dce3f36d0006eebc92e01a12e62dc826329487ebfd6186d425561991175cf6c7057f569bb1dc49ff14a9d471dedf64c236ae60da5cc265374329a2ec1f9dc302
@@ -25,7 +25,10 @@ The gem currently does the following:
25
25
 
26
26
  * Convert any AsciiMath and MathML to Word's native mathematical formatting language, OOXML. Word supports copy-pasting MathML into Word and converting it into OOXML; however, the conversion is not infallible (we have found problems with `\sum`: Word claims that parameters are missing, and inserts dotted squares to indicate as much), and you may need to post-edit the OOXML.
27
27
  ** The gem does attempt to repair the MathML input, to bring it in line with Word's OOXML's expectations. If you find any issues with AsciiMath or MathML input, please raise an issue.
28
- * Identify any footnotes in the document (defined as hyperlinks with attributes `class = "Footnote"` or `epub:type = "footnote"`), and render them as Microsoft Word footnotes.
28
+ * Identify any footnotes in the document (defined as hyperlinks with attributes `class = "Footnote"` or `epub:type = "footnote"`), and render them as Microsoft Word footnotes.
29
+ ** The corresponding footnote content is any `div` or `aside` element whose `@id` attribute matches the target of the footnote hyperlink; e.g. `<a href="#ftn3" epub:type="footnote"><sup>3</sup></a>`, pointing to `<aside id="ftn3">`.
30
+ ** By default, the footnote hyperlink contents are overwritten with the autonumbering element: `<a href="#ftn1" epub:type="footnote"><sup>1</sup></a>` is replaced with `<a style='mso-footnote-id:ftn1' href='#_ftn1' name='_ftnref1' title='' id='_ftnref1'><span class='MsoFootnoteReference'><span style='mso-special-character:footnote'/></span></a>`
31
+ ** If the footnote hyperlink already contains (as a child) an element marked up as `<span class='MsoFootnoteReference'>`, only that span is replaced by the Microsoft autonumber element; any text surrounding it is preserved in both the footnote reference and the footnote target. For example, `<a href="#ftn1" epub:type="footnote"><span class='MsoFootnoteReference'>1</span>)</a>` will render as the footnote _1)_, both in the link and the target.
29
32
  * Resize any local images in the HTML file to fit within the maximum page size. (Word will otherwise crash on reading the document.)
30
33
  * Optionally apply list styles with predefined bullet and numbering from a Word CSS to the unordered and ordered lists in the document, restarting numbering for each ordered list.
31
34
  * Convert all lists to native Word HTML rendering (using paragraphs with `MsoListParagraphCxSpFirst, MsoListParagraphCxSpMiddle, MsoListParagraphCxSpLast` styles)
@@ -15,7 +15,7 @@ module Html2Doc
15
15
  body = docxml.at("//body")
16
16
  list = body.add_child("<div style='mso-element:footnote-list'/>")
17
17
  footnotes.each_with_index do |f, i|
18
- fn = list.first.add_child(footnote_container(i + 1))
18
+ fn = list.first.add_child(footnote_container(docxml, i + 1))
19
19
  f.parent = fn.first
20
20
  footnote_div_to_p(f)
21
21
  end
@@ -33,13 +33,16 @@ module Html2Doc
33
33
  end
34
34
  end
35
35
 
36
- def self.footnote_container(i)
36
+ FN = "<span class='MsoFootnoteReference'>"\
37
+ "<span style='mso-special-character:footnote'/></span>".freeze
38
+
39
+ def self.footnote_container(docxml, i)
40
+ ref = docxml&.at("//a[@href='#_ftn#{i}']")&.children&.to_xml(indent: 0).
41
+ gsub(/>\n</, "><") || FN
37
42
  <<~DIV
38
43
  <div style='mso-element:footnote' id='ftn#{i}'>
39
44
  <a style='mso-footnote-id:ftn#{i}' href='#_ftn#{i}'
40
- name='_ftnref#{i}' title='' id='_ftnref#{i}'><span
41
- class='MsoFootnoteReference'><span
42
- style='mso-special-character:footnote'></span></span></div>
45
+ name='_ftnref#{i}' title='' id='_ftnref#{i}'>#{ref.strip}</a></div>
43
46
  DIV
44
47
  end
45
48
 
@@ -49,8 +52,17 @@ module Html2Doc
49
52
  note = docxml.at("//*[@name = '#{href}' or @id = '#{href}']")
50
53
  return false if note.nil?
51
54
  set_footnote_link_attrs(a, i)
52
- a.children = "<span class='MsoFootnoteReference'>"\
53
- "<span style='mso-special-character:footnote'/></span>"
55
+ if a.at("./span[@class = 'MsoFootnoteReference']")
56
+ a.children.each do |c|
57
+ if c.name == "span" and c["class"] == "MsoFootnoteReference"
58
+ c.replace(FN)
59
+ else
60
+ c.wrap("<span class='MsoFootnoteReference'></span>")
61
+ end
62
+ end
63
+ else
64
+ a.children = FN
65
+ end
54
66
  fn << transform_footnote_text(note)
55
67
  end
56
68
 
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "0.9.4".freeze
2
+ VERSION = "1.0.0".freeze
3
3
  end
@@ -613,6 +613,26 @@ RSpec.describe Html2Doc do
613
613
  OUTPUT
614
614
  end
615
615
 
616
+ it "processes footnotes with text wrapping the footnote reference" do
617
+ simple_body = '<div>This is a very simple
618
+ document<a class="footnote" href="#a1">(<span class="MsoFootnoteReference">1</span>)</a> allegedly<a class="footnote" href="#a2">2</a></div>
619
+ <aside id="a1">Footnote</aside>
620
+ <aside id="a2">Other Footnote</aside>'
621
+ Html2Doc.process(html_input(simple_body), filename: "test")
622
+ expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
623
+ to match_fuzzy(<<~OUTPUT)
624
+ #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
625
+ #{word_body('<div>This is a very simple
626
+ document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
627
+ '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
628
+ <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a>Footnote</p></div>
629
+ <div style="mso-element:footnote" id="ftn2">
630
+ <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
631
+ </div>')}
632
+ #{WORD_FTR1}
633
+ OUTPUT
634
+ end
635
+
616
636
  it "extracts paragraphs from footnotes" do
617
637
  simple_body = '<div>This is a very simple
618
638
  document<a class="footnote" href="#a1">1</a> allegedly<a class="footnote" href="#a2">2</a></div>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.4
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-30 00:00:00.000000000 Z
11
+ date: 2020-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities