html2doc 0.9.3 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 34f2ebe1b5fb0faf57bcb2b73c63ca4d4f4916f0f5daa47d5ec50ca69238ca3e
4
- data.tar.gz: 16479f56aba64e5c2839546720fe6f2d998d24db42009b3849c788d111ca09d2
3
+ metadata.gz: 8aa0a8504ff535fa43595e3839d7de18fbb8a7cbb3b2ed303d1965656814dfe3
4
+ data.tar.gz: a56eb2333cd28420e7f8001f37edc9db5dc9b07f21eed192b02c35184e45c7af
5
5
  SHA512:
6
- metadata.gz: '09cb8b389a2d70dced3d4660b344b7cbc64409017404482c105a2f6f25daa528e27627f4c52b69a74a959b535d87f5d14822ad177156a01232af5466f55d8eeb'
7
- data.tar.gz: 66b314e12b36f509bcdfade0e24bae06ed5333af57f956c22840a2a1c3b8c8172a891ef30d91072a8e8b18a2e2e34c45146473bcc3e235cb7614a47d8c92a1dd
6
+ metadata.gz: '01291816da883daad14800ad3cdb41bc611c5698c9bec518c52fd0a67f133f8fd367d1b129ceb8827f214834fb970a5bbaa2f4e7d4af404bd221aca2dec61e78'
7
+ data.tar.gz: fd8119ee883d255d10e9882f3541adae2f57c9631dcec451dde230d252e101096afe0b3dd090ebe8cf31cc45f2339e452c3d55f08ac13110c1b41f11faac8ba5
@@ -1,17 +1,28 @@
1
- # Auto-generated !!! Do not edit it manually
2
- # use ci-master https://github.com/metanorma/metanorma-build-scripts
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
3
3
  name: macos
4
4
 
5
- on: [push, pull_request]
5
+ on:
6
+ push:
7
+ branches: [ master ]
8
+ pull_request:
9
+ paths-ignore:
10
+ - .github/workflows/ubuntu.yml
11
+ - .github/workflows/windows.yml
6
12
 
7
13
  jobs:
8
14
  test-macos:
9
15
  name: Test on Ruby ${{ matrix.ruby }} macOS
10
16
  runs-on: macos-latest
17
+ continue-on-error: ${{ matrix.experimental }}
11
18
  strategy:
12
19
  fail-fast: false
13
20
  matrix:
14
21
  ruby: [ '2.6', '2.5', '2.4' ]
22
+ experimental: [false]
23
+ include:
24
+ - ruby: '2.7'
25
+ experimental: true
15
26
  steps:
16
27
  - uses: actions/checkout@master
17
28
  - name: Use Ruby
@@ -1,17 +1,30 @@
1
- # Auto-generated !!! Do not edit it manually
2
- # use ci-master https://github.com/metanorma/metanorma-build-scripts
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
3
3
  name: ubuntu
4
4
 
5
- on: [push, pull_request]
5
+ on:
6
+ push:
7
+ branches: [ master ]
8
+ tags:
9
+ - '*'
10
+ pull_request:
11
+ paths-ignore:
12
+ - .github/workflows/macos.yml
13
+ - .github/workflows/windows.yml
6
14
 
7
15
  jobs:
8
16
  test-linux:
9
17
  name: Test on Ruby ${{ matrix.ruby }} Ubuntu
10
18
  runs-on: ubuntu-latest
19
+ continue-on-error: ${{ matrix.experimental }}
11
20
  strategy:
12
21
  fail-fast: false
13
22
  matrix:
14
23
  ruby: [ '2.6', '2.5', '2.4' ]
24
+ experimental: [false]
25
+ include:
26
+ - ruby: '2.7'
27
+ experimental: true
15
28
  steps:
16
29
  - uses: actions/checkout@master
17
30
  - name: Use Ruby
@@ -21,8 +34,20 @@ jobs:
21
34
  architecture: 'x64'
22
35
  - name: Update gems
23
36
  run: |
24
- gem install bundler
37
+ gem install bundler
25
38
  bundle install --jobs 4 --retry 3
26
39
  - name: Run specs
27
40
  run: |
28
41
  bundle exec rake
42
+ - name: Trigger dependent repositories
43
+ if: github.ref == 'refs/heads/master' && matrix.ruby == '2.6'
44
+ env:
45
+ GH_USERNAME: ${{ secrets.PAT_USERNAME }}
46
+ GH_ACCESS_TOKEN: ${{ secrets.PAT_TOKEN }}
47
+ run: |
48
+ curl -LO --retry 3 https://raw.githubusercontent.com/metanorma/metanorma-build-scripts/master/trigger-gh-actions.sh
49
+ [[ -f ".github/workflows/dependent_repos.env" ]] && source .github/workflows/dependent_repos.env
50
+ for repo in $DEPENDENT_REPOS
51
+ do
52
+ sh trigger-gh-actions.sh $ORGANISATION $repo $GH_USERNAME $GH_ACCESS_TOKEN $GITHUB_REPOSITORY "{ \"ref\": \"${GITHUB_REF}\" }"
53
+ done
@@ -1,17 +1,28 @@
1
- # Auto-generated !!! Do not edit it manually
2
- # use ci-master https://github.com/metanorma/metanorma-build-scripts
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
3
3
  name: windows
4
4
 
5
- on: [push, pull_request]
5
+ on:
6
+ push:
7
+ branches: [ master ]
8
+ pull_request:
9
+ paths-ignore:
10
+ - .github/workflows/macos.yml
11
+ - .github/workflows/ubuntu.yml
6
12
 
7
13
  jobs:
8
14
  test-windows:
9
15
  name: Test on Ruby ${{ matrix.ruby }} Windows
10
16
  runs-on: windows-latest
17
+ continue-on-error: ${{ matrix.experimental }}
11
18
  strategy:
12
19
  fail-fast: false
13
20
  matrix:
14
21
  ruby: [ '2.6', '2.5', '2.4' ]
22
+ experimental: [false]
23
+ include:
24
+ - ruby: '2.7'
25
+ experimental: true
15
26
  steps:
16
27
  - uses: actions/checkout@master
17
28
  - name: Use Ruby
@@ -22,9 +33,8 @@ jobs:
22
33
  - name: Update gems
23
34
  shell: pwsh
24
35
  run: |
25
- gem install bundler
36
+ gem install bundler
26
37
  bundle config --local path vendor/bundle
27
- bundle update
28
38
  bundle install --jobs 4 --retry 3
29
39
  - name: Run specs
30
40
  run: |
@@ -3,8 +3,9 @@
3
3
  https://github.com/metanorma/html2doc/workflows/main/badge.svg
4
4
 
5
5
  image:https://img.shields.io/gem/v/html2doc.svg["Gem Version", link="https://rubygems.org/gems/html2doc"]
6
- image:https://travis-ci.com/metanorma/html2doc.svg["Build Status", link="https://travis-ci.com/metanorma/html2doc"]
7
- image:https://ci.appveyor.com/api/projects/status/aspj42o70q3dnkf1?svg=true["Appveyor Build Status", link="https://ci.appveyor.com/project/metanorma/html2doc"]
6
+ image:https://github.com/metanorma/html2doc/workflows/ubuntu/badge.svg["Ubuntu Build Status", link="https://github.com/metanorma/html2doc/actions?query=workflow%3Aubuntu"]
7
+ image:https://github.com/metanorma/html2doc/workflows/macos/badge.svg["OSX Build Status", link="https://github.com/metanorma/html2doc/actions?query=workflow%3Amacos"]
8
+ image:https://github.com/metanorma/html2doc/workflows/windows/badge.svg["Windows Build Status", link="https://github.com/metanorma/html2doc/actions?query=workflow%3Awindows"]
8
9
  image:https://codeclimate.com/github/metanorma/html2doc/badges/gpa.svg["Code Climate", link="https://codeclimate.com/github/metanorma/html2doc"]
9
10
  image:https://img.shields.io/github/issues-pr-raw/metanorma/html2doc.svg["Pull Requests", link="https://github.com/metanorma/html2doc/pulls"]
10
11
  image:https://img.shields.io/github/commits-since/metanorma/html2doc/latest.svg["Commits since latest",link="https://github.com/metanorma/html2doc/releases"]
@@ -23,9 +24,12 @@ This work is driven by the Word document generation procedure documented in http
23
24
 
24
25
  The gem currently does the following:
25
26
 
26
- * Convert any AsciiMath and MathML to Word's native mathematical formatting language, OOXML. Word supports copy-pasting MathML into Word and converting it into OOXML; however the conversion is not infallible (we have found problems with `\sum`: Word claims parameters were missing, and inserting dotted squares to indicate as much), and you may need to post-edit the OOXML.
27
+ * Convert any AsciiMath and MathML to Word's native mathematical formatting language, OOXML. Word supports copy-pasting MathML into Word and converting it into OOXML; however the conversion is not infallible (we have in the past found problems with `\sum`: Word claimed parameters were missing, and inserted dotted squares to indicate as much), and you may need to post-edit the OOXML.
27
28
  ** The gem does attempt to repair the MathML input, to bring it in line with Word's OOXML's expectations. If you find any issues with AsciiMath or MathML input, please raise an issue.
28
- * Identify any footnotes in the document (defined as hyperlinks with attributes `class = "Footnote"` or `epub:type = "footnote"`), and render them as Microsoft Word footnotes.
29
+ * Identify any footnotes in the document (defined as hyperlinks with attributes `class = "Footnote"` or `epub:type = "footnote"`), and render them as Microsoft Word footnotes.
30
+ ** The corresponding footnote content is any `div` or `aside` element with the same `@id` attribute as the footnote points to; e.g. `<a href="#ftn3" epub:type="footnote"><sup>3</sup></a>`, pointing to `<aside id="ftn3">`.
31
+ ** By default, the footnote hyperlink contents are overwritten with the autonumbering element: `<a href="#ftn1" epub:type="footnote"><sup>1</sup></a>` is replaced with `<a style='mso-footnote-id:ftn1' href='#_ftn1' name='_ftnref1' title='' id='_ftnref1'><span class='MsoFootnoteReference'><span style='mso-special-character:footnote'/></span></a>`.
32
+ ** If the footnote hyperlink already contains (as a child) an element marked up as `<span class='MsoFootnoteReference'>`, only that span is replaced by the Microsoft autonumber element; any text surrounding it is preserved in both the footnote reference and the footnote target. For example, `<a href="#ftn1" epub:type="footnote"><span class='MsoFootnoteReference'>1</span>)</a>` will render as the footnote _1)_, both in the link and the target.
29
33
  * Resize any local images in the HTML file to fit within the maximum page size. (Word will otherwise crash on reading the document.)
30
34
  * Optionally apply list styles with predefined bullet and numbering from a Word CSS to the unordered and ordered lists in the document, restarting numbering for each ordered list.
31
35
  * Convert all lists to native Word HTML rendering (using paragraphs with `MsoListParagraphCxSpFirst, MsoListParagraphCxSpMiddle, MsoListParagraphCxSpLast` styles)
@@ -113,7 +117,7 @@ The bad news is that Word's understanding of HTML is HTML 4. In order for bookma
113
117
 
114
118
  The good news with generating a Word document via HTML is that Word understands CSS, and you can determine much of what the Word document looks like by manipulating that CSS. That extends to features that are not part of HTML CSS: if you want to work out how to get Word to do something in CSS, save a Word document that already does what you want as HTML, and inspect the HTML and CSS you get.
115
119
 
116
- The bad news is that Word's implementation of CSS is poorly documented -- even if Office HTML is documented in a 1300 page document (online at https://stigmortenmyre.no/mso/, https://www.rodriguezcommaj.com/assets/resources/microsoft-office-html-and-xml-reference.pdf), and the CSS selectors are only partially and selectively implemented. For list styles, for example, `mso-level-text` governs how the list label is displayed; but it is only recognised in a `@list` style: it is ignored in a CSS rule like `ol li`, or in a `style` attribute on a node. Working out the right CSS for what you want will take some trial and error, and you are better placed to try to do things Word's way than the right way.
120
+ The bad news is that Word's implementation of CSS is poorly documented -- even if Office HTML is documented in a 1300 page document (online at https://stigmortenmyre.no/mso/, https://www.rodriguezcommaj.com/assets/resources/microsoft-office-html-and-xml-reference.pdf), and the CSS selectors are only partially and selectively implemented. For list styles, for example, `mso-level-text` governs how the list label is displayed; but it is only recognised in a `@list` style: it is ignored in a CSS rule like `ol li`, or in a `style` attribute on a node. CSS selectors only support classes, in ancestor relations: `p.class1 ol.class2` is supported, but `#id1` is not, and neither is `p > ol`. Working out the right CSS for what you want will take some trial and error, and you are better placed to try to do things Word's way than the right way.
117
121
 
118
122
  === XSLT
119
123
 
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.add_dependency "nokogiri", ">= 1.10.4"
32
32
  spec.add_dependency "thread_safe"
33
33
  spec.add_dependency "uuidtools"
34
- spec.add_dependency "asciimath", "~> 1.0.9"
34
+ spec.add_dependency "asciimath", "~> 2.0.0"
35
35
 
36
36
  spec.add_development_dependency "byebug", "~> 9.1"
37
37
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
@@ -2,7 +2,7 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- require "pp"
5
+ require "uuidtools"
6
6
 
7
7
  module Html2Doc
8
8
  def self.style_list(li, level, liststyle, listnumber)
@@ -15,13 +15,8 @@ module Html2Doc
15
15
  li["style"] += "mso-list:#{liststyle} level#{level} lfo#{listnumber};"
16
16
  end
17
17
 
18
- def self.list_add(xpath, liststyles, listtype, level)
19
- xpath.each_with_index do |list, i|
20
- @listnumber += 1 if level == 1
21
- list["seen"] = true if level == 1
22
- (list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |li|
23
- style_list(li, level, liststyles[listtype], @listnumber)
24
- if [:ul, :ol].include? listtype
18
+ def self.list_add1(li, liststyles, listtype, level)
19
+ if [:ul, :ol].include? listtype
25
20
  list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
26
21
  liststyles, :ul, level + 1)
27
22
  list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
@@ -32,6 +27,20 @@ module Html2Doc
32
27
  list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
33
28
  liststyles, listtype, level + 1)
34
29
  end
30
+ end
31
+
32
+ def self.list_add(xpath, liststyles, listtype, level)
33
+ xpath.each_with_index do |list, i|
34
+ @listnumber += 1 if level == 1
35
+ list["seen"] = true if level == 1
36
+ list["id"] ||= UUIDTools::UUID.random_create
37
+ (list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |li|
38
+ style_list(li, level, liststyles[listtype], @listnumber)
39
+ list_add1(li, liststyles, listtype, level)
40
+ end
41
+ list.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{list['id']}')] | "\
42
+ ".//ol[not(ancestor::li/ancestor::*/@id = '#{list['id']}')]").each do |li|
43
+ list_add1(li.parent, liststyles, listtype, level-1)
35
44
  end
36
45
  end
37
46
  end
@@ -68,6 +68,7 @@ module Html2Doc
68
68
  realSize = ImageSize.path(path).size
69
69
  s = [i["width"].to_i, i["height"].to_i]
70
70
  s = realSize if s[0].zero? && s[1].zero?
71
+ return [nil, nil] if realSize[0].nil? || realSize[1].nil?
71
72
  s[1] = s[0] * realSize[1] / realSize[0] if s[1].zero? && !s[0].zero?
72
73
  s[0] = s[1] * realSize[0] / realSize[1] if s[0].zero? && !s[1].zero?
73
74
  s = [(s[0] * maxheight / s[1]).ceil, maxheight] if s[1] > maxheight
@@ -87,10 +88,9 @@ module Html2Doc
87
88
 
88
89
  # only processes locally stored images
89
90
  def self.image_cleanup(docxml, dir, localdir)
90
- #docxml.xpath(IMAGE_PATH).each do |i|
91
91
  docxml.traverse do |i|
92
92
  next unless i.element? && %w(img v:imagedata).include?(i.name)
93
- warnsvg(i["src"])
93
+ #warnsvg(i["src"])
94
94
  next if /^http/.match i["src"]
95
95
  next if %r{^data:image/[^;]+;base64}.match i["src"]
96
96
  local_filename = %r{^([A-Z]:)?/}.match(i["src"]) ? i["src"] :
@@ -115,12 +115,12 @@ module Html2Doc
115
115
  if a.size == 2 && !(/ src="https?:/.match a[1]) &&
116
116
  !(%r{ src="data:image/[^;]+;base64}.match a[1])
117
117
  m = / src=['"](?<src>[^"']+)['"]/.match a[1]
118
- warnsvg(m[:src])
118
+ #warnsvg(m[:src])
119
119
  m2 = /\.(?<suffix>[a-zA-Z_0-9]+)$/.match m[:src]
120
- new_filename = "file:///C:/Doc/#{filename}_files/#{mkuuid}.#{m2[:suffix]}"
120
+ new_filename = "#{mkuuid}.#{m2[:suffix]}"
121
121
  old_filename = %r{^([A-Z]:)?/}.match(m[:src]) ? m[:src] : File.join(localdir, m[:src])
122
- FileUtils.cp old_filename, File.join(dir, "#{mkuuid}.#{m2[:suffix]}")
123
- a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='#{new_filename}'")
122
+ FileUtils.cp old_filename, File.join(dir, new_filename)
123
+ a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='file:///C:/Doc/#{filename}_files/#{new_filename}'")
124
124
  end
125
125
  a.join
126
126
  end
@@ -15,7 +15,7 @@ module Html2Doc
15
15
  body = docxml.at("//body")
16
16
  list = body.add_child("<div style='mso-element:footnote-list'/>")
17
17
  footnotes.each_with_index do |f, i|
18
- fn = list.first.add_child(footnote_container(i + 1))
18
+ fn = list.first.add_child(footnote_container(docxml, i + 1))
19
19
  f.parent = fn.first
20
20
  footnote_div_to_p(f)
21
21
  end
@@ -33,13 +33,16 @@ module Html2Doc
33
33
  end
34
34
  end
35
35
 
36
- def self.footnote_container(i)
36
+ FN = "<span class='MsoFootnoteReference'>"\
37
+ "<span style='mso-special-character:footnote'/></span>".freeze
38
+
39
+ def self.footnote_container(docxml, i)
40
+ ref = docxml&.at("//a[@href='#_ftn#{i}']")&.children&.to_xml(indent: 0).
41
+ gsub(/>\n</, "><") || FN
37
42
  <<~DIV
38
43
  <div style='mso-element:footnote' id='ftn#{i}'>
39
44
  <a style='mso-footnote-id:ftn#{i}' href='#_ftn#{i}'
40
- name='_ftnref#{i}' title='' id='_ftnref#{i}'><span
41
- class='MsoFootnoteReference'><span
42
- style='mso-special-character:footnote'></span></span></div>
45
+ name='_ftnref#{i}' title='' id='_ftnref#{i}'>#{ref.strip}</a></div>
43
46
  DIV
44
47
  end
45
48
 
@@ -49,8 +52,17 @@ module Html2Doc
49
52
  note = docxml.at("//*[@name = '#{href}' or @id = '#{href}']")
50
53
  return false if note.nil?
51
54
  set_footnote_link_attrs(a, i)
52
- a.children = "<span class='MsoFootnoteReference'>"\
53
- "<span style='mso-special-character:footnote'/></span>"
55
+ if a.at("./span[@class = 'MsoFootnoteReference']")
56
+ a.children.each do |c|
57
+ if c.name == "span" and c["class"] == "MsoFootnoteReference"
58
+ c.replace(FN)
59
+ else
60
+ c.wrap("<span class='MsoFootnoteReference'></span>")
61
+ end
62
+ end
63
+ else
64
+ a.children = FN
65
+ end
54
66
  fn << transform_footnote_text(note)
55
67
  end
56
68
 
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "0.9.3".freeze
2
+ VERSION = "1.0.3".freeze
3
3
  end
@@ -372,11 +372,11 @@ RSpec.describe Html2Doc do
372
372
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
373
373
  to match_fuzzy(<<~OUTPUT)
374
374
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
375
- #{word_body("
375
+ #{word_body(%{
376
376
  <div><m:oMath>
377
- #{ASCII_MATH}<m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>\"integer\"</m:t></m:r>
377
+ <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><m:r><m:t>=</m:t></m:r><m:sSup><m:e><m:r><m:t>(</m:t></m:r><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:r><m:t>(</m:t></m:r><m:r><m:t>n+1</m:t></m:r><m:r><m:t>)</m:t></m:r></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f><m:r><m:t>)</m:t></m:r></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>"integer"</m:t></m:r><m:r><m:t>)</m:t></m:r>
378
378
  </m:oMath>
379
- </div>", '<div style="mso-element:footnote-list"/>')}
379
+ </div>}, '<div style="mso-element:footnote-list"/>')}
380
380
  #{WORD_FTR1}
381
381
  OUTPUT
382
382
  end
@@ -388,7 +388,7 @@ RSpec.describe Html2Doc do
388
388
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
389
389
  #{word_body("
390
390
  <div><m:oMath>
391
- <m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>integer</m:t></m:r>
391
+ <m:r><m:t>text</m:t></m:r><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>integer</m:t></m:r><m:r><m:t>)</m:t></m:r>
392
392
  </m:oMath>
393
393
  </div>", '<div style="mso-element:footnote-list"/>')}
394
394
  #{WORD_FTR1}
@@ -400,11 +400,11 @@ RSpec.describe Html2Doc do
400
400
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
401
401
  to match_fuzzy(<<~OUTPUT)
402
402
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
403
- #{word_body("
404
- <div style=\"text-align:left;\"><m:oMathPara><m:oMathParaPr><m:jc m:val=\"left\"/></m:oMathParaPr><m:oMath>
405
- #{ASCII_MATH}
403
+ #{word_body(%{
404
+ <div style="text-align:left;"><m:oMathPara><m:oMathParaPr><m:jc m:val="left"/></m:oMathParaPr><m:oMath>
405
+ <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><m:r><m:t>=</m:t></m:r><m:sSup><m:e><m:r><m:t>(</m:t></m:r><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:r><m:t>(</m:t></m:r><m:r><m:t>n+1</m:t></m:r><m:r><m:t>)</m:t></m:r></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f><m:r><m:t>)</m:t></m:r></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup>
406
406
  </m:oMath>
407
- </m:oMathPara></div>", '<div style="mso-element:footnote-list"/>')}
407
+ </m:oMathPara></div>}, '<div style="mso-element:footnote-list"/>')}
408
408
  #{WORD_FTR1}
409
409
  OUTPUT
410
410
  end
@@ -414,11 +414,11 @@ RSpec.describe Html2Doc do
414
414
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
415
415
  to match_fuzzy(<<~OUTPUT)
416
416
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
417
- #{word_body("
418
- <div style=\"text-align:right;\"><m:oMathPara><m:oMathParaPr><m:jc m:val=\"right\"/></m:oMathParaPr><m:oMath>
419
- #{ASCII_MATH}
417
+ #{word_body(%{
418
+ <div style="text-align:right;"><m:oMathPara><m:oMathParaPr><m:jc m:val="right"/></m:oMathParaPr><m:oMath>
419
+ <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><m:r><m:t>=</m:t></m:r><m:sSup><m:e><m:r><m:t>(</m:t></m:r><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:r><m:t>(</m:t></m:r><m:r><m:t>n+1</m:t></m:r><m:r><m:t>)</m:t></m:r></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f><m:r><m:t>)</m:t></m:r></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup>
420
420
  </m:oMath>
421
- </m:oMathPara></div>", '<div style="mso-element:footnote-list"/>')}
421
+ </m:oMathPara></div>}, '<div style="mso-element:footnote-list"/>')}
422
422
  #{WORD_FTR1}
423
423
  OUTPUT
424
424
  end
@@ -566,10 +566,12 @@ RSpec.describe Html2Doc do
566
566
  OUTPUT
567
567
  end
568
568
 
569
+ =begin
569
570
  it "warns about SVG" do
570
571
  simple_body = '<img src="https://example.com/19160-6.svg">'
571
572
  expect{ Html2Doc.process(html_input(simple_body), filename: "test") }.to output("https://example.com/19160-6.svg: SVG not supported\n").to_stderr
572
573
  end
574
+ =end
573
575
 
574
576
  it "processes epub:type footnotes" do
575
577
  simple_body = '<div>This is a very simple
@@ -611,6 +613,26 @@ RSpec.describe Html2Doc do
611
613
  OUTPUT
612
614
  end
613
615
 
616
+ it "processes footnotes with text wrapping the footnote reference" do
617
+ simple_body = '<div>This is a very simple
618
+ document<a class="footnote" href="#a1">(<span class="MsoFootnoteReference">1</span>)</a> allegedly<a class="footnote" href="#a2">2</a></div>
619
+ <aside id="a1">Footnote</aside>
620
+ <aside id="a2">Other Footnote</aside>'
621
+ Html2Doc.process(html_input(simple_body), filename: "test")
622
+ expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
623
+ to match_fuzzy(<<~OUTPUT)
624
+ #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
625
+ #{word_body('<div>This is a very simple
626
+ document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
627
+ '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
628
+ <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a>Footnote</p></div>
629
+ <div style="mso-element:footnote" id="ftn2">
630
+ <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
631
+ </div>')}
632
+ #{WORD_FTR1}
633
+ OUTPUT
634
+ end
635
+
614
636
  it "extracts paragraphs from footnotes" do
615
637
  simple_body = '<div>This is a very simple
616
638
  document<a class="footnote" href="#a1">1</a> allegedly<a class="footnote" href="#a2">2</a></div>
@@ -633,15 +655,16 @@ RSpec.describe Html2Doc do
633
655
 
634
656
  it "labels lists with list styles" do
635
657
  simple_body = <<~BODY
636
- <div><ul>
637
- <li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
658
+ <div><ul id="0">
659
+ <li><div><p><ol id="1"><li><ul id="2"><li><p><ol id="3"><li><ol id="4"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li><div><ul id="5"><li>C</li></ul></div>
638
660
  BODY
639
661
  Html2Doc.process(html_input(simple_body), filename: "test", liststyles: {ul: "l1", ol: "l2"})
640
662
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
641
663
  to match_fuzzy(<<~OUTPUT)
642
664
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
643
665
  #{word_body('<div>
644
- <p style="mso-list:l1 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p></div>',
666
+ <p style="mso-list:l1 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p><div><p style="mso-list:l1 level1 lfo2;" class="MsoListParagraphCxSpFirst">C</p></div>
667
+ </div>',
645
668
  '<div style="mso-element:footnote-list"/>')}
646
669
  #{WORD_FTR1}
647
670
  OUTPUT
@@ -651,8 +674,8 @@ RSpec.describe Html2Doc do
651
674
  it "restarts numbering of lists with list styles" do
652
675
  simple_body = <<~BODY
653
676
  <div>
654
- <ol><li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol>
655
- <ol><li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol></div>
677
+ <ol id="1"><li><div><p><ol id="2"><li><ul id="3"><li><p><ol id="4"><li><ol id="5"><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol>
678
+ <ol id="6"><li><div><p><ol id="7"><li><ul id="8"><li><p><ol id="9"><li><ol id="10"><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol></div>
656
679
  BODY
657
680
  Html2Doc.process(html_input(simple_body), filename: "test", liststyles: {ul: "l1", ol: "l2"})
658
681
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
@@ -668,12 +691,12 @@ RSpec.describe Html2Doc do
668
691
 
669
692
  it "labels lists with multiple list styles" do
670
693
  simple_body = <<~BODY
671
- <div><ul class="steps">
672
- <li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
673
- <div><ul>
674
- <li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
675
- <div><ul class="other">
676
- <li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
694
+ <div><ul class="steps" id="0">
695
+ <li><div><p><ol id="1"><li><ul id="2"><li><p><ol id="3"><li><ol id="4"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
696
+ <div><ul id="5">
697
+ <li><div><p><ol id="6"><li><ul id="7"><li><p><ol id="8"><li><ol id="9"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
698
+ <div><ul class="other" id="10">
699
+ <li><div><p><ol id="11"><li><ul id="12"><li><p><ol id="13"><li><ol id="14"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
677
700
  BODY
678
701
  Html2Doc.process(html_input(simple_body), filename: "test", liststyles: {ul: "l1", ol: "l2", steps: "l3"})
679
702
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.3
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-12-24 00:00:00.000000000 Z
11
+ date: 2020-06-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 1.0.9
103
+ version: 2.0.0
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 1.0.9
110
+ version: 2.0.0
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: byebug
113
113
  requirement: !ruby/object:Gem::Requirement