html2doc 0.9.2 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/macos.yml +16 -4
- data/.github/workflows/ubuntu.yml +30 -4
- data/.github/workflows/windows.yml +16 -5
- data/README.adoc +9 -5
- data/html2doc.gemspec +0 -1
- data/lib/html2doc/lists.rb +17 -8
- data/lib/html2doc/mime.rb +7 -7
- data/lib/html2doc/notes.rb +19 -7
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +35 -12
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71671aea1d7e1f303519cf52499554f61112070aa7a5a4948d5f9d9e17c6804c
|
4
|
+
data.tar.gz: ca43f784656643e6f4bdde934f0001d055df50ff020a6565d69eef75cb0c0fdc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6d7f5861a22b9daa040f1bcf389a4f781bb1ee3c1fc7329c29aac648e9322501d1342450ce5c1a4ab98dc45b2dbadcd28eb2b045994eadbd0df1f396c4569ff1
|
7
|
+
data.tar.gz: ef1be050d303a834643f659f158c12e1e297618f7590bc2731e8476cdeced08018600539dcc81c558a60f139a25f7ea28af8bc3ba5de17fb97dd3e223f47f4f8
|
data/.github/workflows/macos.yml
CHANGED
@@ -1,16 +1,28 @@
|
|
1
|
-
# Auto-generated
|
2
|
-
#
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
3
|
name: macos
|
4
4
|
|
5
|
-
on:
|
5
|
+
on:
|
6
|
+
push:
|
7
|
+
branches: [ master ]
|
8
|
+
pull_request:
|
9
|
+
paths-ignore:
|
10
|
+
- .github/workflows/ubuntu.yml
|
11
|
+
- .github/workflows/windows.yml
|
6
12
|
|
7
13
|
jobs:
|
8
14
|
test-macos:
|
9
15
|
name: Test on Ruby ${{ matrix.ruby }} macOS
|
10
16
|
runs-on: macos-latest
|
17
|
+
continue-on-error: ${{ matrix.experimental }}
|
11
18
|
strategy:
|
19
|
+
fail-fast: false
|
12
20
|
matrix:
|
13
21
|
ruby: [ '2.6', '2.5', '2.4' ]
|
22
|
+
experimental: [false]
|
23
|
+
include:
|
24
|
+
- ruby: '2.7'
|
25
|
+
experimental: true
|
14
26
|
steps:
|
15
27
|
- uses: actions/checkout@master
|
16
28
|
- name: Use Ruby
|
@@ -20,7 +32,7 @@ jobs:
|
|
20
32
|
architecture: 'x64'
|
21
33
|
- name: Update gems
|
22
34
|
run: |
|
23
|
-
sudo gem install bundler
|
35
|
+
sudo gem install bundler --force
|
24
36
|
bundle install --jobs 4 --retry 3
|
25
37
|
- name: Run specs
|
26
38
|
run: |
|
@@ -1,16 +1,30 @@
|
|
1
|
-
# Auto-generated
|
2
|
-
#
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
3
|
name: ubuntu
|
4
4
|
|
5
|
-
on:
|
5
|
+
on:
|
6
|
+
push:
|
7
|
+
branches: [ master ]
|
8
|
+
tags:
|
9
|
+
- '*'
|
10
|
+
pull_request:
|
11
|
+
paths-ignore:
|
12
|
+
- .github/workflows/macos.yml
|
13
|
+
- .github/workflows/windows.yml
|
6
14
|
|
7
15
|
jobs:
|
8
16
|
test-linux:
|
9
17
|
name: Test on Ruby ${{ matrix.ruby }} Ubuntu
|
10
18
|
runs-on: ubuntu-latest
|
19
|
+
continue-on-error: ${{ matrix.experimental }}
|
11
20
|
strategy:
|
21
|
+
fail-fast: false
|
12
22
|
matrix:
|
13
23
|
ruby: [ '2.6', '2.5', '2.4' ]
|
24
|
+
experimental: [false]
|
25
|
+
include:
|
26
|
+
- ruby: '2.7'
|
27
|
+
experimental: true
|
14
28
|
steps:
|
15
29
|
- uses: actions/checkout@master
|
16
30
|
- name: Use Ruby
|
@@ -20,8 +34,20 @@ jobs:
|
|
20
34
|
architecture: 'x64'
|
21
35
|
- name: Update gems
|
22
36
|
run: |
|
23
|
-
gem install bundler
|
37
|
+
gem install bundler
|
24
38
|
bundle install --jobs 4 --retry 3
|
25
39
|
- name: Run specs
|
26
40
|
run: |
|
27
41
|
bundle exec rake
|
42
|
+
- name: Trigger dependent repositories
|
43
|
+
if: github.ref == 'refs/heads/master' && matrix.ruby == '2.6'
|
44
|
+
env:
|
45
|
+
GH_USERNAME: ${{ secrets.PAT_USERNAME }}
|
46
|
+
GH_ACCESS_TOKEN: ${{ secrets.PAT_TOKEN }}
|
47
|
+
run: |
|
48
|
+
curl -LO --retry 3 https://raw.githubusercontent.com/metanorma/metanorma-build-scripts/master/trigger-gh-actions.sh
|
49
|
+
[[ -f ".github/workflows/dependent_repos.env" ]] && source .github/workflows/dependent_repos.env
|
50
|
+
for repo in $DEPENDENT_REPOS
|
51
|
+
do
|
52
|
+
sh trigger-gh-actions.sh $ORGANISATION $repo $GH_USERNAME $GH_ACCESS_TOKEN $GITHUB_REPOSITORY "{ \"ref\": \"${GITHUB_REF}\" }"
|
53
|
+
done
|
@@ -1,16 +1,28 @@
|
|
1
|
-
# Auto-generated
|
2
|
-
#
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
3
|
name: windows
|
4
4
|
|
5
|
-
on:
|
5
|
+
on:
|
6
|
+
push:
|
7
|
+
branches: [ master ]
|
8
|
+
pull_request:
|
9
|
+
paths-ignore:
|
10
|
+
- .github/workflows/macos.yml
|
11
|
+
- .github/workflows/ubuntu.yml
|
6
12
|
|
7
13
|
jobs:
|
8
14
|
test-windows:
|
9
15
|
name: Test on Ruby ${{ matrix.ruby }} Windows
|
10
16
|
runs-on: windows-latest
|
17
|
+
continue-on-error: ${{ matrix.experimental }}
|
11
18
|
strategy:
|
19
|
+
fail-fast: false
|
12
20
|
matrix:
|
13
21
|
ruby: [ '2.6', '2.5', '2.4' ]
|
22
|
+
experimental: [false]
|
23
|
+
include:
|
24
|
+
- ruby: '2.7'
|
25
|
+
experimental: true
|
14
26
|
steps:
|
15
27
|
- uses: actions/checkout@master
|
16
28
|
- name: Use Ruby
|
@@ -21,9 +33,8 @@ jobs:
|
|
21
33
|
- name: Update gems
|
22
34
|
shell: pwsh
|
23
35
|
run: |
|
24
|
-
gem install bundler
|
36
|
+
gem install bundler
|
25
37
|
bundle config --local path vendor/bundle
|
26
|
-
bundle update
|
27
38
|
bundle install --jobs 4 --retry 3
|
28
39
|
- name: Run specs
|
29
40
|
run: |
|
data/README.adoc
CHANGED
@@ -3,8 +3,9 @@
|
|
3
3
|
https://github.com/metanorma/html2doc/workflows/main/badge.svg
|
4
4
|
|
5
5
|
image:https://img.shields.io/gem/v/html2doc.svg["Gem Version", link="https://rubygems.org/gems/html2doc"]
|
6
|
-
image:https://
|
7
|
-
image:https://
|
6
|
+
image:https://github.com/metanorma/html2doc/workflows/ubuntu/badge.svg["Ubuntu Build Status", link="https://github.com/metanorma/html2doc/actions?query=workflow%3Aubuntu"]
|
7
|
+
image:https://github.com/metanorma/html2doc/workflows/macos/badge.svg["OSX Build Status", link="https://github.com/metanorma/html2doc/actions?query=workflow%3Amacos"]
|
8
|
+
image:https://github.com/metanorma/html2doc/workflows/windows/badge.svg["Windows Build Status", link="https://github.com/metanorma/html2doc/actions?query=workflow%3Awindows"]
|
8
9
|
image:https://codeclimate.com/github/metanorma/html2doc/badges/gpa.svg["Code Climate", link="https://codeclimate.com/github/metanorma/html2doc"]
|
9
10
|
image:https://img.shields.io/github/issues-pr-raw/metanorma/html2doc.svg["Pull Requests", link="https://github.com/metanorma/html2doc/pulls"]
|
10
11
|
image:https://img.shields.io/github/commits-since/metanorma/html2doc/latest.svg["Commits since latest",link="https://github.com/metanorma/html2doc/releases"]
|
@@ -23,9 +24,12 @@ This work is driven by the Word document generation procedure documented in http
|
|
23
24
|
|
24
25
|
The gem currently does the following:
|
25
26
|
|
26
|
-
* Convert any AsciiMath and MathML to Word's native mathematical formatting language, OOXML. Word supports copy-pasting MathML into Word and converting it into OOXML; however the conversion is not infallible (we have found problems with `\sum`: Word claims parameters were missing, and inserting dotted squares to indicate as much), and you may need to post-edit the OOXML.
|
27
|
+
* Convert any AsciiMath and MathML to Word's native mathematical formatting language, OOXML. Word supports copy-pasting MathML into Word and converting it into OOXML; however the conversion is not infallible (we have in the past found problems with `\sum`: Word claimed parameters were missing, and inserted dotted squares to indicate as much), and you may need to post-edit the OOXML.
|
27
28
|
** The gem does attempt to repair the MathML input, to bring it in line with Word's OOXML's expectations. If you find any issues with AsciiMath or MathML input, please raise an issue.
|
28
|
-
* Identify any footnotes in the document (defined as hyperlinks with attributes `class = "Footnote"` or `epub:type = "footnote"`), and render them as Microsoft Word footnotes.
|
29
|
+
* Identify any footnotes in the document (defined as hyperlinks with attributes `class = "Footnote"` or `epub:type = "footnote"`), and render them as Microsoft Word footnotes.
|
30
|
+
** The corresponding footnote content is any `div` or `aside` element whose `@id` attribute matches the target of the footnote hyperlink; e.g. `<a href="#ftn3" epub:type="footnote"><sup>3</sup></a>` points to `<aside id="ftn3">`.
|
31
|
+
** By default, the footnote hyperlink contents are overwritten with the autonumbering element: `<a href="#ftn1" epub:type="footnote"><sup>1</sup></a>` is replaced with `<a style='mso-footnote-id:ftn1' href='#_ftn1' name='_ftnref1' title='' id='_ftnref1'><span class='MsoFootnoteReference'><span style='mso-special-character:footnote'/></span></a>`
|
32
|
+
** If the footnote hyperlink already contains (as a child) an element marked up as `<span class='MsoFootnoteReference'>`, only that span is replaced by the Microsoft autonumber element; any text surrounding it is preserved in both the footnote reference and the footnote target. For example, `<a href="#ftn1" epub:type="footnote"><span class='MsoFootnoteReference'>1</span>)</a>` will render as the footnote _1)_, both in the link and the target.
|
29
33
|
* Resize any local images in the HTML file to fit within the maximum page size. (Word will otherwise crash on reading the document.)
|
30
34
|
* Optionally apply list styles with predefined bullet and numbering from a Word CSS to the unordered and ordered lists in the document, restarting numbering for each ordered list.
|
31
35
|
* Convert all lists to native Word HTML rendering (using paragraphs with `MsoListParagraphCxSpFirst, MsoListParagraphCxSpMiddle, MsoListParagraphCxSpLast` styles)
|
@@ -113,7 +117,7 @@ The bad news is that Word's understanding of HTML is HTML 4. In order for bookma
|
|
113
117
|
|
114
118
|
The good news with generating a Word document via HTML is that Word understands CSS, and you can determine much of what the Word document looks like by manipulating that CSS. That extends to features that are not part of HTML CSS: if you want to work out how to get Word to do something in CSS, save a Word document that already does what you want as HTML, and inspect the HTML and CSS you get.
|
115
119
|
|
116
|
-
The bad news is that Word's implementation of CSS is poorly documented -- even if Office HTML is documented in a 1300 page document (online at https://stigmortenmyre.no/mso/, https://www.rodriguezcommaj.com/assets/resources/microsoft-office-html-and-xml-reference.pdf), and the CSS selectors are only partially and selectively implemented. For list styles, for example, `mso-level-text` governs how the list label is displayed; but it is only recognised in a `@list` style: it is ignored in a CSS rule like `ol li`, or in a `style` attribute on a node. Working out the right CSS for what you want will take some trial and error, and you are better placed to try to do things Word's way than the right way.
|
120
|
+
The bad news is that Word's implementation of CSS is poorly documented -- even if Office HTML is documented in a 1300 page document (online at https://stigmortenmyre.no/mso/, https://www.rodriguezcommaj.com/assets/resources/microsoft-office-html-and-xml-reference.pdf), and the CSS selectors are only partially and selectively implemented. For list styles, for example, `mso-level-text` governs how the list label is displayed; but it is only recognised in a `@list` style: it is ignored in a CSS rule like `ol li`, or in a `style` attribute on a node. CSS selectors only support classes, in ancestor relations: `p.class1 ol.class2` is supported, but `#id1` is not, and neither is `p > ol`. Working out the right CSS for what you want will take some trial and error, and you are better placed to try to do things Word's way than the right way.
|
117
121
|
|
118
122
|
=== XSLT
|
119
123
|
|
data/html2doc.gemspec
CHANGED
@@ -33,7 +33,6 @@ Gem::Specification.new do |spec|
|
|
33
33
|
spec.add_dependency "uuidtools"
|
34
34
|
spec.add_dependency "asciimath", "~> 1.0.9"
|
35
35
|
|
36
|
-
spec.add_development_dependency "bundler", "~> 2.0.1"
|
37
36
|
spec.add_development_dependency "byebug", "~> 9.1"
|
38
37
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
39
38
|
spec.add_development_dependency "guard", "~> 2.14"
|
data/lib/html2doc/lists.rb
CHANGED
@@ -2,7 +2,7 @@ require "uuidtools"
|
|
2
2
|
require "asciimath"
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
|
-
require "
|
5
|
+
require "uuidtools"
|
6
6
|
|
7
7
|
module Html2Doc
|
8
8
|
def self.style_list(li, level, liststyle, listnumber)
|
@@ -15,13 +15,8 @@ module Html2Doc
|
|
15
15
|
li["style"] += "mso-list:#{liststyle} level#{level} lfo#{listnumber};"
|
16
16
|
end
|
17
17
|
|
18
|
-
def self.
|
19
|
-
|
20
|
-
@listnumber += 1 if level == 1
|
21
|
-
list["seen"] = true if level == 1
|
22
|
-
(list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |li|
|
23
|
-
style_list(li, level, liststyles[listtype], @listnumber)
|
24
|
-
if [:ul, :ol].include? listtype
|
18
|
+
def self.list_add1(li, liststyles, listtype, level)
|
19
|
+
if [:ul, :ol].include? listtype
|
25
20
|
list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
|
26
21
|
liststyles, :ul, level + 1)
|
27
22
|
list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
|
@@ -32,6 +27,20 @@ module Html2Doc
|
|
32
27
|
list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
|
33
28
|
liststyles, listtype, level + 1)
|
34
29
|
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def self.list_add(xpath, liststyles, listtype, level)
|
33
|
+
xpath.each_with_index do |list, i|
|
34
|
+
@listnumber += 1 if level == 1
|
35
|
+
list["seen"] = true if level == 1
|
36
|
+
list["id"] ||= UUIDTools::UUID.random_create
|
37
|
+
(list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |li|
|
38
|
+
style_list(li, level, liststyles[listtype], @listnumber)
|
39
|
+
list_add1(li, liststyles, listtype, level)
|
40
|
+
end
|
41
|
+
list.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{list['id']}')] | "\
|
42
|
+
".//ol[not(ancestor::li/ancestor::*/@id = '#{list['id']}')]").each do |li|
|
43
|
+
list_add1(li.parent, liststyles, listtype, level-1)
|
35
44
|
end
|
36
45
|
end
|
37
46
|
end
|
data/lib/html2doc/mime.rb
CHANGED
@@ -68,6 +68,7 @@ module Html2Doc
|
|
68
68
|
realSize = ImageSize.path(path).size
|
69
69
|
s = [i["width"].to_i, i["height"].to_i]
|
70
70
|
s = realSize if s[0].zero? && s[1].zero?
|
71
|
+
return [nil, nil] if realSize[0].nil? || realSize[1].nil?
|
71
72
|
s[1] = s[0] * realSize[1] / realSize[0] if s[1].zero? && !s[0].zero?
|
72
73
|
s[0] = s[1] * realSize[0] / realSize[1] if s[0].zero? && !s[1].zero?
|
73
74
|
s = [(s[0] * maxheight / s[1]).ceil, maxheight] if s[1] > maxheight
|
@@ -87,10 +88,9 @@ module Html2Doc
|
|
87
88
|
|
88
89
|
# only processes locally stored images
|
89
90
|
def self.image_cleanup(docxml, dir, localdir)
|
90
|
-
#docxml.xpath(IMAGE_PATH).each do |i|
|
91
91
|
docxml.traverse do |i|
|
92
92
|
next unless i.element? && %w(img v:imagedata).include?(i.name)
|
93
|
-
warnsvg(i["src"])
|
93
|
+
#warnsvg(i["src"])
|
94
94
|
next if /^http/.match i["src"]
|
95
95
|
next if %r{^data:image/[^;]+;base64}.match i["src"]
|
96
96
|
local_filename = %r{^([A-Z]:)?/}.match(i["src"]) ? i["src"] :
|
@@ -115,12 +115,12 @@ module Html2Doc
|
|
115
115
|
if a.size == 2 && !(/ src="https?:/.match a[1]) &&
|
116
116
|
!(%r{ src="data:image/[^;]+;base64}.match a[1])
|
117
117
|
m = / src=['"](?<src>[^"']+)['"]/.match a[1]
|
118
|
-
warnsvg(m[:src])
|
119
|
-
m2 = /\.(?<suffix
|
120
|
-
new_filename = "
|
118
|
+
#warnsvg(m[:src])
|
119
|
+
m2 = /\.(?<suffix>[a-zA-Z_0-9]+)$/.match m[:src]
|
120
|
+
new_filename = "#{mkuuid}.#{m2[:suffix]}"
|
121
121
|
old_filename = %r{^([A-Z]:)?/}.match(m[:src]) ? m[:src] : File.join(localdir, m[:src])
|
122
|
-
FileUtils.cp old_filename, File.join(dir,
|
123
|
-
a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='
|
122
|
+
FileUtils.cp old_filename, File.join(dir, new_filename)
|
123
|
+
a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='file:///C:/Doc/#{filename}_files/#{new_filename}'")
|
124
124
|
end
|
125
125
|
a.join
|
126
126
|
end
|
data/lib/html2doc/notes.rb
CHANGED
@@ -15,7 +15,7 @@ module Html2Doc
|
|
15
15
|
body = docxml.at("//body")
|
16
16
|
list = body.add_child("<div style='mso-element:footnote-list'/>")
|
17
17
|
footnotes.each_with_index do |f, i|
|
18
|
-
fn = list.first.add_child(footnote_container(i + 1))
|
18
|
+
fn = list.first.add_child(footnote_container(docxml, i + 1))
|
19
19
|
f.parent = fn.first
|
20
20
|
footnote_div_to_p(f)
|
21
21
|
end
|
@@ -33,13 +33,16 @@ module Html2Doc
|
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
|
-
|
36
|
+
FN = "<span class='MsoFootnoteReference'>"\
|
37
|
+
"<span style='mso-special-character:footnote'/></span>".freeze
|
38
|
+
|
39
|
+
def self.footnote_container(docxml, i)
|
40
|
+
ref = docxml&.at("//a[@href='#_ftn#{i}']")&.children&.to_xml(indent: 0).
|
41
|
+
gsub(/>\n</, "><") || FN
|
37
42
|
<<~DIV
|
38
43
|
<div style='mso-element:footnote' id='ftn#{i}'>
|
39
44
|
<a style='mso-footnote-id:ftn#{i}' href='#_ftn#{i}'
|
40
|
-
name='_ftnref#{i}' title='' id='_ftnref#{i}'
|
41
|
-
class='MsoFootnoteReference'><span
|
42
|
-
style='mso-special-character:footnote'></span></span></div>
|
45
|
+
name='_ftnref#{i}' title='' id='_ftnref#{i}'>#{ref.strip}</a></div>
|
43
46
|
DIV
|
44
47
|
end
|
45
48
|
|
@@ -49,8 +52,17 @@ module Html2Doc
|
|
49
52
|
note = docxml.at("//*[@name = '#{href}' or @id = '#{href}']")
|
50
53
|
return false if note.nil?
|
51
54
|
set_footnote_link_attrs(a, i)
|
52
|
-
a.
|
53
|
-
|
55
|
+
if a.at("./span[@class = 'MsoFootnoteReference']")
|
56
|
+
a.children.each do |c|
|
57
|
+
if c.name == "span" and c["class"] == "MsoFootnoteReference"
|
58
|
+
c.replace(FN)
|
59
|
+
else
|
60
|
+
c.wrap("<span class='MsoFootnoteReference'></span>")
|
61
|
+
end
|
62
|
+
end
|
63
|
+
else
|
64
|
+
a.children = FN
|
65
|
+
end
|
54
66
|
fn << transform_footnote_text(note)
|
55
67
|
end
|
56
68
|
|
data/lib/html2doc/version.rb
CHANGED
data/spec/html2doc_spec.rb
CHANGED
@@ -348,7 +348,7 @@ RSpec.describe Html2Doc do
|
|
348
348
|
File.open("spec/header_img1.html", "w:UTF-8") do |f|
|
349
349
|
f.write doc.sub(%r{spec/19160-6.png}, File.expand_path(File.join(File.dirname(__FILE__), "19160-6.png")))
|
350
350
|
end
|
351
|
-
Html2Doc.process(html_input(""), filename: "test", header_file: "spec/
|
351
|
+
Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img1.html")
|
352
352
|
doc = guid_clean(File.read("test.doc", encoding: "utf-8"))
|
353
353
|
expect(doc).to match(%r{Content-Type: image/png})
|
354
354
|
expect(doc).to match(%r{file:///C:/Doc/test_files/[^.]+\.png})
|
@@ -566,10 +566,12 @@ RSpec.describe Html2Doc do
|
|
566
566
|
OUTPUT
|
567
567
|
end
|
568
568
|
|
569
|
+
=begin
|
569
570
|
it "warns about SVG" do
|
570
571
|
simple_body = '<img src="https://example.com/19160-6.svg">'
|
571
572
|
expect{ Html2Doc.process(html_input(simple_body), filename: "test") }.to output("https://example.com/19160-6.svg: SVG not supported\n").to_stderr
|
572
573
|
end
|
574
|
+
=end
|
573
575
|
|
574
576
|
it "processes epub:type footnotes" do
|
575
577
|
simple_body = '<div>This is a very simple
|
@@ -611,6 +613,26 @@ RSpec.describe Html2Doc do
|
|
611
613
|
OUTPUT
|
612
614
|
end
|
613
615
|
|
616
|
+
it "processes footnotes with text wrapping the footnote reference" do
|
617
|
+
simple_body = '<div>This is a very simple
|
618
|
+
document<a class="footnote" href="#a1">(<span class="MsoFootnoteReference">1</span>)</a> allegedly<a class="footnote" href="#a2">2</a></div>
|
619
|
+
<aside id="a1">Footnote</aside>
|
620
|
+
<aside id="a2">Other Footnote</aside>'
|
621
|
+
Html2Doc.process(html_input(simple_body), filename: "test")
|
622
|
+
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
623
|
+
to match_fuzzy(<<~OUTPUT)
|
624
|
+
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
625
|
+
#{word_body('<div>This is a very simple
|
626
|
+
document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
|
627
|
+
'<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
|
628
|
+
<p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a>Footnote</p></div>
|
629
|
+
<div style="mso-element:footnote" id="ftn2">
|
630
|
+
<p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
|
631
|
+
</div>')}
|
632
|
+
#{WORD_FTR1}
|
633
|
+
OUTPUT
|
634
|
+
end
|
635
|
+
|
614
636
|
it "extracts paragraphs from footnotes" do
|
615
637
|
simple_body = '<div>This is a very simple
|
616
638
|
document<a class="footnote" href="#a1">1</a> allegedly<a class="footnote" href="#a2">2</a></div>
|
@@ -633,15 +655,16 @@ RSpec.describe Html2Doc do
|
|
633
655
|
|
634
656
|
it "labels lists with list styles" do
|
635
657
|
simple_body = <<~BODY
|
636
|
-
<div><ul>
|
637
|
-
<li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
|
658
|
+
<div><ul id="0">
|
659
|
+
<li><div><p><ol id="1"><li><ul id="2"><li><p><ol id="3"><li><ol id="4"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li><div><ul id="5"><li>C</li></ul></div>
|
638
660
|
BODY
|
639
661
|
Html2Doc.process(html_input(simple_body), filename: "test", liststyles: {ul: "l1", ol: "l2"})
|
640
662
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
641
663
|
to match_fuzzy(<<~OUTPUT)
|
642
664
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
643
665
|
#{word_body('<div>
|
644
|
-
<p style="mso-list:l1 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p></div>
|
666
|
+
<p style="mso-list:l1 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p><div><p style="mso-list:l1 level1 lfo2;" class="MsoListParagraphCxSpFirst">C</p></div>
|
667
|
+
</div>',
|
645
668
|
'<div style="mso-element:footnote-list"/>')}
|
646
669
|
#{WORD_FTR1}
|
647
670
|
OUTPUT
|
@@ -651,8 +674,8 @@ RSpec.describe Html2Doc do
|
|
651
674
|
it "restarts numbering of lists with list styles" do
|
652
675
|
simple_body = <<~BODY
|
653
676
|
<div>
|
654
|
-
<ol><li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol>
|
655
|
-
<ol><li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol></div>
|
677
|
+
<ol id="1"><li><div><p><ol id="2"><li><ul id="3"><li><p><ol id="4"><li><ol id="5"><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol>
|
678
|
+
<ol id="6"><li><div><p><ol id="7"><li><ul id="8"><li><p><ol id="9"><li><ol id="10"><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol></div>
|
656
679
|
BODY
|
657
680
|
Html2Doc.process(html_input(simple_body), filename: "test", liststyles: {ul: "l1", ol: "l2"})
|
658
681
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
@@ -668,12 +691,12 @@ RSpec.describe Html2Doc do
|
|
668
691
|
|
669
692
|
it "labels lists with multiple list styles" do
|
670
693
|
simple_body = <<~BODY
|
671
|
-
<div><ul class="steps">
|
672
|
-
<li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
|
673
|
-
<div><ul>
|
674
|
-
<li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
|
675
|
-
<div><ul class="other">
|
676
|
-
<li><div><p><ol><li><ul><li><p><ol><li><ol><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
|
694
|
+
<div><ul class="steps" id="0">
|
695
|
+
<li><div><p><ol id="1"><li><ul id="2"><li><p><ol id="3"><li><ol id="4"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
|
696
|
+
<div><ul id="5">
|
697
|
+
<li><div><p><ol id="6"><li><ul id="7"><li><p><ol id="8"><li><ol id="9"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
|
698
|
+
<div><ul class="other" id="10">
|
699
|
+
<li><div><p><ol id="11"><li><ul id="12"><li><p><ol id="13"><li><ol id="14"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
|
677
700
|
BODY
|
678
701
|
Html2Doc.process(html_input(simple_body), filename: "test", liststyles: {ul: "l1", ol: "l2", steps: "l3"})
|
679
702
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -108,20 +108,6 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 1.0.9
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: bundler
|
113
|
-
requirement: !ruby/object:Gem::Requirement
|
114
|
-
requirements:
|
115
|
-
- - "~>"
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: 2.0.1
|
118
|
-
type: :development
|
119
|
-
prerelease: false
|
120
|
-
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - "~>"
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: 2.0.1
|
125
111
|
- !ruby/object:Gem::Dependency
|
126
112
|
name: byebug
|
127
113
|
requirement: !ruby/object:Gem::Requirement
|