html2doc 1.1.0 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f1d55301cebf98f75ddfd8966237d19c39bae1c9c8b4b01a39f124ca5da8e588
4
- data.tar.gz: a84cd4d87e2cf1c9f107300c883b6b6b85496e2067e76d7f0a282feecf6550c1
3
+ metadata.gz: b9ed3f5d01d7910a104f86dfe54090ffc3ddf56730f5885293801b3848b24735
4
+ data.tar.gz: 98428b2016bba38f17cb66226e2fb8d96a28c6ad28bd47a3bc0b998ea1c81228
5
5
  SHA512:
6
- metadata.gz: 2ef21ec975f624420db8ee706ab1997ab7941bb7e9f75bfaecdde056346a08f73842cd151ad6f5c7a9711a0616ccc61177bd24f17693f8c31c6db70e7aa78088
7
- data.tar.gz: c204dfcb3f27a24f86908195d47287c74daaaa41d02de5f28fbb99167b17895f7b46e27971ae533c18874f09ed2d32d4205c6dcd9c6ee6b9e045a9dc06656bfa
6
+ metadata.gz: ede857348aa47a2f09df5c0c1929056251729b358815130ed6c7823f14e9a49cbb1439d43eb45104cb6be2104f47b4dda15b156680dfefd687c4d6439e162c89
7
+ data.tar.gz: 4027da3d313f7efb834efc96666d6aedfa509d3b2fc7335b367259833a0050e29b13da92e40514b2afee76b9f84420b81951d1fb9d577643a077643823dcf23c
@@ -16,19 +16,9 @@ jobs:
16
16
  strategy:
17
17
  fail-fast: false
18
18
  matrix:
19
- ruby: [ '2.7', '2.6', '2.5', '2.4' ]
19
+ ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
20
  os: [ ubuntu-latest, windows-latest, macos-latest ]
21
21
  experimental: [ false ]
22
- include:
23
- - ruby: '3.0'
24
- os: 'ubuntu-latest'
25
- experimental: true
26
- - ruby: '3.0'
27
- os: 'windows-latest'
28
- experimental: true
29
- - ruby: '3.0'
30
- os: 'macos-latest'
31
- experimental: true
32
22
  steps:
33
23
  - uses: actions/checkout@v2
34
24
  with:
data/.hound.yml CHANGED
@@ -1,3 +1,5 @@
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
1
3
  ruby:
2
- Enabled: true
4
+ enabled: true
3
5
  config_file: .rubocop.yml
data/.rubocop.yml CHANGED
@@ -1,14 +1,10 @@
1
- # This project follows the Ribose OSS style guide.
2
- # https://github.com/riboseinc/oss-guides
3
- # All project-specific additions and overrides should be specified in this file.
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
4
3
  inherit_from:
5
4
  - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
6
5
 
7
6
  # local repo-specific modifications
7
+ # ...
8
8
 
9
9
  AllCops:
10
- DisplayCopNames: false
11
- StyleGuideCopsOnly: false
12
- TargetRubyVersion: 2.4
13
- Rails:
14
- Enabled: true
10
+ TargetRubyVersion: 2.5
data/Gemfile CHANGED
@@ -10,6 +10,6 @@ end
10
10
 
11
11
  gemspec
12
12
 
13
- if File.exist? 'Gemfile.devel'
14
- eval File.read('Gemfile.devel'), nil, 'Gemfile.devel' # rubocop:disable Security/Eval
13
+ if File.exist? "Gemfile.devel"
14
+ eval File.read("Gemfile.devel"), nil, "Gemfile.devel" # rubocop:disable Security/Eval
15
15
  end
data/Rakefile CHANGED
@@ -3,4 +3,4 @@ require "rspec/core/rake_task"
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
data/bin/html2doc CHANGED
@@ -21,9 +21,8 @@ if ARGV.length < 1
21
21
  end
22
22
 
23
23
  Html2Doc.process(
24
- File.read(ARGV[0], encoding: "utf-8"),
24
+ File.read(ARGV[0], encoding: "utf-8"),
25
25
  filename: ARGV[0].gsub(/\.html?$/, ""),
26
26
  stylesheet: options[:stylesheet],
27
27
  header: options[:header],
28
28
  )
29
-
data/bin/rspec CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
-
2
+
3
3
  # This file was generated by Bundler.
4
4
  #
5
5
  # The application 'rspec' is installed as part of a gem, and
data/html2doc.gemspec CHANGED
@@ -1,5 +1,4 @@
1
-
2
- lib = File.expand_path("../lib", __FILE__)
1
+ lib = File.expand_path("lib", __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require "html2doc/version"
5
4
 
@@ -16,23 +15,23 @@ Gem::Specification.new do |spec|
16
15
  This gem is in active development.
17
16
  DESCRIPTION
18
17
 
19
- spec.homepage = "https://github.com/metanorma/html2doc"
20
- spec.licenses = ["CC-BY-SA-3.0", "BSD-2-Clause"]
18
+ spec.homepage = "https://github.com/metanorma/html2doc"
19
+ spec.licenses = ["CC-BY-SA-3.0", "BSD-2-Clause"]
21
20
 
22
21
  spec.bindir = "bin"
23
22
  spec.require_paths = ["lib"]
24
23
  spec.files = `git ls-files`.split("\n")
25
24
  spec.test_files = `git ls-files -- {spec}/*`.split("\n")
26
- spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
25
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
27
26
 
27
+ spec.add_dependency "asciimath", "~> 2.0.2"
28
28
  spec.add_dependency "htmlentities", "~> 4.3.4"
29
29
  spec.add_dependency "image_size"
30
30
  spec.add_dependency "mime-types"
31
- spec.add_dependency "nokogiri", "~> 1.10.4"
31
+ spec.add_dependency "nokogiri", "~> 1.12"
32
+ spec.add_dependency "plane1converter", "~> 0.0.1"
32
33
  spec.add_dependency "thread_safe"
33
34
  spec.add_dependency "uuidtools"
34
- spec.add_dependency "asciimath", "~> 2.0.2"
35
- spec.add_dependency "plane1converter", "~> 0.0.1"
36
35
 
37
36
  spec.add_development_dependency "byebug", "~> 9.1"
38
37
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
@@ -40,8 +39,8 @@ Gem::Specification.new do |spec|
40
39
  spec.add_development_dependency "guard-rspec", "~> 4.7"
41
40
  spec.add_development_dependency "rake", "~> 12.0"
42
41
  spec.add_development_dependency "rspec", "~> 3.6"
42
+ spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
43
43
  spec.add_development_dependency "rubocop", "~> 1.5.2"
44
44
  spec.add_development_dependency "simplecov", "~> 0.15"
45
45
  spec.add_development_dependency "timecop", "~> 0.9"
46
- spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
47
46
  end
data/lib/html2doc/base.rb CHANGED
@@ -2,8 +2,6 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- #require "xml/xslt"
6
- require "pp"
7
5
  require "fileutils"
8
6
 
9
7
  module Html2Doc
@@ -19,15 +17,17 @@ module Html2Doc
19
17
 
20
18
  def self.process_header(headerfile, hash)
21
19
  return if headerfile.nil?
20
+
22
21
  doc = File.read(headerfile, encoding: "utf-8")
23
- doc = header_image_cleanup(doc, hash[:dir1], hash[:filename], File.dirname(hash[:filename]))
22
+ doc = header_image_cleanup(doc, hash[:dir1], hash[:filename],
23
+ File.dirname(hash[:filename]))
24
24
  File.open("#{hash[:dir1]}/header.html", "w:UTF-8") { |f| f.write(doc) }
25
25
  end
26
26
 
27
27
  def self.clear_dir(dir)
28
28
  Dir.foreach(dir) do |f|
29
29
  fn = File.join(dir, f)
30
- File.delete(fn) if f != '.' && f != '..'
30
+ File.delete(fn) if f != "." && f != ".."
31
31
  end
32
32
  dir
33
33
  end
@@ -72,7 +72,7 @@ module Html2Doc
72
72
 
73
73
  def self.to_xhtml(xml)
74
74
  xml.gsub!(/<\?xml[^>]*>/, "")
75
- unless /<!DOCTYPE /.match xml
75
+ unless /<!DOCTYPE /.match? xml
76
76
  xml = '<!DOCTYPE html SYSTEM
77
77
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
78
78
  end
@@ -84,34 +84,35 @@ module Html2Doc
84
84
  DOCTYPE
85
85
 
86
86
  def self.from_xhtml(xml)
87
- xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "").
88
- sub(DOCTYPE, "").
89
- gsub(%{ />}, "/>")
87
+ xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
88
+ .sub(DOCTYPE, "")
89
+ .gsub(%{ />}, "/>")
90
90
  end
91
91
 
92
- def self.msword_fix(r)
92
+ def self.msword_fix(doc)
93
93
  # brain damage in MSWord parser
94
- r.gsub!(%r{<span style="mso-special-character:footnote"/>},
95
- '<span style="mso-special-character:footnote"></span>')
96
- r.gsub!(%r{<div style="mso-element:footnote-list"></div>},
97
- '<div style="mso-element:footnote-list"/>')
98
- r.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
99
- r.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
100
- r.gsub!(%r{<meta http-equiv="Content-Type"},
101
- "<meta http-equiv=Content-Type")
102
- r.gsub!(%r{></m:jc>}, "/>")
103
- r.gsub!(%r{></v:stroke>}, "/>")
104
- r.gsub!(%r{></v:f>}, "/>")
105
- r.gsub!(%r{></v:path>}, "/>")
106
- r.gsub!(%r{></o:lock>}, "/>")
107
- r.gsub!(%r{></v:imagedata>}, "/>")
108
- r.gsub!(%r{></w:wrap>}, "/>")
109
- r.gsub!(%r{&tab;|&amp;tab;}, '<span style="mso-tab-count:1">&#xA0; </span>')
110
- r = r.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
94
+ doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
95
+ '<span style="mso-special-character:footnote"></span>')
96
+ doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
97
+ '<div style="mso-element:footnote-list"/>')
98
+ doc.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
99
+ doc.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
100
+ doc.gsub!(%r{<meta http-equiv="Content-Type"},
101
+ "<meta http-equiv=Content-Type")
102
+ doc.gsub!(%r{></m:jc>}, "/>")
103
+ doc.gsub!(%r{></v:stroke>}, "/>")
104
+ doc.gsub!(%r{></v:f>}, "/>")
105
+ doc.gsub!(%r{></v:path>}, "/>")
106
+ doc.gsub!(%r{></o:lock>}, "/>")
107
+ doc.gsub!(%r{></v:imagedata>}, "/>")
108
+ doc.gsub!(%r{></w:wrap>}, "/>")
109
+ doc.gsub!(%r{<(/)?m:(span|em)\b}, "<\\1\\2")
110
+ doc.gsub!(%r{&tab;|&amp;tab;},
111
+ '<span style="mso-tab-count:1">&#xA0; </span>')
112
+ doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
111
113
  a.size > 2 and a[2] = a[2].gsub(/>\s+</, "><")
112
114
  a
113
115
  end.join
114
- r
115
116
  end
116
117
 
117
118
  PRINT_VIEW = <<~XML.freeze
@@ -127,30 +128,30 @@ module Html2Doc
127
128
  <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
128
129
  XML
129
130
 
130
- def self.define_head1(docxml, dir)
131
+ def self.define_head1(docxml, _dir)
131
132
  docxml.xpath("//*[local-name() = 'head']").each do |h|
132
133
  h.children.first.add_previous_sibling <<~XML
133
- #{PRINT_VIEW}
134
- <link rel="File-List" href="cid:filelist.xml"/>
134
+ #{PRINT_VIEW}
135
+ <link rel="File-List" href="cid:filelist.xml"/>
135
136
  XML
136
137
  end
137
138
  end
138
139
 
139
- def self.filename_substitute(stylesheet, header_filename, filename)
140
- if header_filename.nil?
141
- stylesheet
142
- else
143
- stylesheet.gsub(/url\("[^"]+"\)/) do |m|
144
- /FILENAME/.match(m) ? "url(cid:header.html)" : m
140
+ def self.filename_substitute(head, header_filename)
141
+ return if header_filename.nil?
142
+
143
+ head.xpath(".//*[local-name() = 'style']").each do |s|
144
+ s1 = s.to_xml.gsub(/url\("[^"]+"\)/) do |m|
145
+ /FILENAME/.match?(m) ? "url(cid:header.html)" : m
145
146
  end
147
+ s.replace(s1)
146
148
  end
147
149
  end
148
150
 
149
- def self.stylesheet(filename, header_filename, fn)
150
- (fn.nil? || fn.empty?) &&
151
+ def self.stylesheet(_filename, _header_filename, fn)
152
+ (fn.nil? || fn.empty?) and
151
153
  fn = File.join(File.dirname(__FILE__), "wordstyle.css")
152
154
  stylesheet = File.read(fn, encoding: "UTF-8")
153
- stylesheet = filename_substitute(stylesheet, header_filename, filename)
154
155
  xml = Nokogiri::XML("<style/>")
155
156
  xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
156
157
  xml.root.to_s
@@ -161,6 +162,7 @@ module Html2Doc
161
162
  head = docxml.at("//*[local-name() = 'head']")
162
163
  css = stylesheet(hash[:filename], hash[:header_file], hash[:stylesheet])
163
164
  add_stylesheet(head, title, css)
165
+ filename_substitute(head, hash[:header_file])
164
166
  define_head1(docxml, hash[:dir1])
165
167
  rootnamespace(docxml.root)
166
168
  end
@@ -189,13 +191,13 @@ module Html2Doc
189
191
  end
190
192
 
191
193
  def self.bookmarks(docxml)
192
- docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]").each do |x|
193
- next if x["id"].empty?
194
- next if %w(shapetype v:shapetype shape v:shape).include? x.name
195
- if x.children.empty?
196
- x.add_child("<a name='#{x["id"]}'></a>")
197
- else
198
- x.children.first.previous = "<a name='#{x["id"]}'></a>"
194
+ docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]")
195
+ .each do |x|
196
+ next if x["id"].empty? ||
197
+ %w(shapetype v:shapetype shape v:shape).include?(x.name)
198
+
199
+ if x.children.empty? then x.add_child("<a name='#{x['id']}'></a>")
200
+ else x.children.first.previous = "<a name='#{x['id']}'></a>"
199
201
  end
200
202
  x.delete("id")
201
203
  end
@@ -2,83 +2,87 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- require "uuidtools"
6
5
 
7
6
  module Html2Doc
8
- def self.style_list(li, level, liststyle, listnumber)
7
+ def self.style_list(elem, level, liststyle, listnumber)
9
8
  return unless liststyle
10
- if li["style"]
11
- li["style"] += ";"
9
+
10
+ if elem["style"]
11
+ elem["style"] += ";"
12
12
  else
13
- li["style"] = ""
13
+ elem["style"] = ""
14
14
  end
15
- li["style"] += "mso-list:#{liststyle} level#{level} lfo#{listnumber};"
15
+ elem["style"] += "mso-list:#{liststyle} level#{level} lfo#{listnumber};"
16
16
  end
17
17
 
18
- def self.list_add1(li, liststyles, listtype, level)
19
- if [:ul, :ol].include? listtype
20
- list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
21
- liststyles, :ul, level + 1)
22
- list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
23
- liststyles, :ol, level + 1)
24
- else
25
- list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
26
- liststyles, listtype, level + 1)
27
- list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
28
- liststyles, listtype, level + 1)
29
- end
18
+ def self.list_add1(elem, liststyles, listtype, level)
19
+ if %i[ul ol].include? listtype
20
+ list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
21
+ liststyles, :ul, level + 1)
22
+ list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
23
+ liststyles, :ol, level + 1)
24
+ else
25
+ list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
26
+ liststyles, listtype, level + 1)
27
+ list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
28
+ liststyles, listtype, level + 1)
29
+ end
30
30
  end
31
31
 
32
32
  def self.list_add(xpath, liststyles, listtype, level)
33
- xpath.each_with_index do |list, i|
33
+ xpath.each_with_index do |l, _i|
34
34
  @listnumber += 1 if level == 1
35
- list["seen"] = true if level == 1
36
- list["id"] ||= UUIDTools::UUID.random_create
37
- (list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |li|
35
+ l["seen"] = true if level == 1
36
+ l["id"] ||= UUIDTools::UUID.random_create
37
+ (l.xpath(".//li") - l.xpath(".//ol//li | .//ul//li")).each do |li|
38
38
  style_list(li, level, liststyles[listtype], @listnumber)
39
39
  list_add1(li, liststyles, listtype, level)
40
40
  end
41
- list.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{list['id']}')] | "\
42
- ".//ol[not(ancestor::li/ancestor::*/@id = '#{list['id']}')]").each do |li|
43
- list_add1(li.parent, liststyles, listtype, level-1)
41
+ l.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{l['id']}')] | "\
42
+ ".//ol[not(ancestor::li/ancestor::*/@id = '#{l['id']}')]")
43
+ .each do |li|
44
+ list_add1(li.parent, liststyles, listtype, level - 1)
44
45
  end
45
46
  end
46
47
  end
47
48
 
48
- def self.list2para(u)
49
- return if u.xpath("./li").empty?
50
- u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
51
- u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
52
- u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
53
- u.xpath("./li").each do |l|
49
+ def self.list2para(list)
50
+ return if list.xpath("./li").empty?
51
+
52
+ list.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
53
+ list.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
54
+ list.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
55
+ list.xpath("./li").each do |l|
54
56
  l.name = "p"
55
57
  l["class"] ||= "MsoListParagraphCxSpMiddle"
56
58
  l&.first_element_child&.name == "p" and
57
59
  l.first_element_child.replace(l.first_element_child.children)
58
60
  end
59
- u.replace(u.children)
61
+ list.replace(list.children)
60
62
  end
61
63
 
62
64
  TOPLIST = "[not(ancestor::ul) and not(ancestor::ol)]".freeze
63
65
 
64
- def self.lists1(docxml, liststyles, k)
65
- case k
66
+ def self.lists1(docxml, liststyles, style)
67
+ case style
66
68
  when :ul then list_add(docxml.xpath("//ul[not(@class)]#{TOPLIST}"),
67
- liststyles, :ul, 1)
69
+ liststyles, :ul, 1)
68
70
  when :ol then list_add(docxml.xpath("//ol[not(@class)]#{TOPLIST}"),
69
71
  liststyles, :ol, 1)
70
72
  else
71
- list_add(docxml.xpath("//ol[@class = '#{k.to_s}']#{TOPLIST} | "\
72
- "//ul[@class = '#{k.to_s}']#{TOPLIST}"),
73
- liststyles, k, 1)
73
+ list_add(docxml.xpath("//ol[@class = '#{style}']#{TOPLIST} | "\
74
+ "//ul[@class = '#{style}']#{TOPLIST}"),
75
+ liststyles, style, 1)
74
76
  end
75
77
  end
76
78
 
77
79
  def self.lists_unstyled(docxml, liststyles)
78
- list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
79
- liststyles, :ul, 1) if liststyles.has_key?(:ul)
80
- list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
81
- liststyles, :ul, 1) if liststyles.has_key?(:ol)
80
+ liststyles.has_key?(:ul) and
81
+ list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
82
+ liststyles, :ul, 1)
83
+ liststyles.has_key?(:ol) and
84
+ list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
85
+ liststyles, :ul, 1)
82
86
  docxml.xpath("//ul[@seen] | //ol[@seen]").each do |l|
83
87
  l.delete("seen")
84
88
  end
@@ -86,6 +90,7 @@ module Html2Doc
86
90
 
87
91
  def self.lists(docxml, liststyles)
88
92
  return if liststyles.nil?
93
+
89
94
  @listnumber = 0
90
95
  liststyles.each_key { |k| lists1(docxml, liststyles, k) }
91
96
  lists_unstyled(docxml, liststyles)