html2doc 1.1.0 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f1d55301cebf98f75ddfd8966237d19c39bae1c9c8b4b01a39f124ca5da8e588
4
- data.tar.gz: a84cd4d87e2cf1c9f107300c883b6b6b85496e2067e76d7f0a282feecf6550c1
3
+ metadata.gz: b9ed3f5d01d7910a104f86dfe54090ffc3ddf56730f5885293801b3848b24735
4
+ data.tar.gz: 98428b2016bba38f17cb66226e2fb8d96a28c6ad28bd47a3bc0b998ea1c81228
5
5
  SHA512:
6
- metadata.gz: 2ef21ec975f624420db8ee706ab1997ab7941bb7e9f75bfaecdde056346a08f73842cd151ad6f5c7a9711a0616ccc61177bd24f17693f8c31c6db70e7aa78088
7
- data.tar.gz: c204dfcb3f27a24f86908195d47287c74daaaa41d02de5f28fbb99167b17895f7b46e27971ae533c18874f09ed2d32d4205c6dcd9c6ee6b9e045a9dc06656bfa
6
+ metadata.gz: ede857348aa47a2f09df5c0c1929056251729b358815130ed6c7823f14e9a49cbb1439d43eb45104cb6be2104f47b4dda15b156680dfefd687c4d6439e162c89
7
+ data.tar.gz: 4027da3d313f7efb834efc96666d6aedfa509d3b2fc7335b367259833a0050e29b13da92e40514b2afee76b9f84420b81951d1fb9d577643a077643823dcf23c
@@ -16,19 +16,9 @@ jobs:
16
16
  strategy:
17
17
  fail-fast: false
18
18
  matrix:
19
- ruby: [ '2.7', '2.6', '2.5', '2.4' ]
19
+ ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
20
  os: [ ubuntu-latest, windows-latest, macos-latest ]
21
21
  experimental: [ false ]
22
- include:
23
- - ruby: '3.0'
24
- os: 'ubuntu-latest'
25
- experimental: true
26
- - ruby: '3.0'
27
- os: 'windows-latest'
28
- experimental: true
29
- - ruby: '3.0'
30
- os: 'macos-latest'
31
- experimental: true
32
22
  steps:
33
23
  - uses: actions/checkout@v2
34
24
  with:
data/.hound.yml CHANGED
@@ -1,3 +1,5 @@
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
1
3
  ruby:
2
- Enabled: true
4
+ enabled: true
3
5
  config_file: .rubocop.yml
data/.rubocop.yml CHANGED
@@ -1,14 +1,10 @@
1
- # This project follows the Ribose OSS style guide.
2
- # https://github.com/riboseinc/oss-guides
3
- # All project-specific additions and overrides should be specified in this file.
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
4
3
  inherit_from:
5
4
  - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
6
5
 
7
6
  # local repo-specific modifications
7
+ # ...
8
8
 
9
9
  AllCops:
10
- DisplayCopNames: false
11
- StyleGuideCopsOnly: false
12
- TargetRubyVersion: 2.4
13
- Rails:
14
- Enabled: true
10
+ TargetRubyVersion: 2.5
data/Gemfile CHANGED
@@ -10,6 +10,6 @@ end
10
10
 
11
11
  gemspec
12
12
 
13
- if File.exist? 'Gemfile.devel'
14
- eval File.read('Gemfile.devel'), nil, 'Gemfile.devel' # rubocop:disable Security/Eval
13
+ if File.exist? "Gemfile.devel"
14
+ eval File.read("Gemfile.devel"), nil, "Gemfile.devel" # rubocop:disable Security/Eval
15
15
  end
data/Rakefile CHANGED
@@ -3,4 +3,4 @@ require "rspec/core/rake_task"
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
data/bin/html2doc CHANGED
@@ -21,9 +21,8 @@ if ARGV.length < 1
21
21
  end
22
22
 
23
23
  Html2Doc.process(
24
- File.read(ARGV[0], encoding: "utf-8"),
24
+ File.read(ARGV[0], encoding: "utf-8"),
25
25
  filename: ARGV[0].gsub(/\.html?$/, ""),
26
26
  stylesheet: options[:stylesheet],
27
27
  header: options[:header],
28
28
  )
29
-
data/bin/rspec CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
-
2
+
3
3
  # This file was generated by Bundler.
4
4
  #
5
5
  # The application 'rspec' is installed as part of a gem, and
data/html2doc.gemspec CHANGED
@@ -1,5 +1,4 @@
1
-
2
- lib = File.expand_path("../lib", __FILE__)
1
+ lib = File.expand_path("lib", __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require "html2doc/version"
5
4
 
@@ -16,23 +15,23 @@ Gem::Specification.new do |spec|
16
15
  This gem is in active development.
17
16
  DESCRIPTION
18
17
 
19
- spec.homepage = "https://github.com/metanorma/html2doc"
20
- spec.licenses = ["CC-BY-SA-3.0", "BSD-2-Clause"]
18
+ spec.homepage = "https://github.com/metanorma/html2doc"
19
+ spec.licenses = ["CC-BY-SA-3.0", "BSD-2-Clause"]
21
20
 
22
21
  spec.bindir = "bin"
23
22
  spec.require_paths = ["lib"]
24
23
  spec.files = `git ls-files`.split("\n")
25
24
  spec.test_files = `git ls-files -- {spec}/*`.split("\n")
26
- spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
25
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
27
26
 
27
+ spec.add_dependency "asciimath", "~> 2.0.2"
28
28
  spec.add_dependency "htmlentities", "~> 4.3.4"
29
29
  spec.add_dependency "image_size"
30
30
  spec.add_dependency "mime-types"
31
- spec.add_dependency "nokogiri", "~> 1.10.4"
31
+ spec.add_dependency "nokogiri", "~> 1.12"
32
+ spec.add_dependency "plane1converter", "~> 0.0.1"
32
33
  spec.add_dependency "thread_safe"
33
34
  spec.add_dependency "uuidtools"
34
- spec.add_dependency "asciimath", "~> 2.0.2"
35
- spec.add_dependency "plane1converter", "~> 0.0.1"
36
35
 
37
36
  spec.add_development_dependency "byebug", "~> 9.1"
38
37
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
@@ -40,8 +39,8 @@ Gem::Specification.new do |spec|
40
39
  spec.add_development_dependency "guard-rspec", "~> 4.7"
41
40
  spec.add_development_dependency "rake", "~> 12.0"
42
41
  spec.add_development_dependency "rspec", "~> 3.6"
42
+ spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
43
43
  spec.add_development_dependency "rubocop", "~> 1.5.2"
44
44
  spec.add_development_dependency "simplecov", "~> 0.15"
45
45
  spec.add_development_dependency "timecop", "~> 0.9"
46
- spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
47
46
  end
data/lib/html2doc/base.rb CHANGED
@@ -2,8 +2,6 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- #require "xml/xslt"
6
- require "pp"
7
5
  require "fileutils"
8
6
 
9
7
  module Html2Doc
@@ -19,15 +17,17 @@ module Html2Doc
19
17
 
20
18
  def self.process_header(headerfile, hash)
21
19
  return if headerfile.nil?
20
+
22
21
  doc = File.read(headerfile, encoding: "utf-8")
23
- doc = header_image_cleanup(doc, hash[:dir1], hash[:filename], File.dirname(hash[:filename]))
22
+ doc = header_image_cleanup(doc, hash[:dir1], hash[:filename],
23
+ File.dirname(hash[:filename]))
24
24
  File.open("#{hash[:dir1]}/header.html", "w:UTF-8") { |f| f.write(doc) }
25
25
  end
26
26
 
27
27
  def self.clear_dir(dir)
28
28
  Dir.foreach(dir) do |f|
29
29
  fn = File.join(dir, f)
30
- File.delete(fn) if f != '.' && f != '..'
30
+ File.delete(fn) if f != "." && f != ".."
31
31
  end
32
32
  dir
33
33
  end
@@ -72,7 +72,7 @@ module Html2Doc
72
72
 
73
73
  def self.to_xhtml(xml)
74
74
  xml.gsub!(/<\?xml[^>]*>/, "")
75
- unless /<!DOCTYPE /.match xml
75
+ unless /<!DOCTYPE /.match? xml
76
76
  xml = '<!DOCTYPE html SYSTEM
77
77
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
78
78
  end
@@ -84,34 +84,35 @@ module Html2Doc
84
84
  DOCTYPE
85
85
 
86
86
  def self.from_xhtml(xml)
87
- xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "").
88
- sub(DOCTYPE, "").
89
- gsub(%{ />}, "/>")
87
+ xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
88
+ .sub(DOCTYPE, "")
89
+ .gsub(%{ />}, "/>")
90
90
  end
91
91
 
92
- def self.msword_fix(r)
92
+ def self.msword_fix(doc)
93
93
  # brain damage in MSWord parser
94
- r.gsub!(%r{<span style="mso-special-character:footnote"/>},
95
- '<span style="mso-special-character:footnote"></span>')
96
- r.gsub!(%r{<div style="mso-element:footnote-list"></div>},
97
- '<div style="mso-element:footnote-list"/>')
98
- r.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
99
- r.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
100
- r.gsub!(%r{<meta http-equiv="Content-Type"},
101
- "<meta http-equiv=Content-Type")
102
- r.gsub!(%r{></m:jc>}, "/>")
103
- r.gsub!(%r{></v:stroke>}, "/>")
104
- r.gsub!(%r{></v:f>}, "/>")
105
- r.gsub!(%r{></v:path>}, "/>")
106
- r.gsub!(%r{></o:lock>}, "/>")
107
- r.gsub!(%r{></v:imagedata>}, "/>")
108
- r.gsub!(%r{></w:wrap>}, "/>")
109
- r.gsub!(%r{&tab;|&amp;tab;}, '<span style="mso-tab-count:1">&#xA0; </span>')
110
- r = r.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
94
+ doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
95
+ '<span style="mso-special-character:footnote"></span>')
96
+ doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
97
+ '<div style="mso-element:footnote-list"/>')
98
+ doc.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
99
+ doc.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
100
+ doc.gsub!(%r{<meta http-equiv="Content-Type"},
101
+ "<meta http-equiv=Content-Type")
102
+ doc.gsub!(%r{></m:jc>}, "/>")
103
+ doc.gsub!(%r{></v:stroke>}, "/>")
104
+ doc.gsub!(%r{></v:f>}, "/>")
105
+ doc.gsub!(%r{></v:path>}, "/>")
106
+ doc.gsub!(%r{></o:lock>}, "/>")
107
+ doc.gsub!(%r{></v:imagedata>}, "/>")
108
+ doc.gsub!(%r{></w:wrap>}, "/>")
109
+ doc.gsub!(%r{<(/)?m:(span|em)\b}, "<\\1\\2")
110
+ doc.gsub!(%r{&tab;|&amp;tab;},
111
+ '<span style="mso-tab-count:1">&#xA0; </span>')
112
+ doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
111
113
  a.size > 2 and a[2] = a[2].gsub(/>\s+</, "><")
112
114
  a
113
115
  end.join
114
- r
115
116
  end
116
117
 
117
118
  PRINT_VIEW = <<~XML.freeze
@@ -127,30 +128,30 @@ module Html2Doc
127
128
  <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
128
129
  XML
129
130
 
130
- def self.define_head1(docxml, dir)
131
+ def self.define_head1(docxml, _dir)
131
132
  docxml.xpath("//*[local-name() = 'head']").each do |h|
132
133
  h.children.first.add_previous_sibling <<~XML
133
- #{PRINT_VIEW}
134
- <link rel="File-List" href="cid:filelist.xml"/>
134
+ #{PRINT_VIEW}
135
+ <link rel="File-List" href="cid:filelist.xml"/>
135
136
  XML
136
137
  end
137
138
  end
138
139
 
139
- def self.filename_substitute(stylesheet, header_filename, filename)
140
- if header_filename.nil?
141
- stylesheet
142
- else
143
- stylesheet.gsub(/url\("[^"]+"\)/) do |m|
144
- /FILENAME/.match(m) ? "url(cid:header.html)" : m
140
+ def self.filename_substitute(head, header_filename)
141
+ return if header_filename.nil?
142
+
143
+ head.xpath(".//*[local-name() = 'style']").each do |s|
144
+ s1 = s.to_xml.gsub(/url\("[^"]+"\)/) do |m|
145
+ /FILENAME/.match?(m) ? "url(cid:header.html)" : m
145
146
  end
147
+ s.replace(s1)
146
148
  end
147
149
  end
148
150
 
149
- def self.stylesheet(filename, header_filename, fn)
150
- (fn.nil? || fn.empty?) &&
151
+ def self.stylesheet(_filename, _header_filename, fn)
152
+ (fn.nil? || fn.empty?) and
151
153
  fn = File.join(File.dirname(__FILE__), "wordstyle.css")
152
154
  stylesheet = File.read(fn, encoding: "UTF-8")
153
- stylesheet = filename_substitute(stylesheet, header_filename, filename)
154
155
  xml = Nokogiri::XML("<style/>")
155
156
  xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
156
157
  xml.root.to_s
@@ -161,6 +162,7 @@ module Html2Doc
161
162
  head = docxml.at("//*[local-name() = 'head']")
162
163
  css = stylesheet(hash[:filename], hash[:header_file], hash[:stylesheet])
163
164
  add_stylesheet(head, title, css)
165
+ filename_substitute(head, hash[:header_file])
164
166
  define_head1(docxml, hash[:dir1])
165
167
  rootnamespace(docxml.root)
166
168
  end
@@ -189,13 +191,13 @@ module Html2Doc
189
191
  end
190
192
 
191
193
  def self.bookmarks(docxml)
192
- docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]").each do |x|
193
- next if x["id"].empty?
194
- next if %w(shapetype v:shapetype shape v:shape).include? x.name
195
- if x.children.empty?
196
- x.add_child("<a name='#{x["id"]}'></a>")
197
- else
198
- x.children.first.previous = "<a name='#{x["id"]}'></a>"
194
+ docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]")
195
+ .each do |x|
196
+ next if x["id"].empty? ||
197
+ %w(shapetype v:shapetype shape v:shape).include?(x.name)
198
+
199
+ if x.children.empty? then x.add_child("<a name='#{x['id']}'></a>")
200
+ else x.children.first.previous = "<a name='#{x['id']}'></a>"
199
201
  end
200
202
  x.delete("id")
201
203
  end
@@ -2,83 +2,87 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- require "uuidtools"
6
5
 
7
6
  module Html2Doc
8
- def self.style_list(li, level, liststyle, listnumber)
7
+ def self.style_list(elem, level, liststyle, listnumber)
9
8
  return unless liststyle
10
- if li["style"]
11
- li["style"] += ";"
9
+
10
+ if elem["style"]
11
+ elem["style"] += ";"
12
12
  else
13
- li["style"] = ""
13
+ elem["style"] = ""
14
14
  end
15
- li["style"] += "mso-list:#{liststyle} level#{level} lfo#{listnumber};"
15
+ elem["style"] += "mso-list:#{liststyle} level#{level} lfo#{listnumber};"
16
16
  end
17
17
 
18
- def self.list_add1(li, liststyles, listtype, level)
19
- if [:ul, :ol].include? listtype
20
- list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
21
- liststyles, :ul, level + 1)
22
- list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
23
- liststyles, :ol, level + 1)
24
- else
25
- list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
26
- liststyles, listtype, level + 1)
27
- list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
28
- liststyles, listtype, level + 1)
29
- end
18
+ def self.list_add1(elem, liststyles, listtype, level)
19
+ if %i[ul ol].include? listtype
20
+ list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
21
+ liststyles, :ul, level + 1)
22
+ list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
23
+ liststyles, :ol, level + 1)
24
+ else
25
+ list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
26
+ liststyles, listtype, level + 1)
27
+ list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
28
+ liststyles, listtype, level + 1)
29
+ end
30
30
  end
31
31
 
32
32
  def self.list_add(xpath, liststyles, listtype, level)
33
- xpath.each_with_index do |list, i|
33
+ xpath.each_with_index do |l, _i|
34
34
  @listnumber += 1 if level == 1
35
- list["seen"] = true if level == 1
36
- list["id"] ||= UUIDTools::UUID.random_create
37
- (list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |li|
35
+ l["seen"] = true if level == 1
36
+ l["id"] ||= UUIDTools::UUID.random_create
37
+ (l.xpath(".//li") - l.xpath(".//ol//li | .//ul//li")).each do |li|
38
38
  style_list(li, level, liststyles[listtype], @listnumber)
39
39
  list_add1(li, liststyles, listtype, level)
40
40
  end
41
- list.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{list['id']}')] | "\
42
- ".//ol[not(ancestor::li/ancestor::*/@id = '#{list['id']}')]").each do |li|
43
- list_add1(li.parent, liststyles, listtype, level-1)
41
+ l.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{l['id']}')] | "\
42
+ ".//ol[not(ancestor::li/ancestor::*/@id = '#{l['id']}')]")
43
+ .each do |li|
44
+ list_add1(li.parent, liststyles, listtype, level - 1)
44
45
  end
45
46
  end
46
47
  end
47
48
 
48
- def self.list2para(u)
49
- return if u.xpath("./li").empty?
50
- u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
51
- u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
52
- u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
53
- u.xpath("./li").each do |l|
49
+ def self.list2para(list)
50
+ return if list.xpath("./li").empty?
51
+
52
+ list.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
53
+ list.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
54
+ list.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
55
+ list.xpath("./li").each do |l|
54
56
  l.name = "p"
55
57
  l["class"] ||= "MsoListParagraphCxSpMiddle"
56
58
  l&.first_element_child&.name == "p" and
57
59
  l.first_element_child.replace(l.first_element_child.children)
58
60
  end
59
- u.replace(u.children)
61
+ list.replace(list.children)
60
62
  end
61
63
 
62
64
  TOPLIST = "[not(ancestor::ul) and not(ancestor::ol)]".freeze
63
65
 
64
- def self.lists1(docxml, liststyles, k)
65
- case k
66
+ def self.lists1(docxml, liststyles, style)
67
+ case style
66
68
  when :ul then list_add(docxml.xpath("//ul[not(@class)]#{TOPLIST}"),
67
- liststyles, :ul, 1)
69
+ liststyles, :ul, 1)
68
70
  when :ol then list_add(docxml.xpath("//ol[not(@class)]#{TOPLIST}"),
69
71
  liststyles, :ol, 1)
70
72
  else
71
- list_add(docxml.xpath("//ol[@class = '#{k.to_s}']#{TOPLIST} | "\
72
- "//ul[@class = '#{k.to_s}']#{TOPLIST}"),
73
- liststyles, k, 1)
73
+ list_add(docxml.xpath("//ol[@class = '#{style}']#{TOPLIST} | "\
74
+ "//ul[@class = '#{style}']#{TOPLIST}"),
75
+ liststyles, style, 1)
74
76
  end
75
77
  end
76
78
 
77
79
  def self.lists_unstyled(docxml, liststyles)
78
- list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
79
- liststyles, :ul, 1) if liststyles.has_key?(:ul)
80
- list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
81
- liststyles, :ul, 1) if liststyles.has_key?(:ol)
80
+ liststyles.has_key?(:ul) and
81
+ list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
82
+ liststyles, :ul, 1)
83
+ liststyles.has_key?(:ol) and
84
+ list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
85
+ liststyles, :ul, 1)
82
86
  docxml.xpath("//ul[@seen] | //ol[@seen]").each do |l|
83
87
  l.delete("seen")
84
88
  end
@@ -86,6 +90,7 @@ module Html2Doc
86
90
 
87
91
  def self.lists(docxml, liststyles)
88
92
  return if liststyles.nil?
93
+
89
94
  @listnumber = 0
90
95
  liststyles.each_key { |k| lists1(docxml, liststyles, k) }
91
96
  lists_unstyled(docxml, liststyles)