isodoc 1.6.2 → 1.6.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +2 -12
  3. data/.hound.yml +3 -1
  4. data/.rubocop.yml +4 -6
  5. data/Rakefile +2 -2
  6. data/isodoc.gemspec +3 -2
  7. data/lib/isodoc-yaml/i18n-en.yaml +1 -0
  8. data/lib/isodoc-yaml/i18n-fr.yaml +1 -0
  9. data/lib/isodoc-yaml/i18n-zh-Hans.yaml +1 -0
  10. data/lib/isodoc.rb +0 -2
  11. data/lib/isodoc/convert.rb +7 -1
  12. data/lib/isodoc/function/blocks.rb +5 -4
  13. data/lib/isodoc/function/cleanup.rb +52 -43
  14. data/lib/isodoc/function/inline.rb +7 -7
  15. data/lib/isodoc/function/references.rb +32 -51
  16. data/lib/isodoc/function/section.rb +28 -16
  17. data/lib/isodoc/function/table.rb +21 -22
  18. data/lib/isodoc/function/terms.rb +6 -7
  19. data/lib/isodoc/function/to_word_html.rb +6 -3
  20. data/lib/isodoc/function/utils.rb +181 -163
  21. data/lib/isodoc/gem_tasks.rb +8 -9
  22. data/lib/isodoc/headlesshtml_convert.rb +8 -7
  23. data/lib/isodoc/html_convert.rb +5 -1
  24. data/lib/isodoc/html_function/comments.rb +14 -12
  25. data/lib/isodoc/html_function/footnotes.rb +14 -7
  26. data/lib/isodoc/html_function/html.rb +30 -26
  27. data/lib/isodoc/html_function/postprocess.rb +191 -182
  28. data/lib/isodoc/html_function/sectionsplit.rb +230 -0
  29. data/lib/isodoc/metadata.rb +22 -20
  30. data/lib/isodoc/metadata_contributor.rb +31 -28
  31. data/lib/isodoc/pdf_convert.rb +11 -13
  32. data/lib/isodoc/presentation_function/bibdata.rb +61 -30
  33. data/lib/isodoc/presentation_function/inline.rb +34 -27
  34. data/lib/isodoc/presentation_function/section.rb +54 -19
  35. data/lib/isodoc/presentation_xml_convert.rb +2 -0
  36. data/lib/isodoc/sassc_importer.rb +1 -1
  37. data/lib/isodoc/version.rb +1 -1
  38. data/lib/isodoc/word_function/postprocess.rb +50 -36
  39. data/lib/isodoc/xref.rb +2 -0
  40. data/lib/isodoc/xref/xref_counter.rb +1 -2
  41. data/lib/isodoc/xref/xref_gen.rb +21 -14
  42. data/lib/isodoc/xref/xref_gen_seq.rb +60 -35
  43. data/lib/isodoc/xref/xref_sect_gen.rb +15 -15
  44. data/spec/assets/scripts_override.html +3 -0
  45. data/spec/isodoc/blocks_spec.rb +624 -997
  46. data/spec/isodoc/cleanup_spec.rb +40 -42
  47. data/spec/isodoc/i18n_spec.rb +694 -821
  48. data/spec/isodoc/inline_spec.rb +482 -328
  49. data/spec/isodoc/metadata_spec.rb +384 -379
  50. data/spec/isodoc/postproc_spec.rb +163 -55
  51. data/spec/isodoc/presentation_xml_spec.rb +355 -278
  52. data/spec/isodoc/ref_spec.rb +5 -5
  53. data/spec/isodoc/section_spec.rb +216 -199
  54. data/spec/isodoc/sectionsplit_spec.rb +190 -0
  55. data/spec/isodoc/table_spec.rb +41 -42
  56. data/spec/isodoc/terms_spec.rb +84 -84
  57. data/spec/isodoc/xref_spec.rb +974 -932
  58. metadata +22 -5
@@ -1,193 +1,211 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module IsoDoc::Function
4
- module Utils
5
- def date_range(date)
6
- self.class.date_range(date)
7
- end
3
+ module IsoDoc
4
+ module Function
5
+ module Utils
6
+ def date_range(date)
7
+ self.class.date_range(date)
8
+ end
8
9
 
9
- def ns(xpath)
10
- self.class.ns(xpath)
11
- end
10
+ def ns(xpath)
11
+ self.class.ns(xpath)
12
+ end
12
13
 
13
- def insert_tab(out, n)
14
- tab = %w(Hans Hant).include?(@script) ? " " : "  "
15
- [1..n].each { out << tab }
16
- end
14
+ def insert_tab(out, count)
15
+ tab = %w(Hans Hant).include?(@script) ? "&#x3000;" : "&nbsp; "
16
+ [1..count].each { out << tab }
17
+ end
17
18
 
18
- # add namespaces for Word fragments
19
- NOKOHEAD = <<~HERE
20
- <!DOCTYPE html SYSTEM
21
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
22
- <html xmlns="http://www.w3.org/1999/xhtml">
23
- <head> <title></title> <meta charset="UTF-8" /> </head>
24
- <body> </body> </html>
25
- HERE
26
-
27
- # block for processing XML document fragments as XHTML,
28
- # to allow for HTMLentities
29
- def noko(&block)
30
- doc = ::Nokogiri::XML.parse(NOKOHEAD)
31
- fragment = doc.fragment("")
32
- ::Nokogiri::XML::Builder.with fragment, &block
33
- fragment.to_xml(encoding: "US-ASCII").lines.map do |l|
34
- l.gsub(/\s*\n/, "")
19
+ # add namespaces for Word fragments
20
+ NOKOHEAD = <<~HERE
21
+ <!DOCTYPE html SYSTEM
22
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
23
+ <html xmlns="http://www.w3.org/1999/xhtml">
24
+ <head> <title></title> <meta charset="UTF-8" /> </head>
25
+ <body> </body> </html>
26
+ HERE
27
+
28
+ # block for processing XML document fragments as XHTML,
29
+ # to allow for HTMLentities
30
+ def noko(&block)
31
+ doc = ::Nokogiri::XML.parse(NOKOHEAD)
32
+ fragment = doc.fragment("")
33
+ ::Nokogiri::XML::Builder.with fragment, &block
34
+ fragment.to_xml(encoding: "US-ASCII").lines.map do |l|
35
+ l.gsub(/\s*\n/, "")
36
+ end
35
37
  end
36
- end
37
38
 
38
- def attr_code(attributes)
39
- attributes = attributes.reject { |_, val| val.nil? }.map
40
- attributes.map do |k, v|
41
- [k, v.is_a?(String) ? HTMLEntities.new.decode(v) : v]
42
- end.to_h
43
- end
39
+ def attr_code(attributes)
40
+ attributes = attributes.reject { |_, val| val.nil? }.map
41
+ attributes.map do |k, v|
42
+ [k, v.is_a?(String) ? HTMLEntities.new.decode(v) : v]
43
+ end.to_h
44
+ end
44
45
 
45
- DOCTYPE_HDR = '<!DOCTYPE html SYSTEM '\
46
- '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
47
-
48
- def to_xhtml(xml)
49
- xml.gsub!(/<\?xml[^>]*>/, "")
50
- /<!DOCTYPE /.match(xml) || (xml = DOCTYPE_HDR + xml)
51
- xml = xml.split(/(\&[^ \r\n\t#;]+;)/).map do |t|
52
- /^(\&[^ \t\r\n#;]+;)/.match?(t) ?
53
- HTMLEntities.new.encode(HTMLEntities.new.decode(t), :hexadecimal) : t
54
- end.join("")
55
- begin
56
- Nokogiri::XML.parse(xml, &:strict)
57
- rescue Nokogiri::XML::SyntaxError => e
58
- File.open("#{@filename}.#{@format}.err", "w:UTF-8") { |f| f.write xml }
59
- abort "Malformed Output XML for #{@format}: #{e} "\
60
- "(see #{@filename}.#{@format}.err)"
46
+ DOCTYPE_HDR = "<!DOCTYPE html SYSTEM "\
47
+ '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
48
+
49
+ def to_xhtml(xml)
50
+ xml = to_xhtml_prep(xml)
51
+ begin
52
+ Nokogiri::XML.parse(xml, &:strict)
53
+ rescue Nokogiri::XML::SyntaxError => e
54
+ File.open("#{@filename}.#{@format}.err", "w:UTF-8") do |f|
55
+ f.write xml
56
+ end
57
+ abort "Malformed Output XML for #{@format}: #{e} "\
58
+ "(see #{@filename}.#{@format}.err)"
59
+ end
61
60
  end
62
- end
63
61
 
64
- def to_xhtml_fragment(xml)
65
- doc = ::Nokogiri::XML.parse(NOKOHEAD)
66
- fragment = doc.fragment(xml)
67
- fragment
68
- end
62
+ def to_xhtml_prep(xml)
63
+ xml.gsub!(/<\?xml[^>]*>/, "")
64
+ /<!DOCTYPE /.match(xml) || (xml = DOCTYPE_HDR + xml)
65
+ xml.split(/(&[^ \r\n\t#;]+;)/).map do |t|
66
+ if /^(&[^ \t\r\n#;]+;)/.match?(t)
67
+ HTMLEntities.new.encode(HTMLEntities.new.decode(t), :hexadecimal)
68
+ else t
69
+ end
70
+ end.join("")
71
+ end
69
72
 
70
- def from_xhtml(xml)
71
- xml.to_xml.sub(%r{ xmlns="http://www.w3.org/1999/xhtml"}, "")
72
- end
73
+ def to_xhtml_fragment(xml)
74
+ doc = ::Nokogiri::XML.parse(NOKOHEAD)
75
+ doc.fragment(xml)
76
+ end
73
77
 
74
- CLAUSE_ANCESTOR =
75
- ".//ancestor::*[local-name() = 'annex' or "\
76
- "local-name() = 'definitions' or "\
77
- "local-name() = 'acknowledgements' or local-name() = 'term' or "\
78
- "local-name() = 'appendix' or local-name() = 'foreword' or "\
79
- "local-name() = 'introduction' or local-name() = 'terms' or "\
80
- "local-name() = 'clause' or local-name() = 'references']/@id"
81
-
82
- def get_clause_id(node)
83
- clause = node.xpath(CLAUSE_ANCESTOR)
84
- clause&.last&.text || nil
85
- end
78
+ def from_xhtml(xml)
79
+ xml.to_xml.sub(%r{ xmlns="http://www.w3.org/1999/xhtml"}, "")
80
+ end
86
81
 
87
- NOTE_CONTAINER_ANCESTOR =
88
- ".//ancestor::*[local-name() = 'annex' or "\
89
- "local-name() = 'foreword' or local-name() = 'appendix' or "\
90
- "local-name() = 'introduction' or local-name() = 'terms' or "\
91
- "local-name() = 'acknowledgements' or local-name() = 'term' or "\
92
- "local-name() = 'clause' or local-name() = 'references' or "\
93
- "local-name() = 'figure' or local-name() = 'formula' or "\
94
- "local-name() = 'table' or local-name() = 'example']/@id"
95
-
96
- def get_note_container_id(node)
97
- container = node.xpath(NOTE_CONTAINER_ANCESTOR)
98
- container&.last&.text || nil
99
- end
82
+ CLAUSE_ANCESTOR =
83
+ ".//ancestor::*[local-name() = 'annex' or "\
84
+ "local-name() = 'definitions' or "\
85
+ "local-name() = 'acknowledgements' or local-name() = 'term' or "\
86
+ "local-name() = 'appendix' or local-name() = 'foreword' or "\
87
+ "local-name() = 'introduction' or local-name() = 'terms' or "\
88
+ "local-name() = 'clause' or local-name() = 'references']/@id"
89
+
90
+ def get_clause_id(node)
91
+ clause = node.xpath(CLAUSE_ANCESTOR)
92
+ clause&.last&.text || nil
93
+ end
100
94
 
101
- def sentence_join(array)
102
- return '' if array.nil? || array.empty?
103
- if array.length == 1 then array[0]
104
- else
105
- @i18n.l10n("#{array[0..-2].join(', ')} "\
106
- "#{@i18n.and} #{array.last}",
107
- @lang, @script)
95
+ NOTE_CONTAINER_ANCESTOR =
96
+ ".//ancestor::*[local-name() = 'annex' or "\
97
+ "local-name() = 'foreword' or local-name() = 'appendix' or "\
98
+ "local-name() = 'introduction' or local-name() = 'terms' or "\
99
+ "local-name() = 'acknowledgements' or local-name() = 'term' or "\
100
+ "local-name() = 'clause' or local-name() = 'references' or "\
101
+ "local-name() = 'figure' or local-name() = 'formula' or "\
102
+ "local-name() = 'table' or local-name() = 'example']/@id"
103
+
104
+ def get_note_container_id(node)
105
+ container = node.xpath(NOTE_CONTAINER_ANCESTOR)
106
+ container&.last&.text || nil
108
107
  end
109
- end
110
108
 
111
- # avoid `; avoid {{ (Liquid Templates); avoid [[ (Javascript)
112
- def extract_delims(text)
113
- @openmathdelim = "(#("
114
- @closemathdelim = ")#)"
115
- while text.include?(@openmathdelim) || text.include?(@closemathdelim)
116
- @openmathdelim += "("
117
- @closemathdelim += ")"
109
+ def sentence_join(array)
110
+ return "" if array.nil? || array.empty?
111
+
112
+ if array.length == 1 then array[0]
113
+ else
114
+ @i18n.l10n("#{array[0..-2].join(', ')} "\
115
+ "#{@i18n.and} #{array.last}",
116
+ @lang, @script)
117
+ end
118
118
  end
119
- [@openmathdelim, @closemathdelim]
120
- end
121
119
 
122
- def header_strip(h)
123
- h = h.to_s.gsub(%r{<br\s*/>}, " ").gsub(/<\/?h[123456][^>]*>/, "")
124
- .gsub(/<\/?b[^>]*>/, "")
125
- h1 = to_xhtml_fragment(h.dup)
126
- h1.traverse do |x|
127
- x.replace(" ") if x.name == "span" && /mso-tab-count/.match(x["style"])
128
- x.remove if x.name == "img"
129
- x.remove if x.name == "span" && x["class"] == "MsoCommentReference"
130
- x.remove if x.name == "a" && x["class"] == "FootnoteRef"
131
- x.remove if x.name == "span" && /mso-bookmark/.match(x["style"])
132
- x.replace(x.children) if x.name == "a"
133
- end
134
- from_xhtml(h1)
135
- end
120
+ # avoid `; avoid {{ (Liquid Templates); avoid [[ (Javascript)
121
+ def extract_delims(text)
122
+ @openmathdelim = "(#("
123
+ @closemathdelim = ")#)"
124
+ while text.include?(@openmathdelim) || text.include?(@closemathdelim)
125
+ @openmathdelim += "("
126
+ @closemathdelim += ")"
127
+ end
128
+ [@openmathdelim, @closemathdelim]
129
+ end
136
130
 
137
- def liquid(doc)
138
- self.class.liquid(doc)
139
- end
131
+ def header_strip(hdr)
132
+ h1 = to_xhtml_fragment(hdr.to_s.gsub(%r{<br\s*/>}, " ")
133
+ .gsub(/<\/?h[123456][^>]*>/, "").gsub(/<\/?b[^>]*>/, "").dup)
134
+ h1.traverse do |x|
135
+ if x.name == "span" && /mso-tab-count/.match(x["style"])
136
+ x.replace(" ")
137
+ elsif header_strip_elem?(x) then x.remove
138
+ elsif x.name == "a" then x.replace(x.children)
139
+ end
140
+ end
141
+ from_xhtml(h1)
142
+ end
140
143
 
141
- def liquid(doc)
142
- # unescape HTML escapes in doc
143
- doc = doc.split(%r<(\{%|%\})>).each_slice(4).map do |a|
144
- a[2] = a[2].gsub(/\&lt;/, "<").gsub(/\&gt;/, ">") if a.size > 2
145
- a.join("")
146
- end.join("")
147
- Liquid::Template.parse(doc)
148
- end
144
+ def header_strip_elem?(elem)
145
+ elem.name == "img" ||
146
+ elem.name == "span" && elem["class"] == "MsoCommentReference" ||
147
+ elem.name == "a" && elem["class"] == "FootnoteRef" ||
148
+ elem.name == "span" && /mso-bookmark/.match(elem["style"])
149
+ end
149
150
 
150
- def empty2nil(v)
151
- return nil if !v.nil? && v.is_a?(String) && v.empty?
152
- v
153
- end
151
+ =begin
152
+ def liquid(doc)
153
+ self.class.liquid(doc)
154
+ end
155
+ =end
156
+
157
+ def liquid(doc)
158
+ # unescape HTML escapes in doc
159
+ doc = doc.split(%r<(\{%|%\})>).each_slice(4).map do |a|
160
+ a[2] = a[2].gsub(/&lt;/, "<").gsub(/&gt;/, ">") if a.size > 2
161
+ a.join("")
162
+ end.join("")
163
+ Liquid::Template.parse(doc)
164
+ end
154
165
 
155
- def populate_template(docxml, _format = nil)
156
- meta = @meta
157
- .get
158
- .merge(@labels ? {labels: @labels} : {})
159
- .merge(@meta.labels ? {labels: @meta.labels} : {})
160
- .merge(fonts_options || {})
161
- template = liquid(docxml)
162
- template.render(meta.map { |k, v| [k.to_s, empty2nil(v)] }.to_h)
163
- .gsub("&lt;", "&#x3c;").gsub("&gt;", "&#x3e;").gsub("&amp;", "&#x26;")
164
- end
166
+ def empty2nil(str)
167
+ return nil if !str.nil? && str.is_a?(String) && str.empty?
165
168
 
166
- def save_dataimage(uri, _relative_dir = true)
167
- %r{^data:(image|application)/(?<imgtype>[^;]+);base64,(?<imgdata>.+)$} =~ uri
168
- imgtype.sub!(/\+[a-z0-9]+$/, "") # svg+xml
169
- imgtype = "png" unless /^[a-z0-9]+$/.match imgtype
170
- Tempfile.open(["image", ".#{imgtype}"]) do |f|
171
- f.binmode
172
- f.write(Base64.strict_decode64(imgdata))
173
- @tempfile_cache << f # persist to the end
174
- f.path
169
+ str
175
170
  end
176
- end
177
171
 
178
- def image_localfile(i)
179
- if /^data:/.match? i["src"]
180
- save_dataimage(i["src"], false)
181
- elsif %r{^([A-Z]:)?/}.match? i["src"]
182
- i["src"]
183
- else
184
- File.join(@localdir, i["src"])
172
+ def populate_template(docxml, _format = nil)
173
+ meta = @meta
174
+ .get
175
+ .merge(@labels ? { labels: @labels } : {})
176
+ .merge(@meta.labels ? { labels: @meta.labels } : {})
177
+ .merge(fonts_options || {})
178
+ template = liquid(docxml)
179
+ template.render(meta.map { |k, v| [k.to_s, empty2nil(v)] }.to_h)
180
+ .gsub("&lt;", "&#x3c;").gsub("&gt;", "&#x3e;").gsub("&amp;", "&#x26;")
185
181
  end
186
- end
187
182
 
188
- def labelled_ancestor(node)
189
- !node.ancestors("example, requirement, recommendation, permission, "\
190
- "note, table, figure, sourcecode").empty?
183
+ def save_dataimage(uri, _relative_dir = true)
184
+ %r{^data:(image|application)/(?<imgtype>[^;]+);base64,(?<imgdata>.+)$} =~ uri
185
+ imgtype.sub!(/\+[a-z0-9]+$/, "") # svg+xml
186
+ imgtype = "png" unless /^[a-z0-9]+$/.match? imgtype
187
+ Tempfile.open(["image", ".#{imgtype}"]) do |f|
188
+ f.binmode
189
+ f.write(Base64.strict_decode64(imgdata))
190
+ @tempfile_cache << f # persist to the end
191
+ f.path
192
+ end
193
+ end
194
+
195
+ def image_localfile(img)
196
+ if /^data:/.match? img["src"]
197
+ save_dataimage(img["src"], false)
198
+ elsif %r{^([A-Z]:)?/}.match? img["src"]
199
+ img["src"]
200
+ else
201
+ File.join(@localdir, img["src"])
202
+ end
203
+ end
204
+
205
+ def labelled_ancestor(node)
206
+ !node.ancestors("example, requirement, recommendation, permission, "\
207
+ "note, table, figure, sourcecode").empty?
208
+ end
191
209
  end
192
210
  end
193
211
  end
@@ -12,12 +12,10 @@ module IsoDoc
12
12
 
13
13
  def install
14
14
  rule ".css" => [proc { |tn| tn.sub(/\.css$/, ".scss") }] do |current_task|
15
- begin
16
- puts(current_task)
17
- compile_scss_task(current_task)
18
- rescue StandardError => e
19
- notify_borken_compilation(e, current_task)
20
- end
15
+ puts(current_task)
16
+ compile_scss_task(current_task)
17
+ rescue StandardError => e
18
+ notify_borken_compilation(e, current_task)
21
19
  end
22
20
 
23
21
  scss_files = Rake::FileList["lib/**/*.scss"]
@@ -88,7 +86,7 @@ module IsoDoc
88
86
  text
89
87
  .gsub("/* LIQUID_COMMENT", "")
90
88
  .gsub("LIQUID_COMMENT */", "")
91
- .gsub('"{{', '{{').gsub('}}"', "}}")
89
+ .gsub('"{{', "{{").gsub('}}"', "}}")
92
90
  end
93
91
 
94
92
  def fonts_placeholder
@@ -107,7 +105,8 @@ module IsoDoc
107
105
  require "sassc"
108
106
 
109
107
  isodoc_path = if Gem.loaded_specs["isodoc"]
110
- File.join(Gem.loaded_specs["isodoc"].full_gem_path, "lib", "isodoc")
108
+ File.join(Gem.loaded_specs["isodoc"].full_gem_path,
109
+ "lib", "isodoc")
111
110
  else
112
111
  File.join("lib", "isodoc")
113
112
  end
@@ -119,7 +118,7 @@ module IsoDoc
119
118
  SassC::Engine.new(fonts_placeholder + sheet_content,
120
119
  syntax: :scss,
121
120
  importer: SasscImporter)
122
- .render
121
+ .render
123
122
  end
124
123
 
125
124
  def compile_scss_task(current_task)
@@ -1,11 +1,10 @@
1
- require_relative "html_function/comments.rb"
2
- require_relative "html_function/footnotes.rb"
3
- require_relative "html_function/html.rb"
1
+ require_relative "html_function/comments"
2
+ require_relative "html_function/footnotes"
3
+ require_relative "html_function/html"
4
4
  require "fileutils"
5
5
 
6
6
  module IsoDoc
7
7
  class HeadlessHtmlConvert < ::IsoDoc::Convert
8
-
9
8
  include HtmlFunction::Comments
10
9
  include HtmlFunction::Footnotes
11
10
  include HtmlFunction::Html
@@ -26,16 +25,18 @@ module IsoDoc
26
25
  docxml, filename, dir = convert_init(file, input_filename, debug)
27
26
  result = convert1(docxml, filename, dir)
28
27
  return result if debug
29
- postprocess(result, filename + ".tmp.html", dir)
28
+
29
+ postprocess(result, "#{filename}.tmp.html", dir)
30
30
  FileUtils.rm_rf dir
31
- strip_head(filename + ".tmp.html", output_filename || "#{filename}.#{@suffix}")
31
+ strip_head("#{filename}.tmp.html",
32
+ output_filename || "#{filename}.#{@suffix}")
32
33
  FileUtils.rm_rf ["#{filename}.tmp.html", tmpimagedir]
33
34
  end
34
35
 
35
36
  def strip_head(input, output)
36
37
  file = File.read(input, encoding: "utf-8")
37
38
  doc = Nokogiri::XML(file)
38
- doc.xpath("//head").each { |x| x.remove }
39
+ doc.xpath("//head").each(&:remove)
39
40
  doc.xpath("//html").each { |x| x.name = "div" }
40
41
  body = doc.at("//body")
41
42
  body.replace(body.children)