isodoc 1.6.2 → 1.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +2 -12
  3. data/.hound.yml +3 -1
  4. data/.rubocop.yml +4 -6
  5. data/Rakefile +2 -2
  6. data/isodoc.gemspec +3 -2
  7. data/lib/isodoc-yaml/i18n-en.yaml +1 -0
  8. data/lib/isodoc-yaml/i18n-fr.yaml +1 -0
  9. data/lib/isodoc-yaml/i18n-zh-Hans.yaml +1 -0
  10. data/lib/isodoc.rb +0 -2
  11. data/lib/isodoc/convert.rb +7 -1
  12. data/lib/isodoc/function/blocks.rb +5 -4
  13. data/lib/isodoc/function/cleanup.rb +52 -43
  14. data/lib/isodoc/function/inline.rb +7 -7
  15. data/lib/isodoc/function/references.rb +32 -51
  16. data/lib/isodoc/function/section.rb +28 -16
  17. data/lib/isodoc/function/table.rb +21 -22
  18. data/lib/isodoc/function/terms.rb +6 -7
  19. data/lib/isodoc/function/to_word_html.rb +6 -3
  20. data/lib/isodoc/function/utils.rb +181 -163
  21. data/lib/isodoc/gem_tasks.rb +8 -9
  22. data/lib/isodoc/headlesshtml_convert.rb +8 -7
  23. data/lib/isodoc/html_convert.rb +5 -1
  24. data/lib/isodoc/html_function/comments.rb +14 -12
  25. data/lib/isodoc/html_function/footnotes.rb +14 -7
  26. data/lib/isodoc/html_function/html.rb +30 -26
  27. data/lib/isodoc/html_function/postprocess.rb +191 -182
  28. data/lib/isodoc/html_function/sectionsplit.rb +230 -0
  29. data/lib/isodoc/metadata.rb +22 -20
  30. data/lib/isodoc/metadata_contributor.rb +31 -28
  31. data/lib/isodoc/pdf_convert.rb +11 -13
  32. data/lib/isodoc/presentation_function/bibdata.rb +61 -30
  33. data/lib/isodoc/presentation_function/inline.rb +34 -27
  34. data/lib/isodoc/presentation_function/section.rb +54 -19
  35. data/lib/isodoc/presentation_xml_convert.rb +2 -0
  36. data/lib/isodoc/sassc_importer.rb +1 -1
  37. data/lib/isodoc/version.rb +1 -1
  38. data/lib/isodoc/word_function/postprocess.rb +50 -36
  39. data/lib/isodoc/xref.rb +2 -0
  40. data/lib/isodoc/xref/xref_counter.rb +1 -2
  41. data/lib/isodoc/xref/xref_gen.rb +21 -14
  42. data/lib/isodoc/xref/xref_gen_seq.rb +60 -35
  43. data/lib/isodoc/xref/xref_sect_gen.rb +15 -15
  44. data/spec/assets/scripts_override.html +3 -0
  45. data/spec/isodoc/blocks_spec.rb +624 -997
  46. data/spec/isodoc/cleanup_spec.rb +40 -42
  47. data/spec/isodoc/i18n_spec.rb +694 -821
  48. data/spec/isodoc/inline_spec.rb +482 -328
  49. data/spec/isodoc/metadata_spec.rb +384 -379
  50. data/spec/isodoc/postproc_spec.rb +163 -55
  51. data/spec/isodoc/presentation_xml_spec.rb +355 -278
  52. data/spec/isodoc/ref_spec.rb +5 -5
  53. data/spec/isodoc/section_spec.rb +216 -199
  54. data/spec/isodoc/sectionsplit_spec.rb +190 -0
  55. data/spec/isodoc/table_spec.rb +41 -42
  56. data/spec/isodoc/terms_spec.rb +84 -84
  57. data/spec/isodoc/xref_spec.rb +974 -932
  58. metadata +22 -5
@@ -1,193 +1,211 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module IsoDoc::Function
4
- module Utils
5
- def date_range(date)
6
- self.class.date_range(date)
7
- end
3
+ module IsoDoc
4
+ module Function
5
+ module Utils
6
+ def date_range(date)
7
+ self.class.date_range(date)
8
+ end
8
9
 
9
- def ns(xpath)
10
- self.class.ns(xpath)
11
- end
10
+ def ns(xpath)
11
+ self.class.ns(xpath)
12
+ end
12
13
 
13
- def insert_tab(out, n)
14
- tab = %w(Hans Hant).include?(@script) ? " " : "  "
15
- [1..n].each { out << tab }
16
- end
14
+ def insert_tab(out, count)
15
+ tab = %w(Hans Hant).include?(@script) ? "&#x3000;" : "&nbsp; "
16
+ [1..count].each { out << tab }
17
+ end
17
18
 
18
- # add namespaces for Word fragments
19
- NOKOHEAD = <<~HERE
20
- <!DOCTYPE html SYSTEM
21
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
22
- <html xmlns="http://www.w3.org/1999/xhtml">
23
- <head> <title></title> <meta charset="UTF-8" /> </head>
24
- <body> </body> </html>
25
- HERE
26
-
27
- # block for processing XML document fragments as XHTML,
28
- # to allow for HTMLentities
29
- def noko(&block)
30
- doc = ::Nokogiri::XML.parse(NOKOHEAD)
31
- fragment = doc.fragment("")
32
- ::Nokogiri::XML::Builder.with fragment, &block
33
- fragment.to_xml(encoding: "US-ASCII").lines.map do |l|
34
- l.gsub(/\s*\n/, "")
19
+ # add namespaces for Word fragments
20
+ NOKOHEAD = <<~HERE
21
+ <!DOCTYPE html SYSTEM
22
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
23
+ <html xmlns="http://www.w3.org/1999/xhtml">
24
+ <head> <title></title> <meta charset="UTF-8" /> </head>
25
+ <body> </body> </html>
26
+ HERE
27
+
28
+ # block for processing XML document fragments as XHTML,
29
+ # to allow for HTMLentities
30
+ def noko(&block)
31
+ doc = ::Nokogiri::XML.parse(NOKOHEAD)
32
+ fragment = doc.fragment("")
33
+ ::Nokogiri::XML::Builder.with fragment, &block
34
+ fragment.to_xml(encoding: "US-ASCII").lines.map do |l|
35
+ l.gsub(/\s*\n/, "")
36
+ end
35
37
  end
36
- end
37
38
 
38
- def attr_code(attributes)
39
- attributes = attributes.reject { |_, val| val.nil? }.map
40
- attributes.map do |k, v|
41
- [k, v.is_a?(String) ? HTMLEntities.new.decode(v) : v]
42
- end.to_h
43
- end
39
+ def attr_code(attributes)
40
+ attributes = attributes.reject { |_, val| val.nil? }.map
41
+ attributes.map do |k, v|
42
+ [k, v.is_a?(String) ? HTMLEntities.new.decode(v) : v]
43
+ end.to_h
44
+ end
44
45
 
45
- DOCTYPE_HDR = '<!DOCTYPE html SYSTEM '\
46
- '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
47
-
48
- def to_xhtml(xml)
49
- xml.gsub!(/<\?xml[^>]*>/, "")
50
- /<!DOCTYPE /.match(xml) || (xml = DOCTYPE_HDR + xml)
51
- xml = xml.split(/(\&[^ \r\n\t#;]+;)/).map do |t|
52
- /^(\&[^ \t\r\n#;]+;)/.match?(t) ?
53
- HTMLEntities.new.encode(HTMLEntities.new.decode(t), :hexadecimal) : t
54
- end.join("")
55
- begin
56
- Nokogiri::XML.parse(xml, &:strict)
57
- rescue Nokogiri::XML::SyntaxError => e
58
- File.open("#{@filename}.#{@format}.err", "w:UTF-8") { |f| f.write xml }
59
- abort "Malformed Output XML for #{@format}: #{e} "\
60
- "(see #{@filename}.#{@format}.err)"
46
+ DOCTYPE_HDR = "<!DOCTYPE html SYSTEM "\
47
+ '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
48
+
49
+ def to_xhtml(xml)
50
+ xml = to_xhtml_prep(xml)
51
+ begin
52
+ Nokogiri::XML.parse(xml, &:strict)
53
+ rescue Nokogiri::XML::SyntaxError => e
54
+ File.open("#{@filename}.#{@format}.err", "w:UTF-8") do |f|
55
+ f.write xml
56
+ end
57
+ abort "Malformed Output XML for #{@format}: #{e} "\
58
+ "(see #{@filename}.#{@format}.err)"
59
+ end
61
60
  end
62
- end
63
61
 
64
- def to_xhtml_fragment(xml)
65
- doc = ::Nokogiri::XML.parse(NOKOHEAD)
66
- fragment = doc.fragment(xml)
67
- fragment
68
- end
62
+ def to_xhtml_prep(xml)
63
+ xml.gsub!(/<\?xml[^>]*>/, "")
64
+ /<!DOCTYPE /.match(xml) || (xml = DOCTYPE_HDR + xml)
65
+ xml.split(/(&[^ \r\n\t#;]+;)/).map do |t|
66
+ if /^(&[^ \t\r\n#;]+;)/.match?(t)
67
+ HTMLEntities.new.encode(HTMLEntities.new.decode(t), :hexadecimal)
68
+ else t
69
+ end
70
+ end.join("")
71
+ end
69
72
 
70
- def from_xhtml(xml)
71
- xml.to_xml.sub(%r{ xmlns="http://www.w3.org/1999/xhtml"}, "")
72
- end
73
+ def to_xhtml_fragment(xml)
74
+ doc = ::Nokogiri::XML.parse(NOKOHEAD)
75
+ doc.fragment(xml)
76
+ end
73
77
 
74
- CLAUSE_ANCESTOR =
75
- ".//ancestor::*[local-name() = 'annex' or "\
76
- "local-name() = 'definitions' or "\
77
- "local-name() = 'acknowledgements' or local-name() = 'term' or "\
78
- "local-name() = 'appendix' or local-name() = 'foreword' or "\
79
- "local-name() = 'introduction' or local-name() = 'terms' or "\
80
- "local-name() = 'clause' or local-name() = 'references']/@id"
81
-
82
- def get_clause_id(node)
83
- clause = node.xpath(CLAUSE_ANCESTOR)
84
- clause&.last&.text || nil
85
- end
78
+ def from_xhtml(xml)
79
+ xml.to_xml.sub(%r{ xmlns="http://www.w3.org/1999/xhtml"}, "")
80
+ end
86
81
 
87
- NOTE_CONTAINER_ANCESTOR =
88
- ".//ancestor::*[local-name() = 'annex' or "\
89
- "local-name() = 'foreword' or local-name() = 'appendix' or "\
90
- "local-name() = 'introduction' or local-name() = 'terms' or "\
91
- "local-name() = 'acknowledgements' or local-name() = 'term' or "\
92
- "local-name() = 'clause' or local-name() = 'references' or "\
93
- "local-name() = 'figure' or local-name() = 'formula' or "\
94
- "local-name() = 'table' or local-name() = 'example']/@id"
95
-
96
- def get_note_container_id(node)
97
- container = node.xpath(NOTE_CONTAINER_ANCESTOR)
98
- container&.last&.text || nil
99
- end
82
+ CLAUSE_ANCESTOR =
83
+ ".//ancestor::*[local-name() = 'annex' or "\
84
+ "local-name() = 'definitions' or "\
85
+ "local-name() = 'acknowledgements' or local-name() = 'term' or "\
86
+ "local-name() = 'appendix' or local-name() = 'foreword' or "\
87
+ "local-name() = 'introduction' or local-name() = 'terms' or "\
88
+ "local-name() = 'clause' or local-name() = 'references']/@id"
89
+
90
+ def get_clause_id(node)
91
+ clause = node.xpath(CLAUSE_ANCESTOR)
92
+ clause&.last&.text || nil
93
+ end
100
94
 
101
- def sentence_join(array)
102
- return '' if array.nil? || array.empty?
103
- if array.length == 1 then array[0]
104
- else
105
- @i18n.l10n("#{array[0..-2].join(', ')} "\
106
- "#{@i18n.and} #{array.last}",
107
- @lang, @script)
95
+ NOTE_CONTAINER_ANCESTOR =
96
+ ".//ancestor::*[local-name() = 'annex' or "\
97
+ "local-name() = 'foreword' or local-name() = 'appendix' or "\
98
+ "local-name() = 'introduction' or local-name() = 'terms' or "\
99
+ "local-name() = 'acknowledgements' or local-name() = 'term' or "\
100
+ "local-name() = 'clause' or local-name() = 'references' or "\
101
+ "local-name() = 'figure' or local-name() = 'formula' or "\
102
+ "local-name() = 'table' or local-name() = 'example']/@id"
103
+
104
+ def get_note_container_id(node)
105
+ container = node.xpath(NOTE_CONTAINER_ANCESTOR)
106
+ container&.last&.text || nil
108
107
  end
109
- end
110
108
 
111
- # avoid `; avoid {{ (Liquid Templates); avoid [[ (Javascript)
112
- def extract_delims(text)
113
- @openmathdelim = "(#("
114
- @closemathdelim = ")#)"
115
- while text.include?(@openmathdelim) || text.include?(@closemathdelim)
116
- @openmathdelim += "("
117
- @closemathdelim += ")"
109
+ def sentence_join(array)
110
+ return "" if array.nil? || array.empty?
111
+
112
+ if array.length == 1 then array[0]
113
+ else
114
+ @i18n.l10n("#{array[0..-2].join(', ')} "\
115
+ "#{@i18n.and} #{array.last}",
116
+ @lang, @script)
117
+ end
118
118
  end
119
- [@openmathdelim, @closemathdelim]
120
- end
121
119
 
122
- def header_strip(h)
123
- h = h.to_s.gsub(%r{<br\s*/>}, " ").gsub(/<\/?h[123456][^>]*>/, "")
124
- .gsub(/<\/?b[^>]*>/, "")
125
- h1 = to_xhtml_fragment(h.dup)
126
- h1.traverse do |x|
127
- x.replace(" ") if x.name == "span" && /mso-tab-count/.match(x["style"])
128
- x.remove if x.name == "img"
129
- x.remove if x.name == "span" && x["class"] == "MsoCommentReference"
130
- x.remove if x.name == "a" && x["class"] == "FootnoteRef"
131
- x.remove if x.name == "span" && /mso-bookmark/.match(x["style"])
132
- x.replace(x.children) if x.name == "a"
133
- end
134
- from_xhtml(h1)
135
- end
120
+ # avoid `; avoid {{ (Liquid Templates); avoid [[ (Javascript)
121
+ def extract_delims(text)
122
+ @openmathdelim = "(#("
123
+ @closemathdelim = ")#)"
124
+ while text.include?(@openmathdelim) || text.include?(@closemathdelim)
125
+ @openmathdelim += "("
126
+ @closemathdelim += ")"
127
+ end
128
+ [@openmathdelim, @closemathdelim]
129
+ end
136
130
 
137
- def liquid(doc)
138
- self.class.liquid(doc)
139
- end
131
+ def header_strip(hdr)
132
+ h1 = to_xhtml_fragment(hdr.to_s.gsub(%r{<br\s*/>}, " ")
133
+ .gsub(/<\/?h[123456][^>]*>/, "").gsub(/<\/?b[^>]*>/, "").dup)
134
+ h1.traverse do |x|
135
+ if x.name == "span" && /mso-tab-count/.match(x["style"])
136
+ x.replace(" ")
137
+ elsif header_strip_elem?(x) then x.remove
138
+ elsif x.name == "a" then x.replace(x.children)
139
+ end
140
+ end
141
+ from_xhtml(h1)
142
+ end
140
143
 
141
- def liquid(doc)
142
- # unescape HTML escapes in doc
143
- doc = doc.split(%r<(\{%|%\})>).each_slice(4).map do |a|
144
- a[2] = a[2].gsub(/\&lt;/, "<").gsub(/\&gt;/, ">") if a.size > 2
145
- a.join("")
146
- end.join("")
147
- Liquid::Template.parse(doc)
148
- end
144
+ def header_strip_elem?(elem)
145
+ elem.name == "img" ||
146
+ elem.name == "span" && elem["class"] == "MsoCommentReference" ||
147
+ elem.name == "a" && elem["class"] == "FootnoteRef" ||
148
+ elem.name == "span" && /mso-bookmark/.match(elem["style"])
149
+ end
149
150
 
150
- def empty2nil(v)
151
- return nil if !v.nil? && v.is_a?(String) && v.empty?
152
- v
153
- end
151
+ =begin
152
+ def liquid(doc)
153
+ self.class.liquid(doc)
154
+ end
155
+ =end
156
+
157
+ def liquid(doc)
158
+ # unescape HTML escapes in doc
159
+ doc = doc.split(%r<(\{%|%\})>).each_slice(4).map do |a|
160
+ a[2] = a[2].gsub(/&lt;/, "<").gsub(/&gt;/, ">") if a.size > 2
161
+ a.join("")
162
+ end.join("")
163
+ Liquid::Template.parse(doc)
164
+ end
154
165
 
155
- def populate_template(docxml, _format = nil)
156
- meta = @meta
157
- .get
158
- .merge(@labels ? {labels: @labels} : {})
159
- .merge(@meta.labels ? {labels: @meta.labels} : {})
160
- .merge(fonts_options || {})
161
- template = liquid(docxml)
162
- template.render(meta.map { |k, v| [k.to_s, empty2nil(v)] }.to_h)
163
- .gsub("&lt;", "&#x3c;").gsub("&gt;", "&#x3e;").gsub("&amp;", "&#x26;")
164
- end
166
+ def empty2nil(str)
167
+ return nil if !str.nil? && str.is_a?(String) && str.empty?
165
168
 
166
- def save_dataimage(uri, _relative_dir = true)
167
- %r{^data:(image|application)/(?<imgtype>[^;]+);base64,(?<imgdata>.+)$} =~ uri
168
- imgtype.sub!(/\+[a-z0-9]+$/, "") # svg+xml
169
- imgtype = "png" unless /^[a-z0-9]+$/.match imgtype
170
- Tempfile.open(["image", ".#{imgtype}"]) do |f|
171
- f.binmode
172
- f.write(Base64.strict_decode64(imgdata))
173
- @tempfile_cache << f # persist to the end
174
- f.path
169
+ str
175
170
  end
176
- end
177
171
 
178
- def image_localfile(i)
179
- if /^data:/.match? i["src"]
180
- save_dataimage(i["src"], false)
181
- elsif %r{^([A-Z]:)?/}.match? i["src"]
182
- i["src"]
183
- else
184
- File.join(@localdir, i["src"])
172
+ def populate_template(docxml, _format = nil)
173
+ meta = @meta
174
+ .get
175
+ .merge(@labels ? { labels: @labels } : {})
176
+ .merge(@meta.labels ? { labels: @meta.labels } : {})
177
+ .merge(fonts_options || {})
178
+ template = liquid(docxml)
179
+ template.render(meta.map { |k, v| [k.to_s, empty2nil(v)] }.to_h)
180
+ .gsub("&lt;", "&#x3c;").gsub("&gt;", "&#x3e;").gsub("&amp;", "&#x26;")
185
181
  end
186
- end
187
182
 
188
- def labelled_ancestor(node)
189
- !node.ancestors("example, requirement, recommendation, permission, "\
190
- "note, table, figure, sourcecode").empty?
183
+ def save_dataimage(uri, _relative_dir = true)
184
+ %r{^data:(image|application)/(?<imgtype>[^;]+);base64,(?<imgdata>.+)$} =~ uri
185
+ imgtype.sub!(/\+[a-z0-9]+$/, "") # svg+xml
186
+ imgtype = "png" unless /^[a-z0-9]+$/.match? imgtype
187
+ Tempfile.open(["image", ".#{imgtype}"]) do |f|
188
+ f.binmode
189
+ f.write(Base64.strict_decode64(imgdata))
190
+ @tempfile_cache << f # persist to the end
191
+ f.path
192
+ end
193
+ end
194
+
195
+ def image_localfile(img)
196
+ if /^data:/.match? img["src"]
197
+ save_dataimage(img["src"], false)
198
+ elsif %r{^([A-Z]:)?/}.match? img["src"]
199
+ img["src"]
200
+ else
201
+ File.join(@localdir, img["src"])
202
+ end
203
+ end
204
+
205
+ def labelled_ancestor(node)
206
+ !node.ancestors("example, requirement, recommendation, permission, "\
207
+ "note, table, figure, sourcecode").empty?
208
+ end
191
209
  end
192
210
  end
193
211
  end
@@ -12,12 +12,10 @@ module IsoDoc
12
12
 
13
13
  def install
14
14
  rule ".css" => [proc { |tn| tn.sub(/\.css$/, ".scss") }] do |current_task|
15
- begin
16
- puts(current_task)
17
- compile_scss_task(current_task)
18
- rescue StandardError => e
19
- notify_borken_compilation(e, current_task)
20
- end
15
+ puts(current_task)
16
+ compile_scss_task(current_task)
17
+ rescue StandardError => e
18
+ notify_borken_compilation(e, current_task)
21
19
  end
22
20
 
23
21
  scss_files = Rake::FileList["lib/**/*.scss"]
@@ -88,7 +86,7 @@ module IsoDoc
88
86
  text
89
87
  .gsub("/* LIQUID_COMMENT", "")
90
88
  .gsub("LIQUID_COMMENT */", "")
91
- .gsub('"{{', '{{').gsub('}}"', "}}")
89
+ .gsub('"{{', "{{").gsub('}}"', "}}")
92
90
  end
93
91
 
94
92
  def fonts_placeholder
@@ -107,7 +105,8 @@ module IsoDoc
107
105
  require "sassc"
108
106
 
109
107
  isodoc_path = if Gem.loaded_specs["isodoc"]
110
- File.join(Gem.loaded_specs["isodoc"].full_gem_path, "lib", "isodoc")
108
+ File.join(Gem.loaded_specs["isodoc"].full_gem_path,
109
+ "lib", "isodoc")
111
110
  else
112
111
  File.join("lib", "isodoc")
113
112
  end
@@ -119,7 +118,7 @@ module IsoDoc
119
118
  SassC::Engine.new(fonts_placeholder + sheet_content,
120
119
  syntax: :scss,
121
120
  importer: SasscImporter)
122
- .render
121
+ .render
123
122
  end
124
123
 
125
124
  def compile_scss_task(current_task)
@@ -1,11 +1,10 @@
1
- require_relative "html_function/comments.rb"
2
- require_relative "html_function/footnotes.rb"
3
- require_relative "html_function/html.rb"
1
+ require_relative "html_function/comments"
2
+ require_relative "html_function/footnotes"
3
+ require_relative "html_function/html"
4
4
  require "fileutils"
5
5
 
6
6
  module IsoDoc
7
7
  class HeadlessHtmlConvert < ::IsoDoc::Convert
8
-
9
8
  include HtmlFunction::Comments
10
9
  include HtmlFunction::Footnotes
11
10
  include HtmlFunction::Html
@@ -26,16 +25,18 @@ module IsoDoc
26
25
  docxml, filename, dir = convert_init(file, input_filename, debug)
27
26
  result = convert1(docxml, filename, dir)
28
27
  return result if debug
29
- postprocess(result, filename + ".tmp.html", dir)
28
+
29
+ postprocess(result, "#{filename}.tmp.html", dir)
30
30
  FileUtils.rm_rf dir
31
- strip_head(filename + ".tmp.html", output_filename || "#{filename}.#{@suffix}")
31
+ strip_head("#{filename}.tmp.html",
32
+ output_filename || "#{filename}.#{@suffix}")
32
33
  FileUtils.rm_rf ["#{filename}.tmp.html", tmpimagedir]
33
34
  end
34
35
 
35
36
  def strip_head(input, output)
36
37
  file = File.read(input, encoding: "utf-8")
37
38
  doc = Nokogiri::XML(file)
38
- doc.xpath("//head").each { |x| x.remove }
39
+ doc.xpath("//head").each(&:remove)
39
40
  doc.xpath("//html").each { |x| x.name = "div" }
40
41
  body = doc.at("//body")
41
42
  body.replace(body.children)