isodoc 1.7.5 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/isodoc.gemspec +8 -7
  3. data/lib/isodoc/class_utils.rb +25 -2
  4. data/lib/isodoc/convert.rb +2 -0
  5. data/lib/isodoc/function/cleanup.rb +4 -0
  6. data/lib/isodoc/function/to_word_html.rb +2 -1
  7. data/lib/isodoc/function/utils.rb +34 -14
  8. data/lib/isodoc/html_function/comments.rb +107 -111
  9. data/lib/isodoc/html_function/footnotes.rb +68 -67
  10. data/lib/isodoc/html_function/html.rb +113 -103
  11. data/lib/isodoc/presentation_function/block.rb +1 -69
  12. data/lib/isodoc/presentation_function/image.rb +112 -0
  13. data/lib/isodoc/presentation_function/inline.rb +16 -78
  14. data/lib/isodoc/presentation_function/terms.rb +179 -0
  15. data/lib/isodoc/presentation_xml_convert.rb +11 -4
  16. data/lib/isodoc/version.rb +1 -1
  17. data/lib/isodoc/word_function/body.rb +176 -174
  18. data/lib/isodoc/word_function/comments.rb +117 -112
  19. data/lib/isodoc/word_function/footnotes.rb +88 -86
  20. data/lib/isodoc/word_function/inline.rb +42 -67
  21. data/lib/isodoc/word_function/postprocess.rb +184 -176
  22. data/lib/isodoc/word_function/postprocess_cover.rb +121 -110
  23. data/lib/isodoc/xref/xref_gen.rb +153 -150
  24. data/lib/isodoc/xref/xref_sect_gen.rb +134 -129
  25. data/lib/isodoc/xslfo_convert.rb +11 -7
  26. data/lib/isodoc-yaml/i18n-ar.yaml +22 -0
  27. data/lib/isodoc-yaml/i18n-de.yaml +20 -0
  28. data/lib/isodoc-yaml/i18n-en.yaml +20 -0
  29. data/lib/isodoc-yaml/i18n-es.yaml +20 -0
  30. data/lib/isodoc-yaml/i18n-fr.yaml +20 -0
  31. data/lib/isodoc-yaml/i18n-ru.yaml +21 -1
  32. data/lib/isodoc-yaml/i18n-zh-Hans.yaml +21 -0
  33. data/lib/metanorma/output/xslfo.rb +4 -11
  34. data/spec/assets/i18n.yaml +3 -1
  35. data/spec/assets/odf.svg +1 -4
  36. data/spec/isodoc/blocks_spec.rb +229 -157
  37. data/spec/isodoc/i18n_spec.rb +8 -8
  38. data/spec/isodoc/inline_spec.rb +285 -32
  39. data/spec/isodoc/postproc_spec.rb +38 -0
  40. data/spec/isodoc/presentation_xml_spec.rb +60 -0
  41. data/spec/isodoc/section_spec.rb +11 -10
  42. data/spec/isodoc/terms_spec.rb +354 -34
  43. data/spec/isodoc/xref_spec.rb +4 -4
  44. data/spec/isodoc/xslfo_convert_spec.rb +34 -9
  45. metadata +49 -33
@@ -1,229 +1,237 @@
1
1
  require "fileutils"
2
2
  require_relative "./postprocess_cover"
3
3
 
4
- module IsoDoc::WordFunction
5
- module Postprocess
6
- # add namespaces for Word fragments
7
- WORD_NOKOHEAD = <<~HERE.freeze
8
- <!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
9
- <html xmlns="http://www.w3.org/1999/xhtml"
10
- xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office"
11
- xmlns:w="urn:schemas-microsoft-com:office:word"
12
- xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
13
- <head> <title></title> <meta charset="UTF-8" /> </head>
14
- <body> </body> </html>
15
- HERE
16
-
17
- def to_word_xhtml_fragment(xml)
18
- doc = ::Nokogiri::XML.parse(WORD_NOKOHEAD)
19
- ::Nokogiri::XML::DocumentFragment.new(doc, xml, doc.root)
20
- end
4
+ module IsoDoc
5
+ module WordFunction
6
+ module Postprocess
7
+ # add namespaces for Word fragments
8
+ WORD_NOKOHEAD = <<~HERE.freeze
9
+ <!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
10
+ <html xmlns="http://www.w3.org/1999/xhtml"
11
+ xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office"
12
+ xmlns:w="urn:schemas-microsoft-com:office:word"
13
+ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
14
+ <head> <title></title> <meta charset="UTF-8" /> </head>
15
+ <body> </body> </html>
16
+ HERE
17
+
18
+ def to_word_xhtml_fragment(xml)
19
+ doc = ::Nokogiri::XML.parse(WORD_NOKOHEAD)
20
+ ::Nokogiri::XML::DocumentFragment.new(doc, xml, doc.root)
21
+ end
21
22
 
22
- def table_note_cleanup(docxml)
23
- super
24
- # preempt html2doc putting MsoNormal there
25
- docxml.xpath("//p[not(self::*[@class])][ancestor::*[@class = 'Note']]")
26
- .each do |p|
27
- p["class"] = "Note"
23
+ def table_note_cleanup(docxml)
24
+ super
25
+ # preempt html2doc putting MsoNormal there
26
+ docxml.xpath("//p[not(self::*[@class])][ancestor::*[@class = 'Note']]")
27
+ .each do |p|
28
+ p["class"] = "Note"
29
+ end
28
30
  end
29
- end
30
31
 
31
- def postprocess(result, filename, dir)
32
- filename = filename.sub(/\.doc$/, "")
33
- header = generate_header(filename, dir)
34
- result = from_xhtml(cleanup(to_xhtml(textcleanup(result))))
35
- toWord(result, filename, dir, header)
36
- @files_to_delete.each { |f| FileUtils.rm_f f }
37
- end
32
+ def postprocess(result, filename, dir)
33
+ filename = filename.sub(/\.doc$/, "")
34
+ header = generate_header(filename, dir)
35
+ result = from_xhtml(cleanup(to_xhtml(textcleanup(result))))
36
+ toWord(result, filename, dir, header)
37
+ @files_to_delete.each { |f| FileUtils.rm_f f }
38
+ end
38
39
 
39
- def toWord(result, filename, dir, header)
40
- result = from_xhtml(word_cleanup(to_xhtml(result)))
41
- @wordstylesheet = wordstylesheet_update
42
- Html2Doc.process(
43
- result,
44
- filename: filename,
45
- stylesheet: @wordstylesheet&.path,
46
- header_file: header&.path, dir: dir,
47
- asciimathdelims: [@openmathdelim, @closemathdelim],
48
- liststyles: { ul: @ulstyle, ol: @olstyle }
49
- )
50
- header&.unlink
51
- @wordstylesheet.unlink if @wordstylesheet.is_a?(Tempfile)
52
- end
40
+ def toWord(result, filename, dir, header)
41
+ result = from_xhtml(word_cleanup(to_xhtml(result)))
42
+ @wordstylesheet = wordstylesheet_update
43
+ Html2Doc.process(
44
+ result,
45
+ filename: filename,
46
+ imagedir: @localdir,
47
+ stylesheet: @wordstylesheet&.path,
48
+ header_file: header&.path, dir: dir,
49
+ asciimathdelims: [@openmathdelim, @closemathdelim],
50
+ liststyles: { ul: @ulstyle, ol: @olstyle }
51
+ )
52
+ header&.unlink
53
+ @wordstylesheet.unlink if @wordstylesheet.is_a?(Tempfile)
54
+ end
53
55
 
54
- def wordstylesheet_update
55
- return if @wordstylesheet.nil?
56
+ def wordstylesheet_update
57
+ return if @wordstylesheet.nil?
56
58
 
57
- f = File.open(@wordstylesheet.path, "a")
58
- @landscapestyle.empty? or f.write(@landscapestyle)
59
- if @wordstylesheet_override && @wordstylesheet
60
- f.write(@wordstylesheet_override.read)
61
- @wordstylesheet_override.close
62
- elsif @wordstylesheet_override && !@wordstylesheet
63
- @wordstylesheet = @wordstylesheet_override
59
+ f = File.open(@wordstylesheet.path, "a")
60
+ @landscapestyle.empty? or f.write(@landscapestyle)
61
+ if @wordstylesheet_override && @wordstylesheet
62
+ f.write(@wordstylesheet_override.read)
63
+ @wordstylesheet_override.close
64
+ elsif @wordstylesheet_override && !@wordstylesheet
65
+ @wordstylesheet = @wordstylesheet_override
66
+ end
67
+ f.close
68
+ @wordstylesheet
64
69
  end
65
- f.close
66
- @wordstylesheet
67
- end
68
70
 
69
- def word_admonition_images(docxml)
70
- docxml.xpath("//div[@class = 'Admonition']//img").each do |i|
71
- i["width"], i["height"] =
72
- Html2Doc.image_resize(i, image_localfile(i), @maxheight, 300)
71
+ def word_admonition_images(docxml)
72
+ docxml.xpath("//div[@class = 'Admonition']//img").each do |i|
73
+ i["width"], i["height"] =
74
+ Html2Doc.image_resize(i, image_localfile(i), @maxheight, 300)
75
+ end
73
76
  end
74
- end
75
77
 
76
- def word_cleanup(docxml)
77
- word_annex_cleanup(docxml)
78
- word_preface(docxml)
79
- word_nested_tables(docxml)
80
- word_colgroup(docxml)
81
- word_table_align(docxml)
82
- word_table_separator(docxml)
83
- word_admonition_images(docxml)
84
- word_list_continuations(docxml)
85
- word_example_cleanup(docxml)
86
- word_pseudocode_cleanup(docxml)
87
- word_image_caption(docxml)
88
- word_section_breaks(docxml)
89
- authority_cleanup(docxml)
90
- word_footnote_format(docxml)
91
- docxml
92
- end
78
+ def word_cleanup(docxml)
79
+ word_annex_cleanup(docxml)
80
+ word_preface(docxml)
81
+ word_nested_tables(docxml)
82
+ word_colgroup(docxml)
83
+ word_table_align(docxml)
84
+ word_table_separator(docxml)
85
+ word_admonition_images(docxml)
86
+ word_list_continuations(docxml)
87
+ word_example_cleanup(docxml)
88
+ word_pseudocode_cleanup(docxml)
89
+ word_image_caption(docxml)
90
+ word_section_breaks(docxml)
91
+ authority_cleanup(docxml)
92
+ word_footnote_format(docxml)
93
+ docxml
94
+ end
93
95
 
94
- def word_colgroup(docxml)
95
- cells2d = {}
96
- docxml.xpath("//table[colgroup]").each do |t|
97
- w = colgroup_widths(t)
98
- t.xpath(".//tr").each_with_index { |_tr, r| cells2d[r] = {} }
99
- t.xpath(".//tr").each_with_index do |tr, r|
100
- tr.xpath("./td | ./th").each_with_index do |td, _i|
101
- x = 0
102
- rs = td&.attr("rowspan")&.to_i || 1
103
- cs = td&.attr("colspan")&.to_i || 1
104
- while cells2d[r][x]
105
- x += 1
106
- end
107
- (r..(r + rs - 1)).each do |y2|
108
- (x..(x + cs - 1)).each do |x2|
109
- cells2d[y2][x2] = 1
96
+ def word_colgroup(docxml)
97
+ cells2d = {}
98
+ docxml.xpath("//table[colgroup]").each do |t|
99
+ w = colgroup_widths(t)
100
+ t.xpath(".//tr").each_with_index { |_tr, r| cells2d[r] = {} }
101
+ t.xpath(".//tr").each_with_index do |tr, r|
102
+ tr.xpath("./td | ./th").each_with_index do |td, _i|
103
+ x = 0
104
+ rs = td&.attr("rowspan")&.to_i || 1
105
+ cs = td&.attr("colspan")&.to_i || 1
106
+ while cells2d[r][x]
107
+ x += 1
110
108
  end
109
+ (r..(r + rs - 1)).each do |y2|
110
+ (x..(x + cs - 1)).each do |x2|
111
+ cells2d[y2][x2] = 1
112
+ end
113
+ end
114
+ width = (x..(x + cs - 1)).each_with_object({ width: 0 }) do |z, m|
115
+ m[:width] += w[z]
116
+ end
117
+ td["width"] = "#{width[:width]}%"
118
+ x += cs
111
119
  end
112
- width = (x..(x + cs - 1)).each_with_object({ width: 0 }) do |z, m|
113
- m[:width] += w[z]
114
- end
115
- td["width"] = "#{width[:width]}%"
116
- x += cs
117
120
  end
118
121
  end
119
122
  end
120
- end
121
123
 
122
- # assume percentages
123
- def colgroup_widths(table)
124
- table.xpath("./colgroup/col").each_with_object([]) do |c, m|
125
- m << c["width"].sub(/%$/, "").to_f
124
+ # assume percentages
125
+ def colgroup_widths(table)
126
+ table.xpath("./colgroup/col").each_with_object([]) do |c, m|
127
+ m << c["width"].sub(/%$/, "").to_f
128
+ end
126
129
  end
127
- end
128
130
 
129
- def word_nested_tables(docxml)
130
- docxml.xpath("//table").each do |t|
131
- t.xpath(".//table").reverse.each do |tt|
132
- t.next = tt.remove
131
+ def word_nested_tables(docxml)
132
+ docxml.xpath("//table").each do |t|
133
+ t.xpath(".//table").reverse.each do |tt|
134
+ t.next = tt.remove
135
+ end
133
136
  end
134
137
  end
135
- end
136
138
 
137
- def style_update(node, css)
138
- return unless node
139
+ def style_update(node, css)
140
+ return unless node
139
141
 
140
- node["style"] = node["style"] ? node["style"].sub(/;?$/, ";#{css}") : css
141
- end
142
+ node["style"] =
143
+ node["style"] ? node["style"].sub(/;?$/, ";#{css}") : css
144
+ end
142
145
 
143
- def word_image_caption(docxml)
144
- docxml.xpath("//p[@class = 'FigureTitle' or @class = 'SourceTitle']")
145
- .each do |t|
146
- if t&.previous_element&.name == "img"
147
- img = t.previous_element
148
- t.previous_element.swap("<p class=\'figure\'>#{img.to_xml}</p>")
146
+ def word_image_caption(docxml)
147
+ docxml.xpath("//p[@class = 'FigureTitle' or @class = 'SourceTitle']")
148
+ .each do |t|
149
+ if t&.previous_element&.name == "img"
150
+ img = t.previous_element
151
+ t.previous_element.swap("<p class=\'figure\'>#{img.to_xml}</p>")
152
+ end
153
+ style_update(t&.previous_element, "page-break-after:avoid;")
149
154
  end
150
- style_update(t&.previous_element, "page-break-after:avoid;")
151
155
  end
152
- end
153
156
 
154
- def word_list_continuations(docxml)
155
- list_add(docxml.xpath("//ul[not(ancestor::ul) and not(ancestor::ol)]"), 1)
156
- list_add(docxml.xpath("//ol[not(ancestor::ul) and not(ancestor::ol)]"), 1)
157
- end
157
+ def word_list_continuations(docxml)
158
+ list_add(docxml.xpath("//ul[not(ancestor::ul) and not(ancestor::ol)]"),
159
+ 1)
160
+ list_add(docxml.xpath("//ol[not(ancestor::ul) and not(ancestor::ol)]"),
161
+ 1)
162
+ end
158
163
 
159
- def list_add(xpath, lvl)
160
- xpath.each do |list|
161
- (list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |l|
162
- l.xpath("./p | ./div | ./table").each_with_index do |p, i|
163
- next if i.zero?
164
+ def list_add(xpath, lvl)
165
+ xpath.each do |list|
166
+ (list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |l|
167
+ l.xpath("./p | ./div | ./table").each_with_index do |p, i|
168
+ next if i.zero?
164
169
 
165
- p.wrap(%{<div class="ListContLevel#{lvl}"/>})
170
+ p.wrap(%{<div class="ListContLevel#{lvl}"/>})
171
+ end
172
+ list_add(l.xpath(".//ul") - l.xpath(".//ul//ul | .//ol//ul"),
173
+ lvl + 1)
174
+ list_add(l.xpath(".//ol") - l.xpath(".//ul//ol | .//ol//ol"),
175
+ lvl + 1)
166
176
  end
167
- list_add(l.xpath(".//ul") - l.xpath(".//ul//ul | .//ol//ul"), lvl + 1)
168
- list_add(l.xpath(".//ol") - l.xpath(".//ul//ol | .//ol//ol"), lvl + 1)
169
177
  end
170
178
  end
171
- end
172
179
 
173
- def word_table_align(docxml)
174
- docxml.xpath("//td[@align]/p | //th[@align]/p").each do |p|
175
- next if p["align"]
180
+ def word_table_align(docxml)
181
+ docxml.xpath("//td[@align]/p | //th[@align]/p").each do |p|
182
+ next if p["align"]
176
183
 
177
- style_update(p, "text-align: #{p.parent['align']}")
184
+ style_update(p, "text-align: #{p.parent['align']}")
185
+ end
178
186
  end
179
- end
180
187
 
181
- def word_table_separator(docxml)
182
- docxml.xpath("//p[@class = 'TableTitle']").each do |t|
183
- next unless t.children.empty?
188
+ def word_table_separator(docxml)
189
+ docxml.xpath("//p[@class = 'TableTitle']").each do |t|
190
+ next unless t.children.empty?
184
191
 
185
- t["style"] = t["style"].sub(/;?$/, ";font-size:0pt;")
186
- t.children = "&nbsp;"
192
+ t["style"] = t["style"].sub(/;?$/, ";font-size:0pt;")
193
+ t.children = "&nbsp;"
194
+ end
187
195
  end
188
- end
189
196
 
190
- def word_annex_cleanup(docxml); end
197
+ def word_annex_cleanup(docxml); end
191
198
 
192
- def word_example_cleanup(docxml)
193
- docxml.xpath("//div[@class = 'example']//p[not(@class)]").each do |p|
194
- p["class"] = "example"
199
+ def word_example_cleanup(docxml)
200
+ docxml.xpath("//div[@class = 'example']//p[not(@class)]").each do |p|
201
+ p["class"] = "example"
202
+ end
195
203
  end
196
- end
197
204
 
198
- def word_pseudocode_cleanup(docxml)
199
- docxml.xpath("//div[@class = 'pseudocode']//p[not(@class)]").each do |p|
200
- p["class"] = "pseudocode"
205
+ def word_pseudocode_cleanup(docxml)
206
+ docxml.xpath("//div[@class = 'pseudocode']//p[not(@class)]").each do |p|
207
+ p["class"] = "pseudocode"
208
+ end
201
209
  end
202
- end
203
210
 
204
- # applies for <div class="WordSectionN_M"><p><pagebreak/></p>...
205
- def word_remove_pb_before_annex(docxml)
206
- docxml.xpath("//div[p/br]").each do |d|
207
- /^WordSection\d+_\d+$/.match(d["class"]) or next
208
- d.elements[0].name == "p" && !d.elements[0].elements.empty? or next
209
- d.elements[0].elements[0].name == "br" &&
210
- d.elements[0].elements[0]["style"] ==
211
- "mso-special-character:line-break;page-break-before:always" or next
212
- d.elements[0].remove
211
+ # applies for <div class="WordSectionN_M"><p><pagebreak/></p>...
212
+ def word_remove_pb_before_annex(docxml)
213
+ docxml.xpath("//div[p/br]").each do |d|
214
+ /^WordSection\d+_\d+$/.match(d["class"]) or next
215
+ d.elements[0].name == "p" && !d.elements[0].elements.empty? or next
216
+ d.elements[0].elements[0].name == "br" &&
217
+ d.elements[0].elements[0]["style"] ==
218
+ "mso-special-character:line-break;page-break-before:always" or next
219
+ d.elements[0].remove
220
+ end
213
221
  end
214
- end
215
222
 
216
- def word_footnote_format(docxml)
217
- # the content is in a[@epub:type = 'footnote']//sup, but in Word,
218
- # we need to inject content around the autonumbered footnote reference
219
- docxml.xpath("//a[@epub:type = 'footnote']").each do |x|
220
- footnote_reference_format(x)
221
- end
222
- docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
223
- "//span[@class = 'TableFootnoteRef']").each do |x|
224
- table_footnote_reference_format(x)
223
+ def word_footnote_format(docxml)
224
+ # the content is in a[@epub:type = 'footnote']//sup, but in Word,
225
+ # we need to inject content around the autonumbered footnote reference
226
+ docxml.xpath("//a[@epub:type = 'footnote']").each do |x|
227
+ footnote_reference_format(x)
228
+ end
229
+ docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
230
+ "//span[@class = 'TableFootnoteRef']").each do |x|
231
+ table_footnote_reference_format(x)
232
+ end
233
+ docxml
225
234
  end
226
- docxml
227
235
  end
228
236
  end
229
237
  end