metanorma-standoc 1.10.4.1 → 1.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. checksums.yaml +4 -4
  2. data/lib/asciidoctor/standoc/base.rb +2 -0
  3. data/lib/asciidoctor/standoc/blocks.rb +2 -0
  4. data/lib/asciidoctor/standoc/cleanup.rb +12 -70
  5. data/lib/asciidoctor/standoc/cleanup_maths.rb +113 -21
  6. data/lib/asciidoctor/standoc/cleanup_reqt.rb +1 -0
  7. data/lib/asciidoctor/standoc/cleanup_section.rb +1 -0
  8. data/lib/asciidoctor/standoc/cleanup_section_names.rb +31 -14
  9. data/lib/asciidoctor/standoc/cleanup_text.rb +70 -0
  10. data/lib/asciidoctor/standoc/isodoc.rng +23 -9
  11. data/lib/asciidoctor/standoc/table.rb +22 -20
  12. data/lib/metanorma/standoc/version.rb +1 -1
  13. data/metanorma-standoc.gemspec +1 -1
  14. data/spec/asciidoctor/cleanup_sections_spec.rb +66 -0
  15. data/spec/asciidoctor/cleanup_spec.rb +28 -1
  16. data/spec/asciidoctor/isobib_cache_spec.rb +8 -8
  17. data/spec/asciidoctor/macros_spec.rb +5 -1
  18. data/spec/asciidoctor/refs_dl_spec.rb +1 -1
  19. data/spec/asciidoctor/refs_spec.rb +218 -442
  20. data/spec/asciidoctor/section_spec.rb +1 -1
  21. data/spec/fixtures/datamodel_description_sections_tree.xml +326 -327
  22. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +227 -139
  23. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +152 -0
  24. data/spec/vcr_cassettes/isobib_get_123.yml +50 -34
  25. data/spec/vcr_cassettes/isobib_get_123_1.yml +102 -70
  26. data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +107 -75
  27. data/spec/vcr_cassettes/isobib_get_123_2001.yml +50 -34
  28. data/spec/vcr_cassettes/isobib_get_124.yml +51 -35
  29. data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +14 -14
  30. data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +47 -45
  31. metadata +6 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4de5b7f8779dae7c359b93ce364ffa7cc406156bf1437c7cdccb6595b7bd958a
4
- data.tar.gz: c35902148b405e451af8487faff391b6eddbbd7b07cd2b6c8d565cd515e4515c
3
+ metadata.gz: dd98ba81df1c144552167e677b04d6152d838de0ea2d539a6bc7bde2a4b7f235
4
+ data.tar.gz: 68d9afff164047bf12f97ac23735cc1d61503172ae8656795393314d127223b6
5
5
  SHA512:
6
- metadata.gz: 6e33b33321d6826a65257a3440c63218021d5d40b69fa58b268d0a2c5e8436aed951e05c7f44ab97c74b5ce916c25ad7040cc962cf38be15ee2ebaf0c6858529
7
- data.tar.gz: f8f719f8062c3c71ebd2883617b984d7432134cb94da1be308e9ede34bf74def5be6267570c76ff9f248e628e443d6b71a60b9f71e1ba436b7c64eb2c2b25c13
6
+ metadata.gz: 611ba153c4160486b0ea51eb1564e09cd81d02f6ca6d5a075e3f38cf321321f2e019ba538341786bd4fe01eac9ef53e71ddc5cc19fdccb4e69189d9e7a720955
7
+ data.tar.gz: 0acb358132209888abe3a276c16abcfdf142570fd271712960e64e02db487df9bb39081a1f4e08da7347777a71240b57b2d8f6dda7d980523981efe5a838495c
@@ -53,6 +53,7 @@ module Asciidoctor
53
53
  htmltoclevels: node.attr("htmltoclevels") || node.attr("toclevels"),
54
54
  doctoclevels: node.attr("doctoclevels") || node.attr("toclevels"),
55
55
  break_up_urls_in_tables: node.attr("break-up-urls-in-tables"),
56
+ suppressasciimathdup: node.attr("suppress-asciimath-dup"),
56
57
  bare: node.attr("bare"),
57
58
  sectionsplit: node.attr("sectionsplit"),
58
59
  }
@@ -87,6 +88,7 @@ module Asciidoctor
87
88
  htmltoclevels: node.attr("htmltoclevels") || node.attr("toclevels"),
88
89
  doctoclevels: node.attr("doctoclevels") || node.attr("toclevels"),
89
90
  break_up_urls_in_tables: node.attr("break-up-urls-in-tables"),
91
+ suppressasciimathdup: node.attr("suppress-asciimath-dup"),
90
92
  bare: node.attr("bare"),
91
93
  }
92
94
 
@@ -161,6 +161,8 @@ module Asciidoctor
161
161
  def para_attrs(node)
162
162
  attr_code(keep_attrs(node)
163
163
  .merge(align: node.attr("align"),
164
+ variant_title: node.role == "variant-title" ? true : nil,
165
+ type: node.attr("type"),
164
166
  id: Metanorma::Utils::anchor_or_uuid(node)))
165
167
  end
166
168
 
@@ -13,19 +13,12 @@ require_relative "./cleanup_amend"
13
13
  require_relative "./cleanup_maths"
14
14
  require_relative "./cleanup_image"
15
15
  require_relative "./cleanup_reqt"
16
+ require_relative "./cleanup_text"
16
17
  require "relaton_iev"
17
18
 
18
19
  module Asciidoctor
19
20
  module Standoc
20
21
  module Cleanup
21
- def textcleanup(result)
22
- text = result.flatten.map { |l| l.sub(/\s*$/, "") } * "\n"
23
- !@keepasciimath and text = asciimath2mathml(text)
24
- text = text.gsub(/\s+<fn /, "<fn ")
25
- text.gsub(%r{<passthrough\s+formats="metanorma">([^<]*)
26
- </passthrough>}mx) { HTMLEntities.new.decode($1) }
27
- end
28
-
29
22
  def cleanup(xmldoc)
30
23
  element_name_cleanup(xmldoc)
31
24
  sections_cleanup(xmldoc)
@@ -72,63 +65,6 @@ module Asciidoctor
72
65
  xmldoc
73
66
  end
74
67
 
75
- IGNORE_DUMBQUOTES = "//pre | //pre//* | //tt | //tt//* | "\
76
- "//sourcecode | //sourcecode//* | //bibdata//* | //stem | "\
77
- "//stem//* | //figure[@class = 'pseudocode'] | "\
78
- "//figure[@class = 'pseudocode']//*".freeze
79
-
80
- def smartquotes_cleanup(xmldoc)
81
- xmldoc.xpath("//date").each { |d| Metanorma::Utils::endash_date(d) }
82
- if @smartquotes then smartquotes_cleanup1(xmldoc)
83
- else dumbquote_cleanup(xmldoc)
84
- end
85
- end
86
-
87
- def smartquotes_cleanup1(xmldoc)
88
- uninterrupt_quotes_around_xml(xmldoc)
89
- dumb2smart_quotes(xmldoc)
90
- end
91
-
92
- # "abc<tag/>", def => "abc",<tag/> def
93
- def uninterrupt_quotes_around_xml(xmldoc)
94
- xmldoc.xpath("//*[following::text()[1]"\
95
- "[starts-with(., '\"') or starts-with(., \"'\")]]")
96
- .each do |x|
97
- next if !x.ancestors("pre, tt, sourcecode, stem, figure").empty?
98
- uninterrupt_quotes_around_xml1(x)
99
- end
100
- end
101
-
102
- def uninterrupt_quotes_around_xml1(elem)
103
- prev = elem.at(".//preceding::text()[1]") or return
104
- /\S$/.match?(prev.text) or return
105
- foll = elem.at(".//following::text()[1]")
106
- m = /^(["'][[:punct:]]*)(\s|$)/.match(HTMLEntities.new.decode(foll&.text)) or return
107
- foll.content = foll.text.sub(/^(["'][[:punct:]]*)/, "")
108
- prev.content = "#{prev.text}#{m[1]}"
109
- end
110
-
111
- def dumb2smart_quotes(xmldoc)
112
- (xmldoc.xpath("//*[child::text()]") - xmldoc.xpath(IGNORE_DUMBQUOTES))
113
- .each do |x|
114
- x.children.each do |n|
115
- next unless n.text?
116
-
117
- /[-'"(<>]|\.\.|\dx/.match(n) or next
118
-
119
- n.replace(Metanorma::Utils::smartformat(n.text))
120
- end
121
- end
122
- end
123
-
124
- def dumbquote_cleanup(xmldoc)
125
- xmldoc.traverse do |n|
126
- next unless n.text?
127
-
128
- n.replace(n.text.gsub(/(?<=\p{Alnum})\u2019(?=\p{Alpha})/, "'")) # .
129
- end
130
- end
131
-
132
68
  def docidentifier_cleanup(xmldoc); end
133
69
 
134
70
  TEXT_ELEMS =
@@ -172,8 +108,8 @@ module Asciidoctor
172
108
 
173
109
  c.xpath("./variant").each do |n|
174
110
  if n.at_xpath("preceding-sibling::node()"\
175
- "[not(self::text()[not(normalize-space())])][1]"\
176
- "[self::variantwrap]")
111
+ "[not(self::text()[not(normalize-space())])][1]"\
112
+ "[self::variantwrap]")
177
113
  n.previous_element << n
178
114
  else
179
115
  n.replace("<variantwrap/>").first << n
@@ -203,11 +139,11 @@ module Asciidoctor
203
139
  end
204
140
 
205
141
  def toc_index(toc, xmldoc)
206
- depths = toc.xpath("./toc-xpath").each_with_object({}) do |x, m|
207
- m[x.text] = x["depth"]
208
- end
142
+ depths = toc_index_depths(toc)
209
143
  depths.keys.each_with_object([]) do |key, arr|
210
144
  xmldoc.xpath(key).each do |x|
145
+ t = x.at("./following-sibling::variant-title[@type = 'toc']") and
146
+ x = t
211
147
  arr << { text: x.children.to_xml, depth: depths[key].to_i,
212
148
  target: x.xpath("(./ancestor-or-self::*/@id)[last()]")[0].text,
213
149
  line: x.line }
@@ -215,6 +151,12 @@ module Asciidoctor
215
151
  end.sort_by { |a| a[:line] }
216
152
  end
217
153
 
154
# Build a lookup of the ToC XPath expressions configured under +toc+.
#
# @param toc [#xpath] node whose "./toc-xpath" children are read
# @return [Hash] text of each toc-xpath child => its "depth" attribute
#   value (as stored, not converted); a later child with the same text
#   replaces the earlier entry
def toc_index_depths(toc)
  depth_by_path = {}
  toc.xpath("./toc-xpath").each do |entry|
    depth_by_path[entry.text] = entry["depth"]
  end
  depth_by_path
end
159
+
218
160
  def toc_cleanup1(toc, xmldoc)
219
161
  depth = 1
220
162
  ret = ""
@@ -29,19 +29,19 @@ module Asciidoctor
29
29
  x.to_xml
30
30
  end
31
31
 
32
- def xml_unescape_mathml(x)
33
- return if x.children.any? { |y| y.element? }
32
+ def xml_unescape_mathml(xml)
33
+ return if xml.children.any? { |y| y.element? }
34
34
 
35
- math = x.text.gsub(/&lt;/, "<").gsub(/&gt;/, ">")
35
+ math = xml.text.gsub(/&lt;/, "<").gsub(/&gt;/, ">")
36
36
  .gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&amp;/, "&")
37
37
  .gsub(/<[^: \r\n\t\/]+:/, "<").gsub(/<\/[^ \r\n\t:]+:/, "</")
38
- x.children = math
38
+ xml.children = math
39
39
  end
40
40
 
41
41
  MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
42
42
 
43
- def mathml_preserve_space(m)
44
- m.xpath(".//m:mtext", "m" => MATHML_NS).each do |x|
43
+ def mathml_preserve_space(math)
44
+ math.xpath(".//m:mtext", "m" => MATHML_NS).each do |x|
45
45
  x.children = x.children.to_xml
46
46
  .gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
47
47
  end
@@ -57,23 +57,23 @@ module Asciidoctor
57
57
  end
58
58
 
59
59
  # presuppose multichar mi upright, singlechar mi MathML default italic
60
- def mathml_italicise(x)
61
- x.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
62
- "m" => MATHML_NS).each do |i|
60
+ def mathml_italicise(xml)
61
+ xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
62
+ "m" => MATHML_NS).each do |i|
63
63
  char = HTMLEntities.new.decode(i.text)
64
64
  i["mathvariant"] = "normal" if mi_italicise?(char)
65
65
  end
66
66
  end
67
67
 
68
- def mi_italicise?(c)
69
- return false if c.length > 1
68
+ def mi_italicise?(char)
69
+ return false if char.length > 1
70
70
 
71
- if /\p{Greek}/.match?(c)
72
- /\p{Lower}/.match(c) && !mathml_mi_italics[:lowergreek] ||
73
- /\p{Upper}/.match(c) && !mathml_mi_italics[:uppergreek]
74
- elsif /\p{Latin}/.match?(c)
75
- /\p{Lower}/.match(c) && !mathml_mi_italics[:lowerroman] ||
76
- /\p{Upper}/.match(c) && !mathml_mi_italics[:upperroman]
71
+ if /\p{Greek}/.match?(char)
72
+ /\p{Lower}/.match(char) && !mathml_mi_italics[:lowergreek] ||
73
+ /\p{Upper}/.match(char) && !mathml_mi_italics[:uppergreek]
74
+ elsif /\p{Latin}/.match?(char)
75
+ /\p{Lower}/.match(char) && !mathml_mi_italics[:lowerroman] ||
76
+ /\p{Upper}/.match(char) && !mathml_mi_italics[:upperroman]
77
77
  else
78
78
  false
79
79
  end
@@ -100,14 +100,14 @@ module Asciidoctor
100
100
  end
101
101
  end
102
102
 
103
- def gather_unitsml(unitsml, xmldoc, t)
104
- tags = xmldoc.xpath(".//m:#{t}", "m" => UNITSML_NS)
103
+ def gather_unitsml(unitsml, xmldoc, tag)
104
+ tags = xmldoc.xpath(".//m:#{tag}", "m" => UNITSML_NS)
105
105
  .each_with_object({}) do |x, m|
106
106
  m[x["id"]] = x.remove
107
107
  end
108
108
  return if tags.empty?
109
109
 
110
- set = unitsml.add_child("<#{t}Set/>").first
110
+ set = unitsml.add_child("<#{tag}Set/>").first
111
111
  tags.each_value { |v| set << v }
112
112
  end
113
113
 
@@ -115,14 +115,106 @@ module Asciidoctor
115
115
  { multiplier: :space }
116
116
  end
117
117
 
118
# Merge two MathML mathvariant values into the single value that
# expresses both, e.g. "italic" combined with "bold" gives "bold-italic".
#
# The method dispatches on +outer+ first and then on +inner+. When no
# merge rule exists for the pair, +inner+ is returned unchanged.
#
# "bold-sans-serif" is the mathvariant token defined by MathML; the
# non-standard spelling "sans-serif-bold" is still accepted in the
# sans-serif-italic branches so previously handled input keeps working.
#
# @param inner [String, nil] first mathvariant value; also the fallback
# @param outer [String, nil] second mathvariant value
# @return [String, nil] the merged variant, or +inner+ if not mergeable
def mathvariant_override(inner, outer)
  case outer
  when "bold"
    case inner
    when "normal" then "bold"
    when "italic" then "bold-italic"
    when "fraktur" then "bold-fraktur"
    when "script" then "bold-script"
    when "sans-serif" then "bold-sans-serif"
    when "sans-serif-italic" then "sans-serif-bold-italic"
    else inner
    end
  when "italic"
    case inner
    when "normal" then "italic"
    when "bold" then "bold-italic"
    when "sans-serif" then "sans-serif-italic"
    when "bold-sans-serif" then "sans-serif-bold-italic"
    else inner
    end
  when "bold-italic"
    case inner
    when "normal", "bold", "italic" then "bold-italic"
    when "sans-serif", "bold-sans-serif", "sans-serif-italic"
      "sans-serif-bold-italic"
    else inner
    end
  when "fraktur"
    case inner
    when "normal" then "fraktur"
    when "bold" then "bold-fraktur"
    else inner
    end
  when "bold-fraktur"
    case inner
    when "normal", "fraktur" then "bold-fraktur"
    else inner
    end
  when "script"
    case inner
    when "normal" then "script"
    when "bold" then "bold-script"
    else inner
    end
  when "bold-script"
    case inner
    when "normal", "script" then "bold-script"
    else inner
    end
  when "sans-serif"
    case inner
    when "normal" then "sans-serif"
    when "bold" then "bold-sans-serif"
    when "italic" then "sans-serif-italic"
    when "bold-italic" then "sans-serif-bold-italic"
    else inner
    end
  when "bold-sans-serif"
    case inner
    when "normal", "bold", "sans-serif" then "bold-sans-serif"
    when "italic", "bold-italic", "sans-serif-italic"
      "sans-serif-bold-italic"
    else inner
    end
  when "sans-serif-italic"
    case inner
    when "normal", "italic", "sans-serif" then "sans-serif-italic"
    # "bold-sans-serif" is the real MathML token; "sans-serif-bold" is
    # kept only for backward compatibility.
    when "bold", "bold-italic", "bold-sans-serif", "sans-serif-bold"
      "sans-serif-bold-italic"
    else inner
    end
  when "sans-serif-bold-italic"
    case inner
    when "normal", "italic", "sans-serif", "sans-serif-italic",
         "bold", "bold-italic", "bold-sans-serif", "sans-serif-bold"
      "sans-serif-bold-italic"
    else inner
    end
  else inner
  end
end
199
+
200
# Fold the mathvariant of every element carrying one into each of its
# descendants that also carry one, so the descendant ends up with the
# combined variant.
#
# Arguments are passed to mathvariant_override in its declared
# (inner, outer) order: when two variants cannot be merged, the
# descendant keeps its own value rather than taking the ancestor's.
#
# @param math [#xpath] root of the MathML fragment to rewrite in place
# @return whatever the outer +each+ returns (the matched node collection)
def mathml_mathvariant(math)
  math.xpath(".//*[@mathvariant]").each do |outer|
    outer.xpath(".//*[@mathvariant]").each do |inner|
      inner["mathvariant"] =
        mathvariant_override(inner["mathvariant"], outer["mathvariant"])
    end
  end
end
208
+
118
209
  def mathml_cleanup(xmldoc)
119
210
  unitsml = Asciimath2UnitsML::Conv.new(asciimath2unitsml_options)
120
211
  xmldoc.xpath("//stem[@type = 'MathML']").each do |x|
121
212
  xml_unescape_mathml(x)
122
213
  mathml_namespace(x)
123
214
  mathml_preserve_space(x)
124
- mathml_italicise(x)
125
215
  unitsml.MathML2UnitsML(x)
216
+ mathml_mathvariant(x)
217
+ mathml_italicise(x)
126
218
  end
127
219
  mathml_unitsML(xmldoc)
128
220
  end
@@ -24,6 +24,7 @@ module Asciidoctor
24
24
  r.children.each do |e|
25
25
  unless e.element? && (reqt_subpart(e.name) ||
26
26
  %w(requirement recommendation permission).include?(e.name))
27
+ next if e.text.strip.empty?
27
28
  t = Nokogiri::XML::Element.new("description", r)
28
29
  e.before(t)
29
30
  t.children = e.remove
@@ -114,6 +114,7 @@ module Asciidoctor
114
114
  sections_order_cleanup(xml)
115
115
  sections_level_cleanup(xml)
116
116
  sections_names_cleanup(xml)
117
+ sections_variant_title_cleanup(xml)
117
118
  change_clauses(xml)
118
119
  end
119
120
 
@@ -37,39 +37,56 @@ module Asciidoctor
37
37
  end
38
38
 
39
39
  def section_names_refs_cleanup(xml)
40
- replace_title(xml, "//references[@normative = 'true']",
40
+ replace_title(xml, "//bibliography/references[@normative = 'true']",
41
41
  @i18n&.normref, true)
42
- replace_title(xml, "//references[@normative = 'false']",
42
+ replace_title(xml, "//bibliography/references[@normative = 'false']",
43
43
  @i18n&.bibliography, true)
44
44
  end
45
45
 
46
46
  NO_SYMABBR = "[.//definitions[not(@type)]]".freeze
47
47
  SYMABBR = "[.//definitions[@type = 'symbols']]"\
48
- "[.//definitions[@type = 'abbreviated_terms']]".freeze
48
+ "[.//definitions[@type = 'abbreviated_terms']]".freeze
49
49
  SYMnoABBR = "[.//definitions[@type = 'symbols']]"\
50
- "[not(.//definitions[@type = 'abbreviated_terms'])]".freeze
50
+ "[not(.//definitions[@type = 'abbreviated_terms'])]".freeze
51
51
  ABBRnoSYM = "[.//definitions[@type = 'abbreviated_terms']]"\
52
- "[not(.//definitions[@type = 'symbols'])]".freeze
52
+ "[not(.//definitions[@type = 'symbols'])]".freeze
53
53
 
54
- def section_names_terms_cleanup(x)
55
- replace_title(x, "//definitions[@type = 'symbols']", @i18n&.symbols)
56
- replace_title(x, "//definitions[@type = 'abbreviated_terms']",
54
+ def section_names_terms_cleanup(xml)
55
+ replace_title(xml, "//definitions[@type = 'symbols']", @i18n&.symbols)
56
+ replace_title(xml, "//definitions[@type = 'abbreviated_terms']",
57
57
  @i18n&.abbrev)
58
- replace_title(x, "//definitions[not(@type)]", @i18n&.symbolsabbrev)
59
- replace_title(x, "//terms#{SYMnoABBR} | //clause[.//terms]#{SYMnoABBR}",
58
+ replace_title(xml, "//definitions[not(@type)]", @i18n&.symbolsabbrev)
59
+ replace_title(xml, "//terms#{SYMnoABBR} | //clause[.//terms]#{SYMnoABBR}",
60
60
  @i18n&.termsdefsymbols, true)
61
- replace_title(x, "//terms#{ABBRnoSYM} | //clause[.//terms]#{ABBRnoSYM}",
61
+ replace_title(xml, "//terms#{ABBRnoSYM} | //clause[.//terms]#{ABBRnoSYM}",
62
62
  @i18n&.termsdefabbrev, true)
63
- replace_title(x, "//terms#{SYMABBR} | //clause[.//terms]#{SYMABBR}",
63
+ replace_title(xml, "//terms#{SYMABBR} | //clause[.//terms]#{SYMABBR}",
64
64
  @i18n&.termsdefsymbolsabbrev, true)
65
- replace_title(x, "//terms#{NO_SYMABBR} | //clause[.//terms]#{NO_SYMABBR}",
65
+ replace_title(xml, "//terms#{NO_SYMABBR} | //clause[.//terms]#{NO_SYMABBR}",
66
66
  @i18n&.termsdefsymbolsabbrev, true)
67
67
  replace_title(
68
- x,
68
+ xml,
69
69
  "//terms[not(.//definitions)] | //clause[.//terms][not(.//definitions)]",
70
70
  @i18n&.termsdef, true
71
71
  )
72
72
  end
73
+
74
# Element names treated as section containers when looking for the
# section that a variant title paragraph belongs to. Each name is listed
# once; the list previously repeated "clause".
SECTION_CONTAINERS = %w(foreword introduction acknowledgements abstract
                        clause references terms definitions annex
                        appendix).freeze
77
+
78
# Turn every <p variant_title="..."> into a <variant-title> element and
# move it into its enclosing section container.
#
# The container is the last node (in the order XPath returns the
# parenthesised union) among the paragraph's ancestors named in
# SECTION_CONTAINERS. The paragraph is renamed, loses its "id"
# attribute, and is placed right after the container's <title> child,
# or before the container's first child when it has no title.
#
# @param xml [#xpath] document to rewrite in place
def sections_variant_title_cleanup(xml)
  ancestor_axes = SECTION_CONTAINERS.map { |name| "./ancestor::#{name}" }
  container_path = "(#{ancestor_axes.join(' | ')})[last()]"
  xml.xpath("//p[@variant_title]").each do |para|
    para.xpath(container_path).each do |section|
      para.name = "variant-title"
      para.delete("id")
      title = section.at("./title")
      if title
        title.next = para
      else
        section.children.first.previous = para
      end
    end
  end
end
73
90
  end
74
91
  end
75
92
  end
@@ -0,0 +1,70 @@
1
module Asciidoctor
  module Standoc
    # Text-level cleanup helpers: post-processing of the serialised
    # document text, and smart/dumb quote normalisation of the XML tree.
    module Cleanup
      # Join the converter output into one string and tidy it up.
      #
      # Trailing whitespace is removed from each fragment, AsciiMath is
      # converted through asciimath2mathml (defined outside this file)
      # unless @keepasciimath is set, whitespace before "<fn " is dropped,
      # and the contents of metanorma-format passthrough elements are
      # replaced by their entity-decoded text.
      #
      # @param result [Array] possibly nested array of string fragments
      # @return [String] the cleaned text
      def textcleanup(result)
        lines = result.flatten.map { |line| line.sub(/\s*$/, "") }
        text = lines.join("\n")
        text = asciimath2mathml(text) unless @keepasciimath
        text = text.gsub(/\s+<fn /, "<fn ")
        text.gsub(%r{<passthrough\s+formats="metanorma">([^<]*)
                  </passthrough>}mx) do
          HTMLEntities.new.decode(Regexp.last_match(1))
        end
      end

      # XPath union of the nodes whose text dumb2smart_quotes must not
      # touch.
      IGNORE_DUMBQUOTES = [
        "//pre", "//pre//*", "//tt", "//tt//*",
        "//sourcecode", "//sourcecode//*", "//bibdata//*", "//stem",
        "//stem//*", "//figure[@class = 'pseudocode']",
        "//figure[@class = 'pseudocode']//*"
      ].join(" | ").freeze

      # Pass every <date> through Metanorma::Utils.endash_date, then
      # apply smart-quote or dumb-quote cleanup depending on @smartquotes.
      def smartquotes_cleanup(xmldoc)
        xmldoc.xpath("//date").each do |date|
          Metanorma::Utils::endash_date(date)
        end
        @smartquotes ? smartquotes_cleanup1(xmldoc) : dumbquote_cleanup(xmldoc)
      end

      # Smart-quote path: first move closing quotes back across
      # intervening elements, then convert the quotes.
      def smartquotes_cleanup1(xmldoc)
        uninterrupt_quotes_around_xml(xmldoc)
        dumb2smart_quotes(xmldoc)
      end

      # "abc<tag/>", def => "abc",<tag/> def
      #
      # Visits every element whose next following text node starts with a
      # straight quote, skipping elements inside pre, tt, sourcecode,
      # stem or figure.
      def uninterrupt_quotes_around_xml(xmldoc)
        candidates = xmldoc.xpath("//*[following::text()[1]"\
                                  "[starts-with(., '\"') or starts-with(., \"'\")]]")
        candidates.each do |elem|
          next unless elem.ancestors("pre, tt, sourcecode, stem, figure").empty?

          uninterrupt_quotes_around_xml1(elem)
        end
      end

      # Move a leading quote (plus any punctuation directly after it)
      # from the text following +elem+ onto the end of the text preceding
      # it. Nothing happens unless the preceding text ends in
      # non-whitespace and the quote run is followed by whitespace or the
      # end of the text.
      def uninterrupt_quotes_around_xml1(elem)
        before = elem.at(".//preceding::text()[1]")
        return unless before
        return unless /\S$/.match?(before.text)

        after = elem.at(".//following::text()[1]")
        decoded = HTMLEntities.new.decode(after&.text)
        quote = /^(["'][[:punct:]]*)(\s|$)/.match(decoded)
        return unless quote

        after.content = after.text.sub(/^(["'][[:punct:]]*)/, "")
        before.content = "#{before.text}#{quote[1]}"
      end

      # Replace the direct text children of every element outside
      # IGNORE_DUMBQUOTES with their Metanorma::Utils.smartformat
      # rendering, when the text contains a character sequence the
      # pattern below looks for.
      def dumb2smart_quotes(xmldoc)
        with_text = xmldoc.xpath("//*[child::text()]")
        excluded = xmldoc.xpath(IGNORE_DUMBQUOTES)
        (with_text - excluded).each do |elem|
          elem.children.each do |node|
            next unless node.text?
            next unless /[-'"(<>]|\.\.|\dx/.match(node)

            node.replace(Metanorma::Utils::smartformat(node.text))
          end
        end
      end

      # Dumb-quote path: in every text node, turn U+2019 back into a
      # straight apostrophe when it sits between an alphanumeric
      # character and a letter.
      def dumbquote_cleanup(xmldoc)
        xmldoc.traverse do |node|
          next unless node.text?

          plain = node.text.gsub(/(?<=\p{Alnum})\u2019(?=\p{Alpha})/, "'")
          node.replace(plain)
        end
      end
    end
  end
end