metanorma-standoc 1.10.4.1 → 1.10.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (31) hide show
  1. checksums.yaml +4 -4
  2. data/lib/asciidoctor/standoc/base.rb +2 -0
  3. data/lib/asciidoctor/standoc/blocks.rb +2 -0
  4. data/lib/asciidoctor/standoc/cleanup.rb +12 -70
  5. data/lib/asciidoctor/standoc/cleanup_maths.rb +113 -21
  6. data/lib/asciidoctor/standoc/cleanup_reqt.rb +1 -0
  7. data/lib/asciidoctor/standoc/cleanup_section.rb +1 -0
  8. data/lib/asciidoctor/standoc/cleanup_section_names.rb +31 -14
  9. data/lib/asciidoctor/standoc/cleanup_text.rb +70 -0
  10. data/lib/asciidoctor/standoc/isodoc.rng +23 -9
  11. data/lib/asciidoctor/standoc/table.rb +22 -20
  12. data/lib/metanorma/standoc/version.rb +1 -1
  13. data/metanorma-standoc.gemspec +1 -1
  14. data/spec/asciidoctor/cleanup_sections_spec.rb +66 -0
  15. data/spec/asciidoctor/cleanup_spec.rb +28 -1
  16. data/spec/asciidoctor/isobib_cache_spec.rb +8 -8
  17. data/spec/asciidoctor/macros_spec.rb +5 -1
  18. data/spec/asciidoctor/refs_dl_spec.rb +1 -1
  19. data/spec/asciidoctor/refs_spec.rb +218 -442
  20. data/spec/asciidoctor/section_spec.rb +1 -1
  21. data/spec/fixtures/datamodel_description_sections_tree.xml +326 -327
  22. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +227 -139
  23. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +152 -0
  24. data/spec/vcr_cassettes/isobib_get_123.yml +50 -34
  25. data/spec/vcr_cassettes/isobib_get_123_1.yml +102 -70
  26. data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +107 -75
  27. data/spec/vcr_cassettes/isobib_get_123_2001.yml +50 -34
  28. data/spec/vcr_cassettes/isobib_get_124.yml +51 -35
  29. data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +14 -14
  30. data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +47 -45
  31. metadata +6 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4de5b7f8779dae7c359b93ce364ffa7cc406156bf1437c7cdccb6595b7bd958a
4
- data.tar.gz: c35902148b405e451af8487faff391b6eddbbd7b07cd2b6c8d565cd515e4515c
3
+ metadata.gz: dd98ba81df1c144552167e677b04d6152d838de0ea2d539a6bc7bde2a4b7f235
4
+ data.tar.gz: 68d9afff164047bf12f97ac23735cc1d61503172ae8656795393314d127223b6
5
5
  SHA512:
6
- metadata.gz: 6e33b33321d6826a65257a3440c63218021d5d40b69fa58b268d0a2c5e8436aed951e05c7f44ab97c74b5ce916c25ad7040cc962cf38be15ee2ebaf0c6858529
7
- data.tar.gz: f8f719f8062c3c71ebd2883617b984d7432134cb94da1be308e9ede34bf74def5be6267570c76ff9f248e628e443d6b71a60b9f71e1ba436b7c64eb2c2b25c13
6
+ metadata.gz: 611ba153c4160486b0ea51eb1564e09cd81d02f6ca6d5a075e3f38cf321321f2e019ba538341786bd4fe01eac9ef53e71ddc5cc19fdccb4e69189d9e7a720955
7
+ data.tar.gz: 0acb358132209888abe3a276c16abcfdf142570fd271712960e64e02db487df9bb39081a1f4e08da7347777a71240b57b2d8f6dda7d980523981efe5a838495c
@@ -53,6 +53,7 @@ module Asciidoctor
53
53
  htmltoclevels: node.attr("htmltoclevels") || node.attr("toclevels"),
54
54
  doctoclevels: node.attr("doctoclevels") || node.attr("toclevels"),
55
55
  break_up_urls_in_tables: node.attr("break-up-urls-in-tables"),
56
+ suppressasciimathdup: node.attr("suppress-asciimath-dup"),
56
57
  bare: node.attr("bare"),
57
58
  sectionsplit: node.attr("sectionsplit"),
58
59
  }
@@ -87,6 +88,7 @@ module Asciidoctor
87
88
  htmltoclevels: node.attr("htmltoclevels") || node.attr("toclevels"),
88
89
  doctoclevels: node.attr("doctoclevels") || node.attr("toclevels"),
89
90
  break_up_urls_in_tables: node.attr("break-up-urls-in-tables"),
91
+ suppressasciimathdup: node.attr("suppress-asciimath-dup"),
90
92
  bare: node.attr("bare"),
91
93
  }
92
94
 
@@ -161,6 +161,8 @@ module Asciidoctor
161
161
  def para_attrs(node)
162
162
  attr_code(keep_attrs(node)
163
163
  .merge(align: node.attr("align"),
164
+ variant_title: node.role == "variant-title" ? true : nil,
165
+ type: node.attr("type"),
164
166
  id: Metanorma::Utils::anchor_or_uuid(node)))
165
167
  end
166
168
 
@@ -13,19 +13,12 @@ require_relative "./cleanup_amend"
13
13
  require_relative "./cleanup_maths"
14
14
  require_relative "./cleanup_image"
15
15
  require_relative "./cleanup_reqt"
16
+ require_relative "./cleanup_text"
16
17
  require "relaton_iev"
17
18
 
18
19
  module Asciidoctor
19
20
  module Standoc
20
21
  module Cleanup
21
- def textcleanup(result)
22
- text = result.flatten.map { |l| l.sub(/\s*$/, "") } * "\n"
23
- !@keepasciimath and text = asciimath2mathml(text)
24
- text = text.gsub(/\s+<fn /, "<fn ")
25
- text.gsub(%r{<passthrough\s+formats="metanorma">([^<]*)
26
- </passthrough>}mx) { HTMLEntities.new.decode($1) }
27
- end
28
-
29
22
  def cleanup(xmldoc)
30
23
  element_name_cleanup(xmldoc)
31
24
  sections_cleanup(xmldoc)
@@ -72,63 +65,6 @@ module Asciidoctor
72
65
  xmldoc
73
66
  end
74
67
 
75
- IGNORE_DUMBQUOTES = "//pre | //pre//* | //tt | //tt//* | "\
76
- "//sourcecode | //sourcecode//* | //bibdata//* | //stem | "\
77
- "//stem//* | //figure[@class = 'pseudocode'] | "\
78
- "//figure[@class = 'pseudocode']//*".freeze
79
-
80
- def smartquotes_cleanup(xmldoc)
81
- xmldoc.xpath("//date").each { |d| Metanorma::Utils::endash_date(d) }
82
- if @smartquotes then smartquotes_cleanup1(xmldoc)
83
- else dumbquote_cleanup(xmldoc)
84
- end
85
- end
86
-
87
- def smartquotes_cleanup1(xmldoc)
88
- uninterrupt_quotes_around_xml(xmldoc)
89
- dumb2smart_quotes(xmldoc)
90
- end
91
-
92
- # "abc<tag/>", def => "abc",<tag/> def
93
- def uninterrupt_quotes_around_xml(xmldoc)
94
- xmldoc.xpath("//*[following::text()[1]"\
95
- "[starts-with(., '\"') or starts-with(., \"'\")]]")
96
- .each do |x|
97
- next if !x.ancestors("pre, tt, sourcecode, stem, figure").empty?
98
- uninterrupt_quotes_around_xml1(x)
99
- end
100
- end
101
-
102
- def uninterrupt_quotes_around_xml1(elem)
103
- prev = elem.at(".//preceding::text()[1]") or return
104
- /\S$/.match?(prev.text) or return
105
- foll = elem.at(".//following::text()[1]")
106
- m = /^(["'][[:punct:]]*)(\s|$)/.match(HTMLEntities.new.decode(foll&.text)) or return
107
- foll.content = foll.text.sub(/^(["'][[:punct:]]*)/, "")
108
- prev.content = "#{prev.text}#{m[1]}"
109
- end
110
-
111
- def dumb2smart_quotes(xmldoc)
112
- (xmldoc.xpath("//*[child::text()]") - xmldoc.xpath(IGNORE_DUMBQUOTES))
113
- .each do |x|
114
- x.children.each do |n|
115
- next unless n.text?
116
-
117
- /[-'"(<>]|\.\.|\dx/.match(n) or next
118
-
119
- n.replace(Metanorma::Utils::smartformat(n.text))
120
- end
121
- end
122
- end
123
-
124
- def dumbquote_cleanup(xmldoc)
125
- xmldoc.traverse do |n|
126
- next unless n.text?
127
-
128
- n.replace(n.text.gsub(/(?<=\p{Alnum})\u2019(?=\p{Alpha})/, "'")) # .
129
- end
130
- end
131
-
132
68
  def docidentifier_cleanup(xmldoc); end
133
69
 
134
70
  TEXT_ELEMS =
@@ -172,8 +108,8 @@ module Asciidoctor
172
108
 
173
109
  c.xpath("./variant").each do |n|
174
110
  if n.at_xpath("preceding-sibling::node()"\
175
- "[not(self::text()[not(normalize-space())])][1]"\
176
- "[self::variantwrap]")
111
+ "[not(self::text()[not(normalize-space())])][1]"\
112
+ "[self::variantwrap]")
177
113
  n.previous_element << n
178
114
  else
179
115
  n.replace("<variantwrap/>").first << n
@@ -203,11 +139,11 @@ module Asciidoctor
203
139
  end
204
140
 
205
141
  def toc_index(toc, xmldoc)
206
- depths = toc.xpath("./toc-xpath").each_with_object({}) do |x, m|
207
- m[x.text] = x["depth"]
208
- end
142
+ depths = toc_index_depths(toc)
209
143
  depths.keys.each_with_object([]) do |key, arr|
210
144
  xmldoc.xpath(key).each do |x|
145
+ t = x.at("./following-sibling::variant-title[@type = 'toc']") and
146
+ x = t
211
147
  arr << { text: x.children.to_xml, depth: depths[key].to_i,
212
148
  target: x.xpath("(./ancestor-or-self::*/@id)[last()]")[0].text,
213
149
  line: x.line }
@@ -215,6 +151,12 @@ module Asciidoctor
215
151
  end.sort_by { |a| a[:line] }
216
152
  end
217
153
 
154
+ def toc_index_depths(toc)
155
+ toc.xpath("./toc-xpath").each_with_object({}) do |x, m|
156
+ m[x.text] = x["depth"]
157
+ end
158
+ end
159
+
218
160
  def toc_cleanup1(toc, xmldoc)
219
161
  depth = 1
220
162
  ret = ""
@@ -29,19 +29,19 @@ module Asciidoctor
29
29
  x.to_xml
30
30
  end
31
31
 
32
- def xml_unescape_mathml(x)
33
- return if x.children.any? { |y| y.element? }
32
+ def xml_unescape_mathml(xml)
33
+ return if xml.children.any? { |y| y.element? }
34
34
 
35
- math = x.text.gsub(/&lt;/, "<").gsub(/&gt;/, ">")
35
+ math = xml.text.gsub(/&lt;/, "<").gsub(/&gt;/, ">")
36
36
  .gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&amp;/, "&")
37
37
  .gsub(/<[^: \r\n\t\/]+:/, "<").gsub(/<\/[^ \r\n\t:]+:/, "</")
38
- x.children = math
38
+ xml.children = math
39
39
  end
40
40
 
41
41
  MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
42
42
 
43
- def mathml_preserve_space(m)
44
- m.xpath(".//m:mtext", "m" => MATHML_NS).each do |x|
43
+ def mathml_preserve_space(math)
44
+ math.xpath(".//m:mtext", "m" => MATHML_NS).each do |x|
45
45
  x.children = x.children.to_xml
46
46
  .gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
47
47
  end
@@ -57,23 +57,23 @@ module Asciidoctor
57
57
  end
58
58
 
59
59
  # presuppose multichar mi upright, singlechar mi MathML default italic
60
- def mathml_italicise(x)
61
- x.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
62
- "m" => MATHML_NS).each do |i|
60
+ def mathml_italicise(xml)
61
+ xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
62
+ "m" => MATHML_NS).each do |i|
63
63
  char = HTMLEntities.new.decode(i.text)
64
64
  i["mathvariant"] = "normal" if mi_italicise?(char)
65
65
  end
66
66
  end
67
67
 
68
- def mi_italicise?(c)
69
- return false if c.length > 1
68
+ def mi_italicise?(char)
69
+ return false if char.length > 1
70
70
 
71
- if /\p{Greek}/.match?(c)
72
- /\p{Lower}/.match(c) && !mathml_mi_italics[:lowergreek] ||
73
- /\p{Upper}/.match(c) && !mathml_mi_italics[:uppergreek]
74
- elsif /\p{Latin}/.match?(c)
75
- /\p{Lower}/.match(c) && !mathml_mi_italics[:lowerroman] ||
76
- /\p{Upper}/.match(c) && !mathml_mi_italics[:upperroman]
71
+ if /\p{Greek}/.match?(char)
72
+ /\p{Lower}/.match(char) && !mathml_mi_italics[:lowergreek] ||
73
+ /\p{Upper}/.match(char) && !mathml_mi_italics[:uppergreek]
74
+ elsif /\p{Latin}/.match?(char)
75
+ /\p{Lower}/.match(char) && !mathml_mi_italics[:lowerroman] ||
76
+ /\p{Upper}/.match(char) && !mathml_mi_italics[:upperroman]
77
77
  else
78
78
  false
79
79
  end
@@ -100,14 +100,14 @@ module Asciidoctor
100
100
  end
101
101
  end
102
102
 
103
- def gather_unitsml(unitsml, xmldoc, t)
104
- tags = xmldoc.xpath(".//m:#{t}", "m" => UNITSML_NS)
103
+ def gather_unitsml(unitsml, xmldoc, tag)
104
+ tags = xmldoc.xpath(".//m:#{tag}", "m" => UNITSML_NS)
105
105
  .each_with_object({}) do |x, m|
106
106
  m[x["id"]] = x.remove
107
107
  end
108
108
  return if tags.empty?
109
109
 
110
- set = unitsml.add_child("<#{t}Set/>").first
110
+ set = unitsml.add_child("<#{tag}Set/>").first
111
111
  tags.each_value { |v| set << v }
112
112
  end
113
113
 
@@ -115,14 +115,106 @@ module Asciidoctor
115
115
  { multiplier: :space }
116
116
  end
117
117
 
118
# Composition table for nested MathML mathvariant values, keyed first by
# the enclosing (outer) value and then by the enclosed (inner) value.
# Only pairs that combine into a single richer variant are listed; any
# pair not listed leaves the inner value untouched.
# "sans-serif-bold" is not a MathML mathvariant token (the MathML token is
# "bold-sans-serif"); it is retained solely as a tolerated legacy alias.
MATHVARIANT_COMPOSITION = {
  "bold" => {
    "normal" => "bold",
    "italic" => "bold-italic",
    "fraktur" => "bold-fraktur",
    "script" => "bold-script",
    "sans-serif" => "bold-sans-serif",
    "sans-serif-italic" => "sans-serif-bold-italic",
  },
  "italic" => {
    "normal" => "italic",
    "bold" => "bold-italic",
    "sans-serif" => "sans-serif-italic",
    "bold-sans-serif" => "sans-serif-bold-italic",
  },
  "bold-italic" => {
    "normal" => "bold-italic",
    "bold" => "bold-italic",
    "italic" => "bold-italic",
    "sans-serif" => "sans-serif-bold-italic",
    "bold-sans-serif" => "sans-serif-bold-italic",
    "sans-serif-italic" => "sans-serif-bold-italic",
  },
  "fraktur" => {
    "normal" => "fraktur",
    "bold" => "bold-fraktur",
  },
  "bold-fraktur" => {
    "normal" => "bold-fraktur",
    "fraktur" => "bold-fraktur",
  },
  "script" => {
    "normal" => "script",
    "bold" => "bold-script",
  },
  "bold-script" => {
    "normal" => "bold-script",
    "script" => "bold-script",
  },
  "sans-serif" => {
    "normal" => "sans-serif",
    "bold" => "bold-sans-serif",
    "italic" => "sans-serif-italic",
    "bold-italic" => "sans-serif-bold-italic",
  },
  "bold-sans-serif" => {
    "normal" => "bold-sans-serif",
    "bold" => "bold-sans-serif",
    "sans-serif" => "bold-sans-serif",
    "italic" => "sans-serif-bold-italic",
    "bold-italic" => "sans-serif-bold-italic",
    "sans-serif-italic" => "sans-serif-bold-italic",
  },
  "sans-serif-italic" => {
    "normal" => "sans-serif-italic",
    "italic" => "sans-serif-italic",
    "sans-serif" => "sans-serif-italic",
    "bold" => "sans-serif-bold-italic",
    "bold-italic" => "sans-serif-bold-italic",
    "bold-sans-serif" => "sans-serif-bold-italic",
    "sans-serif-bold" => "sans-serif-bold-italic",
  },
  "sans-serif-bold-italic" => {
    "normal" => "sans-serif-bold-italic",
    "italic" => "sans-serif-bold-italic",
    "sans-serif" => "sans-serif-bold-italic",
    "sans-serif-italic" => "sans-serif-bold-italic",
    "bold" => "sans-serif-bold-italic",
    "bold-italic" => "sans-serif-bold-italic",
    "bold-sans-serif" => "sans-serif-bold-italic",
    "sans-serif-bold" => "sans-serif-bold-italic",
  },
}.each_value(&:freeze).freeze

# Combine the mathvariant of an element with that of an enclosing element.
#
# @param inner [String] mathvariant value of the enclosed element
# @param outer [String] mathvariant value of the enclosing element
# @return [String] the combined variant when the two compose (for example
#   inner "italic" within outer "bold" gives "bold-italic"); otherwise
#   +inner+ is returned unchanged
def mathvariant_override(inner, outer)
  # Unknown outer values have no row, unknown inner values no column:
  # both cases fall back to the inner value.
  MATHVARIANT_COMPOSITION.fetch(outer, {}).fetch(inner, inner)
end
199
+
200
# Fold the mathvariant of every element carrying one into the mathvariant
# of each of its descendants that also carries one.
#
# @param math [#xpath] node whose descendants are searched for
#   mathvariant attributes (called with a stem element by mathml_cleanup)
# @return the result of iterating the matched outer elements
def mathml_mathvariant(math)
  math.xpath(".//*[@mathvariant]").each do |outer|
    outer.xpath(".//*[@mathvariant]").each do |inner|
      # mathvariant_override is declared as (inner, outer) and falls back
      # to its first argument when the two values do not compose; passing
      # the descendant's value first keeps e.g. an inner "fraktur" intact
      # under an outer "italic" instead of overwriting it.
      inner["mathvariant"] =
        mathvariant_override(inner["mathvariant"], outer["mathvariant"])
    end
  end
end
208
+
118
209
  def mathml_cleanup(xmldoc)
119
210
  unitsml = Asciimath2UnitsML::Conv.new(asciimath2unitsml_options)
120
211
  xmldoc.xpath("//stem[@type = 'MathML']").each do |x|
121
212
  xml_unescape_mathml(x)
122
213
  mathml_namespace(x)
123
214
  mathml_preserve_space(x)
124
- mathml_italicise(x)
125
215
  unitsml.MathML2UnitsML(x)
216
+ mathml_mathvariant(x)
217
+ mathml_italicise(x)
126
218
  end
127
219
  mathml_unitsML(xmldoc)
128
220
  end
@@ -24,6 +24,7 @@ module Asciidoctor
24
24
  r.children.each do |e|
25
25
  unless e.element? && (reqt_subpart(e.name) ||
26
26
  %w(requirement recommendation permission).include?(e.name))
27
+ next if e.text.strip.empty?
27
28
  t = Nokogiri::XML::Element.new("description", r)
28
29
  e.before(t)
29
30
  t.children = e.remove
@@ -114,6 +114,7 @@ module Asciidoctor
114
114
  sections_order_cleanup(xml)
115
115
  sections_level_cleanup(xml)
116
116
  sections_names_cleanup(xml)
117
+ sections_variant_title_cleanup(xml)
117
118
  change_clauses(xml)
118
119
  end
119
120
 
@@ -37,39 +37,56 @@ module Asciidoctor
37
37
  end
38
38
 
39
39
  def section_names_refs_cleanup(xml)
40
- replace_title(xml, "//references[@normative = 'true']",
40
+ replace_title(xml, "//bibliography/references[@normative = 'true']",
41
41
  @i18n&.normref, true)
42
- replace_title(xml, "//references[@normative = 'false']",
42
+ replace_title(xml, "//bibliography/references[@normative = 'false']",
43
43
  @i18n&.bibliography, true)
44
44
  end
45
45
 
46
46
  NO_SYMABBR = "[.//definitions[not(@type)]]".freeze
47
47
  SYMABBR = "[.//definitions[@type = 'symbols']]"\
48
- "[.//definitions[@type = 'abbreviated_terms']]".freeze
48
+ "[.//definitions[@type = 'abbreviated_terms']]".freeze
49
49
  SYMnoABBR = "[.//definitions[@type = 'symbols']]"\
50
- "[not(.//definitions[@type = 'abbreviated_terms'])]".freeze
50
+ "[not(.//definitions[@type = 'abbreviated_terms'])]".freeze
51
51
  ABBRnoSYM = "[.//definitions[@type = 'abbreviated_terms']]"\
52
- "[not(.//definitions[@type = 'symbols'])]".freeze
52
+ "[not(.//definitions[@type = 'symbols'])]".freeze
53
53
 
54
- def section_names_terms_cleanup(x)
55
- replace_title(x, "//definitions[@type = 'symbols']", @i18n&.symbols)
56
- replace_title(x, "//definitions[@type = 'abbreviated_terms']",
54
+ def section_names_terms_cleanup(xml)
55
+ replace_title(xml, "//definitions[@type = 'symbols']", @i18n&.symbols)
56
+ replace_title(xml, "//definitions[@type = 'abbreviated_terms']",
57
57
  @i18n&.abbrev)
58
- replace_title(x, "//definitions[not(@type)]", @i18n&.symbolsabbrev)
59
- replace_title(x, "//terms#{SYMnoABBR} | //clause[.//terms]#{SYMnoABBR}",
58
+ replace_title(xml, "//definitions[not(@type)]", @i18n&.symbolsabbrev)
59
+ replace_title(xml, "//terms#{SYMnoABBR} | //clause[.//terms]#{SYMnoABBR}",
60
60
  @i18n&.termsdefsymbols, true)
61
- replace_title(x, "//terms#{ABBRnoSYM} | //clause[.//terms]#{ABBRnoSYM}",
61
+ replace_title(xml, "//terms#{ABBRnoSYM} | //clause[.//terms]#{ABBRnoSYM}",
62
62
  @i18n&.termsdefabbrev, true)
63
- replace_title(x, "//terms#{SYMABBR} | //clause[.//terms]#{SYMABBR}",
63
+ replace_title(xml, "//terms#{SYMABBR} | //clause[.//terms]#{SYMABBR}",
64
64
  @i18n&.termsdefsymbolsabbrev, true)
65
- replace_title(x, "//terms#{NO_SYMABBR} | //clause[.//terms]#{NO_SYMABBR}",
65
+ replace_title(xml, "//terms#{NO_SYMABBR} | //clause[.//terms]#{NO_SYMABBR}",
66
66
  @i18n&.termsdefsymbolsabbrev, true)
67
67
  replace_title(
68
- x,
68
+ xml,
69
69
  "//terms[not(.//definitions)] | //clause[.//terms][not(.//definitions)]",
70
70
  @i18n&.termsdef, true
71
71
  )
72
72
  end
73
+
74
# Names of the section-level elements searched for as ancestors of a
# variant-title paragraph.
SECTION_CONTAINERS = %w(foreword introduction acknowledgements abstract
                        clause references terms definitions annex
                        appendix).freeze

# Turn every paragraph flagged with a variant_title attribute into a
# variant-title element of its nearest enclosing section container.
#
# The paragraph is renamed, loses its id attribute, and is moved to sit
# immediately after the container's title child; when the container has
# no title child it becomes the container's first child instead.
# Paragraphs with no enclosing section container are left alone.
#
# @param xml [#xpath] document to rewrite in place
def sections_variant_title_cleanup(xml)
  ancestors = SECTION_CONTAINERS.map { |name| "./ancestor::#{name}" }
    .join(" | ")
  xml.xpath("//p[@variant_title]").each do |para|
    # The parenthesised union is ordered in document order, so [last()]
    # selects the innermost matching ancestor.
    sect = para.at_xpath("(#{ancestors})[last()]") or next
    para.name = "variant-title"
    para.delete("id")
    title = sect.at("./title")
    if title then title.next = para
    else sect.children.first.previous = para
    end
  end
end
73
90
  end
74
91
  end
75
92
  end
@@ -0,0 +1,70 @@
1
+ module Asciidoctor
2
+ module Standoc
3
+ module Cleanup
4
+ def textcleanup(result)
5
+ text = result.flatten.map { |l| l.sub(/\s*$/, "") } * "\n"
6
+ !@keepasciimath and text = asciimath2mathml(text)
7
+ text = text.gsub(/\s+<fn /, "<fn ")
8
+ text.gsub(%r{<passthrough\s+formats="metanorma">([^<]*)
9
+ </passthrough>}mx) { HTMLEntities.new.decode($1) }
10
+ end
11
+
12
+ IGNORE_DUMBQUOTES = "//pre | //pre//* | //tt | //tt//* | "\
13
+ "//sourcecode | //sourcecode//* | //bibdata//* | //stem | "\
14
+ "//stem//* | //figure[@class = 'pseudocode'] | "\
15
+ "//figure[@class = 'pseudocode']//*".freeze
16
+
17
+ def smartquotes_cleanup(xmldoc)
18
+ xmldoc.xpath("//date").each { |d| Metanorma::Utils::endash_date(d) }
19
+ if @smartquotes then smartquotes_cleanup1(xmldoc)
20
+ else dumbquote_cleanup(xmldoc)
21
+ end
22
+ end
23
+
24
+ def smartquotes_cleanup1(xmldoc)
25
+ uninterrupt_quotes_around_xml(xmldoc)
26
+ dumb2smart_quotes(xmldoc)
27
+ end
28
+
29
+ # "abc<tag/>", def => "abc",<tag/> def
30
+ def uninterrupt_quotes_around_xml(xmldoc)
31
+ xmldoc.xpath("//*[following::text()[1]"\
32
+ "[starts-with(., '\"') or starts-with(., \"'\")]]")
33
+ .each do |x|
34
+ next if !x.ancestors("pre, tt, sourcecode, stem, figure").empty?
35
+ uninterrupt_quotes_around_xml1(x)
36
+ end
37
+ end
38
+
39
+ def uninterrupt_quotes_around_xml1(elem)
40
+ prev = elem.at(".//preceding::text()[1]") or return
41
+ /\S$/.match?(prev.text) or return
42
+ foll = elem.at(".//following::text()[1]")
43
+ m = /^(["'][[:punct:]]*)(\s|$)/.match(HTMLEntities.new.decode(foll&.text)) or return
44
+ foll.content = foll.text.sub(/^(["'][[:punct:]]*)/, "")
45
+ prev.content = "#{prev.text}#{m[1]}"
46
+ end
47
+
48
+ def dumb2smart_quotes(xmldoc)
49
+ (xmldoc.xpath("//*[child::text()]") - xmldoc.xpath(IGNORE_DUMBQUOTES))
50
+ .each do |x|
51
+ x.children.each do |n|
52
+ next unless n.text?
53
+
54
+ /[-'"(<>]|\.\.|\dx/.match(n) or next
55
+
56
+ n.replace(Metanorma::Utils::smartformat(n.text))
57
+ end
58
+ end
59
+ end
60
+
61
+ def dumbquote_cleanup(xmldoc)
62
+ xmldoc.traverse do |n|
63
+ next unless n.text?
64
+
65
+ n.replace(n.text.gsub(/(?<=\p{Alnum})\u2019(?=\p{Alpha})/, "'")) # .
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end