metanorma-standoc 1.11.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (146) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +3 -31
  3. data/.gitignore +23 -0
  4. data/Gemfile +2 -0
  5. data/lib/asciidoctor/standoc/base.rb +2 -241
  6. data/lib/asciidoctor/standoc/blocks.rb +2 -238
  7. data/lib/asciidoctor/standoc/blocks_notes.rb +2 -100
  8. data/lib/asciidoctor/standoc/cleanup.rb +2 -207
  9. data/lib/asciidoctor/standoc/cleanup_amend.rb +2 -53
  10. data/lib/asciidoctor/standoc/cleanup_block.rb +2 -173
  11. data/lib/asciidoctor/standoc/cleanup_boilerplate.rb +2 -204
  12. data/lib/asciidoctor/standoc/cleanup_footnotes.rb +2 -108
  13. data/lib/asciidoctor/standoc/cleanup_image.rb +2 -69
  14. data/lib/asciidoctor/standoc/cleanup_inline.rb +2 -179
  15. data/lib/asciidoctor/standoc/cleanup_maths.rb +2 -221
  16. data/lib/asciidoctor/standoc/cleanup_ref.rb +2 -169
  17. data/lib/asciidoctor/standoc/cleanup_ref_dl.rb +2 -93
  18. data/lib/asciidoctor/standoc/cleanup_reqt.rb +2 -110
  19. data/lib/asciidoctor/standoc/cleanup_section.rb +2 -184
  20. data/lib/asciidoctor/standoc/cleanup_section_names.rb +2 -91
  21. data/lib/asciidoctor/standoc/cleanup_symbols.rb +2 -47
  22. data/lib/asciidoctor/standoc/cleanup_table.rb +2 -67
  23. data/lib/asciidoctor/standoc/cleanup_terms.rb +2 -113
  24. data/lib/asciidoctor/standoc/cleanup_terms_designations.rb +2 -161
  25. data/lib/asciidoctor/standoc/cleanup_text.rb +2 -95
  26. data/lib/asciidoctor/standoc/cleanup_toc.rb +3 -0
  27. data/lib/asciidoctor/standoc/cleanup_xref.rb +2 -106
  28. data/lib/asciidoctor/standoc/converter.rb +2 -123
  29. data/lib/asciidoctor/standoc/datamodel/attributes_table_preprocessor.rb +2 -56
  30. data/lib/asciidoctor/standoc/datamodel/diagram_preprocessor.rb +2 -102
  31. data/lib/asciidoctor/standoc/datamodel/plantuml_renderer.rb +3 -404
  32. data/lib/asciidoctor/standoc/deprecated.rb +5 -0
  33. data/lib/asciidoctor/standoc/front.rb +2 -219
  34. data/lib/asciidoctor/standoc/front_contributor.rb +2 -191
  35. data/lib/asciidoctor/standoc/inline.rb +2 -229
  36. data/lib/asciidoctor/standoc/lists.rb +2 -119
  37. data/lib/asciidoctor/standoc/macros.rb +2 -203
  38. data/lib/asciidoctor/standoc/macros_form.rb +2 -62
  39. data/lib/asciidoctor/standoc/macros_note.rb +2 -44
  40. data/lib/asciidoctor/standoc/macros_plantuml.rb +2 -112
  41. data/lib/asciidoctor/standoc/macros_terms.rb +2 -180
  42. data/lib/asciidoctor/standoc/ref.rb +2 -225
  43. data/lib/asciidoctor/standoc/ref_sect.rb +2 -143
  44. data/lib/asciidoctor/standoc/ref_utility.rb +2 -0
  45. data/lib/asciidoctor/standoc/render.rb +3 -0
  46. data/lib/asciidoctor/standoc/reqt.rb +2 -89
  47. data/lib/asciidoctor/standoc/section.rb +2 -190
  48. data/lib/asciidoctor/standoc/table.rb +2 -84
  49. data/lib/asciidoctor/standoc/term_lookup_cleanup.rb +2 -178
  50. data/lib/asciidoctor/standoc/terms.rb +2 -153
  51. data/lib/asciidoctor/standoc/utils.rb +2 -116
  52. data/lib/asciidoctor/standoc/validate.rb +2 -157
  53. data/lib/asciidoctor/standoc/validate_section.rb +2 -54
  54. data/lib/isodoc/html/htmlstyle.css +20 -11
  55. data/lib/isodoc/html/htmlstyle.scss +11 -11
  56. data/lib/metanorma/standoc/base.rb +149 -0
  57. data/lib/{asciidoctor → metanorma}/standoc/basicdoc.rng +0 -0
  58. data/lib/{asciidoctor → metanorma}/standoc/biblio.rng +0 -0
  59. data/lib/metanorma/standoc/blocks.rb +239 -0
  60. data/lib/metanorma/standoc/blocks_notes.rb +101 -0
  61. data/lib/metanorma/standoc/cleanup.rb +146 -0
  62. data/lib/metanorma/standoc/cleanup_amend.rb +54 -0
  63. data/lib/metanorma/standoc/cleanup_block.rb +173 -0
  64. data/lib/metanorma/standoc/cleanup_boilerplate.rb +213 -0
  65. data/lib/metanorma/standoc/cleanup_footnotes.rb +109 -0
  66. data/lib/metanorma/standoc/cleanup_image.rb +70 -0
  67. data/lib/metanorma/standoc/cleanup_inline.rb +190 -0
  68. data/lib/metanorma/standoc/cleanup_maths.rb +222 -0
  69. data/lib/metanorma/standoc/cleanup_ref.rb +170 -0
  70. data/lib/metanorma/standoc/cleanup_ref_dl.rb +104 -0
  71. data/lib/metanorma/standoc/cleanup_reqt.rb +111 -0
  72. data/lib/metanorma/standoc/cleanup_section.rb +212 -0
  73. data/lib/metanorma/standoc/cleanup_section_names.rb +92 -0
  74. data/lib/metanorma/standoc/cleanup_symbols.rb +48 -0
  75. data/lib/metanorma/standoc/cleanup_table.rb +68 -0
  76. data/lib/metanorma/standoc/cleanup_terms.rb +140 -0
  77. data/lib/metanorma/standoc/cleanup_terms_designations.rb +199 -0
  78. data/lib/metanorma/standoc/cleanup_text.rb +96 -0
  79. data/lib/metanorma/standoc/cleanup_toc.rb +98 -0
  80. data/lib/metanorma/standoc/cleanup_xref.rb +107 -0
  81. data/lib/metanorma/standoc/converter.rb +124 -0
  82. data/lib/metanorma/standoc/datamodel/attributes_table_preprocessor.rb +57 -0
  83. data/lib/metanorma/standoc/datamodel/diagram_preprocessor.rb +103 -0
  84. data/lib/metanorma/standoc/datamodel/plantuml_renderer.rb +409 -0
  85. data/lib/metanorma/standoc/front.rb +224 -0
  86. data/lib/metanorma/standoc/front_contributor.rb +192 -0
  87. data/lib/metanorma/standoc/inline.rb +232 -0
  88. data/lib/{asciidoctor → metanorma}/standoc/isodoc.rng +90 -18
  89. data/lib/metanorma/standoc/lists.rb +120 -0
  90. data/lib/metanorma/standoc/macros.rb +204 -0
  91. data/lib/metanorma/standoc/macros_form.rb +63 -0
  92. data/lib/metanorma/standoc/macros_note.rb +45 -0
  93. data/lib/metanorma/standoc/macros_plantuml.rb +113 -0
  94. data/lib/metanorma/standoc/macros_terms.rb +181 -0
  95. data/lib/metanorma/standoc/ref.rb +243 -0
  96. data/lib/metanorma/standoc/ref_sect.rb +153 -0
  97. data/lib/metanorma/standoc/ref_utility.rb +129 -0
  98. data/lib/metanorma/standoc/render.rb +115 -0
  99. data/lib/metanorma/standoc/reqt.rb +90 -0
  100. data/lib/{asciidoctor → metanorma}/standoc/reqt.rng +0 -0
  101. data/lib/metanorma/standoc/section.rb +209 -0
  102. data/lib/metanorma/standoc/table.rb +85 -0
  103. data/lib/metanorma/standoc/term_lookup_cleanup.rb +179 -0
  104. data/lib/metanorma/standoc/terms.rb +160 -0
  105. data/lib/metanorma/standoc/utils.rb +101 -0
  106. data/lib/metanorma/standoc/validate.rb +158 -0
  107. data/lib/metanorma/standoc/validate_section.rb +55 -0
  108. data/lib/metanorma/standoc/version.rb +1 -1
  109. data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/model_representation.adoc.erb +0 -0
  110. data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/plantuml_representation.adoc.erb +0 -0
  111. data/lib/metanorma-standoc.rb +1 -1
  112. data/metanorma-standoc.gemspec +4 -4
  113. data/spec/{asciidoctor → metanorma}/base_spec.rb +73 -8
  114. data/spec/{asciidoctor → metanorma}/blank_spec.rb +1 -1
  115. data/spec/{asciidoctor → metanorma}/blocks_spec.rb +49 -20
  116. data/spec/{asciidoctor → metanorma}/cleanup_blocks_spec.rb +25 -1
  117. data/spec/{asciidoctor → metanorma}/cleanup_sections_spec.rb +2 -2
  118. data/spec/{asciidoctor → metanorma}/cleanup_spec.rb +9 -9
  119. data/spec/{asciidoctor → metanorma}/cleanup_terms_spec.rb +528 -91
  120. data/spec/{asciidoctor → metanorma}/datamodel/attributes_table_preprocessor_spec.rb +22 -22
  121. data/spec/{asciidoctor → metanorma}/datamodel/diagram_preprocessor_spec.rb +17 -17
  122. data/spec/{asciidoctor → metanorma}/inline_spec.rb +175 -6
  123. data/spec/{asciidoctor → metanorma}/isobib_cache_spec.rb +5 -9
  124. data/spec/{asciidoctor → metanorma}/lists_spec.rb +1 -1
  125. data/spec/{asciidoctor → metanorma}/macros_json2text_spec.rb +0 -0
  126. data/spec/{asciidoctor → metanorma}/macros_plantuml_spec.rb +3 -3
  127. data/spec/{asciidoctor → metanorma}/macros_spec.rb +8 -8
  128. data/spec/{asciidoctor → metanorma}/macros_yaml2text_spec.rb +0 -0
  129. data/spec/metanorma/refs_dl_spec.rb +863 -0
  130. data/spec/{asciidoctor → metanorma}/refs_spec.rb +1277 -687
  131. data/spec/{asciidoctor → metanorma}/section_spec.rb +90 -3
  132. data/spec/{asciidoctor → metanorma}/table_spec.rb +1 -1
  133. data/spec/{asciidoctor → metanorma}/validate_spec.rb +2 -2
  134. data/spec/spec_helper.rb +0 -1
  135. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +179 -179
  136. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
  137. data/spec/vcr_cassettes/isobib_get_123.yml +13 -13
  138. data/spec/vcr_cassettes/isobib_get_123_1.yml +98 -98
  139. data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +111 -111
  140. data/spec/vcr_cassettes/isobib_get_123_2001.yml +13 -13
  141. data/spec/vcr_cassettes/isobib_get_124.yml +14 -14
  142. data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +14 -14
  143. data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +46 -46
  144. metadata +89 -38
  145. data/lib/asciidoctor/standoc/ref_date_id.rb +0 -62
  146. data/spec/asciidoctor/refs_dl_spec.rb +0 -864
@@ -0,0 +1,190 @@
1
+ require "metanorma-utils"
2
+ require "digest"
3
+
4
+ module Metanorma
5
+ module Standoc
6
+ module Cleanup
7
# Reports whether only whitespace precedes the first child element of +elem+.
#
# Children are scanned in order: a text node containing a non-space
# character yields false, the first element child yields true. When the
# scan finishes without hitting either, the result is true.
def empty_text_before_first_element(elem)
  elem.children.each do |child|
    if child.text? && /\S/.match(child.text)
      return false
    elsif child.element?
      return true
    end
  end
  true
end
14
+
15
# Tidies the first child of +elem+ when that child is a text node: a
# whitespace-only node is removed, otherwise a single leading space is
# stripped from the start of each line of its text.
#
# Does nothing (and returns nil) when +elem+ has no children or its first
# child is not a text node. The no-children case arises when the only child
# of +elem+ (e.g. a bookmark) has just been removed.
def strip_initial_space(elem)
  first = elem.children[0]
  # Safe navigation: an emptied element has no first child at all.
  return unless first&.text?

  if /\S/.match?(first.text)
    first.content = first.text.gsub(/^ /, "")
  else
    first.remove
  end
end
24
+
25
# Runs the bookmark clean-up passes over +xmldoc+: first the list-item
# pass, then the definition-term pass.
def bookmark_cleanup(xmldoc)
  li_bookmark_cleanup xmldoc
  dt_bookmark_cleanup xmldoc
end
29
+
30
# Transfers the id of +bookmark+ onto +elem+: the bookmark is detached from
# the tree, its "id" attribute is copied to +elem+, and leading space left
# behind in the bookmark's former parent is stripped.
def bookmark_to_id(elem, bookmark)
  container = bookmark.parent
  detached = bookmark.remove
  elem["id"] = detached["id"]
  strip_initial_space(container)
end
35
+
36
# For every <li> whose first element is a <p> that itself starts with a
# <bookmark> (no non-space text before it), converts that bookmark into the
# id of the <li>.
def li_bookmark_cleanup(xmldoc)
  leading = "./*[1][local-name() = 'p']/*[1][local-name() = 'bookmark']"
  xmldoc.xpath("//li[descendant::bookmark]").each do |item|
    next unless item.at(leading)

    para = item.elements[0]
    next unless empty_text_before_first_element(para)

    bookmark_to_id(item, para.elements[0])
  end
end
45
+
46
# For every <dt> that begins with a <bookmark> -- either nested in a leading
# <p> or as the direct first element, in both cases with no non-space text
# before it -- converts that bookmark into the id of the <dt>.
def dt_bookmark_cleanup(xmldoc)
  in_para = "./*[1][local-name() = 'p']/*[1][local-name() = 'bookmark']"
  direct = "./*[1][local-name() = 'bookmark']"
  xmldoc.xpath("//dt[descendant::bookmark]").each do |term|
    first = term.elements[0]
    if term.at(in_para) && empty_text_before_first_element(first)
      bookmark_to_id(term, first.elements[0])
    elsif term.at(direct) && empty_text_before_first_element(term)
      bookmark_to_id(term, first)
    end
  end
end
58
+
59
# Processes each <concept> that has no <termxref> child: an empty <refterm>
# child is dropped, then the concept is resolved by concept_cleanup1.
def concept_cleanup(xmldoc)
  xmldoc.xpath("//concept[not(termxref)]").each do |concept|
    refterm = concept.at("./refterm")
    refterm.remove if refterm && refterm.text.empty?
    concept_cleanup1(concept)
  end
end
66
+
67
# Resolves the "key" attribute of a concept-like element into a child
# reference element, then deletes the attribute.
#
# Whitespace-only content is cleared first and any locality is split off
# the key. The key then selects the target: a key containing ":" goes to
# concept_termbase_cleanup, a key accepted by refid? goes to
# concept_eref_cleanup, and anything else to concept_xref_cleanup.
def concept_cleanup1(elem)
  kids = elem.children
  kids.remove if kids.text.strip.empty?
  key_extract_locality(elem)
  # Read the key only now: key_extract_locality may have shortened it.
  key = elem["key"]
  if /:/.match?(key)
    concept_termbase_cleanup(elem)
  elsif refid?(key)
    concept_eref_cleanup(elem)
  else
    concept_xref_cleanup(elem)
  end
  elem.delete("key")
end
76
+
77
# Processes each <related> element that has no <termxref> child: its
# <refterm> child is replaced by a <preferred> element wrapping the result
# of term_expr on the refterm's content, then the element is resolved by
# concept_cleanup1.
#
# A <related> without a <refterm> is still resolved; the replacement step is
# skipped rather than raising on nil, matching the tolerance concept_cleanup
# shows for a missing <refterm>.
def related_cleanup(xmldoc)
  xmldoc.xpath("//related[not(termxref)]").each do |x|
    term = x.at("./refterm")
    # &. also skips evaluating the argument when there is no refterm.
    term&.replace("<preferred>#{term_expr(term.children.to_xml)}"\
                  "</preferred>")
    concept_cleanup1(x)
  end
end
85
+
86
# Splits a "key" attribute of the form "<target>,<locality>" at its first
# comma: the part after the comma is appended to +elem+ as a <locality>
# child and the attribute is cut back to the part before the comma.
# Keys without a comma are left untouched.
def key_extract_locality(elem)
  key = elem["key"]
  return unless /,/.match?(key)

  locality = key.sub(/^[^,]+,/, "")
  elem.add_child("<locality>#{locality}</locality>")
  elem["key"] = key.sub(/,.*$/, "")
end
92
+
93
# Resolves a "termbase:id" key into a <termref> child of +elem+. The key is
# split at its first colon into the base and target attributes; the content
# of any <xrefrender> child (which is removed) becomes the termref content.
def concept_termbase_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children
  termbase, key = elem["key"].split(/:/, 2)
  opening = %(<termref base="#{termbase}" target="#{key}">)
  elem.add_child("#{opening}#{rendering&.to_xml}</termref>")
end
99
+
100
# Resolves the key of +elem+ into an <xref> child targeting that key. The
# content of any <xrefrender> child (which is removed) becomes the xref
# content.
def concept_xref_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children
  target = elem["key"]
  elem.add_child(%(<xref target="#{target}">#{rendering&.to_xml}</xref>))
end
104
+
105
# Resolves the key of +elem+ into an <eref> child whose bibitemid is that
# key. Any <locality> child is removed and its content seeds the eref, which
# is then passed to extract_localities; the content of any <xrefrender>
# child (also removed) is appended to the eref afterwards.
def concept_eref_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children&.to_xml
  locality = elem&.at("./locality")&.remove&.children&.to_xml
  elem.add_child("<eref bibitemid='#{elem['key']}'>#{locality}</eref>")
  extract_localities(elem.elements[-1])
  # Look the eref up again after extract_localities has run on it.
  elem.elements[-1].add_child(rendering) if rendering
end
112
+
113
# Normalises a cross-reference target. A target of the form
# "<prefix>#<suffix>" has every character matched by the
# Metanorma::Utils::NAMECHAR character class replaced with "_" in each half
# separately, keeping the "#" separator. Any other target is passed through
# Metanorma::Utils.to_ncname.
def to_xreftarget(str)
  parts = /^(?<pref>[^#]+)#(?<suff>.+)$/.match(str)
  return Metanorma::Utils::to_ncname(str) unless parts

  cleaned = [parts[:pref], parts[:suff]].map do |half|
    half.gsub(%r([#{Metanorma::Utils::NAMECHAR}])o, "_")
  end
  cleaned.join("#")
end
121
+
122
# XPath union of the identifier-bearing attributes that anchor_cleanup1
# normalises.
IDREF = [
  "//*/@id",
  "//review/@from",
  "//review/@to",
  "//callout/@target",
  "//citation/@bibitemid",
  "//eref/@bibitemid",
].join(" | ").freeze
125
+
126
# Runs the identifier clean-up passes over +elem+ in order: identifier
# normalisation, xref target normalisation, then replacement of GUID-style
# ids with content hashes.
def anchor_cleanup(elem)
  anchor_cleanup1 elem
  xreftarget_cleanup elem
  contenthash_id_cleanup elem
end
131
+
132
# Rewrites every attribute selected by IDREF through
# Metanorma::Utils.to_ncname. Each attribute whose value changes is logged
# under "Anchors", quoting a childless copy of its owning element and the
# original value.
def anchor_cleanup1(elem)
  elem.xpath(IDREF).each do |attr|
    original = attr.value
    normalised = Metanorma::Utils::to_ncname(original)
    next if normalised == original

    attr.value = normalised
    owner = attr.parent
    # Log only the element's own tag, not its content.
    shell = owner.dup
    shell.children.remove
    @log.add("Anchors", owner,
             "normalised identifier in #{shell} from #{original}")
  end
end
143
+
144
# Rewrites the target attribute of every <xref> through to_xreftarget. Each
# target whose value changes is logged under "Anchors", quoting a childless
# copy of the xref and the original value.
def xreftarget_cleanup(elem)
  elem.xpath("//xref/@target").each do |attr|
    original = attr.value
    normalised = to_xreftarget(original)
    next if normalised == original

    attr.value = normalised
    owner = attr.parent
    # Log only the element's own tag, not its content.
    shell = owner.dup
    shell.children.remove
    @log.add("Anchors", owner,
             "normalised identifier in #{shell} from #{original}")
  end
end
155
+
156
# True when +str+ has the shape of an underscore-prefixed, lower-case
# hexadecimal GUID (8-4-4-4-12 digits), e.g.
# "_0123abcd-0123-4567-89ab-0123456789ab".
def guid?(str)
  /^_[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12}$/.match?(str)
end
160
+
161
# Replaces GUID-style ids in +doc+ with content hashes, then repoints the
# references that used the old ids.
def contenthash_id_cleanup(doc)
  replacements = contenthash_id_make(doc)
  contenthash_id_update_refs(doc, replacements)
end
165
+
166
# Replaces the id of every element whose id satisfies guid? with the value
# of contenthash for that element.
#
# @return [Hash] mapping from each replaced id to its new value
def contenthash_id_make(doc)
  mapping = {}
  doc.xpath("//*[@id]").each do |node|
    old_id = node["id"]
    next unless guid?(old_id)

    new_id = contenthash(node)
    mapping[old_id] = new_id
    node["id"] = new_id
  end
  mapping
end
174
+
175
# Repoints referencing attributes at renamed ids. For each listed
# element/attribute pair, an attribute whose current value is a key of
# +ids+ is set to the mapped value; all other attributes are left alone.
def contenthash_id_update_refs(doc, ids)
  pairs = [%w(review from), %w(review to), %w(callout target),
           %w(eref bibitemid), %w(citation bibitemid), %w(xref target),
           %w(xref to)]
  pairs.each do |tag, attr|
    doc.xpath("//#{tag}").each do |node|
      replacement = ids[node[attr]]
      node[attr] = replacement if replacement
    end
  end
end
183
+
184
# Derives a GUID-shaped identifier ("_" followed by 8-4-4-4-12 hex digits)
# from the MD5 digest of the element's path and text, joined by "////".
def contenthash(elem)
  digest = Digest::MD5.hexdigest("#{elem.path}////#{elem.text}")
  groups = [digest[0, 8], digest[8, 4], digest[12, 4], digest[16, 4],
            digest[20, 12]]
  "_#{groups.join('-')}"
end
188
+ end
189
+ end
190
+ end
@@ -0,0 +1,222 @@
1
+ require "nokogiri"
2
+ require "pathname"
3
+ require "html2doc"
4
+ require "asciimath2unitsml"
5
+ require_relative "./cleanup_block"
6
+ require_relative "./cleanup_footnotes"
7
+ require_relative "./cleanup_ref"
8
+ require_relative "./cleanup_ref_dl"
9
+ require_relative "./cleanup_boilerplate"
10
+ require_relative "./cleanup_section"
11
+ require_relative "./cleanup_terms"
12
+ require_relative "./cleanup_inline"
13
+ require_relative "./cleanup_amend"
14
+ require "relaton_iev"
15
+
16
+ module Metanorma
17
+ module Standoc
18
+ module Cleanup
19
# Converts AsciiMath <stem> elements in the XML string +text+ to MathML.
#
# Each <stem type="AsciiMath"> is rewritten as an <amathstem> wrapper around
# its entity-decoded content, which Html2Doc.asciimath_to_mathml is asked to
# convert. Any resulting <math> element not already inside a <stem> is then
# wrapped in <stem type='MathML'>.
#
# @return [String] the re-serialised XML
def asciimath2mathml(text)
  marked = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
    "<amathstem>#{HTMLEntities.new.decode(Regexp.last_match(1))}</amathstem>"
  end
  converted = Html2Doc.asciimath_to_mathml(marked,
                                           ["<amathstem>", "</amathstem>"])
  doc = Nokogiri::XML(converted)
  doc.xpath("//*[local-name() = 'math'][not(parent::stem)]").each do |math|
    math.wrap("<stem type='MathML'></stem>")
  end
  doc.to_xml
end
31
+
32
# Turns escaped MathML held as text inside +xml+ back into markup.
#
# Nothing is done when +xml+ already has an element child. Otherwise the
# five predefined XML entities in its text are decoded (&amp; last),
# namespace prefixes are stripped from opening and closing tags, and the
# result replaces the children of +xml+.
def xml_unescape_mathml(xml)
  return if xml.children.any?(&:element?)

  entities = [["&lt;", "<"], ["&gt;", ">"], ["&quot;", '"'],
              ["&apos;", "'"], ["&amp;", "&"]]
  math = entities.reduce(xml.text) do |acc, (entity, char)|
    acc.gsub(entity, char)
  end
  math = math.gsub(/<[^: \r\n\t\/]+:/, "<")
  math = math.gsub(/<\/[^ \r\n\t:]+:/, "</")
  xml.children = math
end
40
+
41
# Namespace URI of MathML, used for namespaced XPath queries and as the
# default namespace assigned to <math> elements.
MATHML_NS = "http://www.w3.org/1998/Math/MathML"
  .freeze
42
+
43
# Replaces whitespace at the start and end of lines in the serialised
# content of each MathML <mtext> under +math+ with a non-breaking space
# character reference.
def mathml_preserve_space(math)
  math.xpath(".//m:mtext", "m" => MATHML_NS).each do |mtext|
    markup = mtext.children.to_xml
    markup = markup.gsub(/^\s/, "&#xA0;")
    mtext.children = markup.gsub(/\s$/, "&#xA0;")
  end
end
49
+
50
# Assigns the MathML namespace as default namespace to each un-namespaced
# <math> child of +stem+.
def mathml_namespace(stem)
  stem.xpath("./math").each do |math|
    math.default_namespace = MATHML_NS
  end
end
53
+
54
# Per-class switches consulted by mi_italicise? for single-character <mi>
# content: upper/lower case Greek and upper/lower case roman (Latin)
# letters. All four are enabled here.
def mathml_mi_italics
  %i[uppergreek upperroman lowergreek lowerroman]
    .each_with_object({}) { |letter_class, flags| flags[letter_class] = true }
end
58
+
59
# Presupposes that multi-character <mi> is upright and single-character
# <mi> is italic by MathML default. Every <mi> with no ancestor carrying a
# mathvariant attribute is checked; those whose entity-decoded text
# satisfies mi_italicise? get mathvariant="normal".
def mathml_italicise(xml)
  decoder = HTMLEntities.new
  xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
            "m" => MATHML_NS).each do |mi|
    mi["mathvariant"] = "normal" if mi_italicise?(decoder.decode(mi.text))
  end
end
67
+
68
# Decides whether a decoded <mi> string needs explicit treatment by
# mathml_italicise.
#
# Strings longer than one character, and characters that are neither Greek
# nor Latin, give false. For a single Greek or Latin letter the result is
# truthy only when the mathml_mi_italics switch for its case and script is
# off.
def mi_italicise?(char)
  return false if char.length > 1

  script = if /\p{Greek}/.match?(char) then "greek"
           elsif /\p{Latin}/.match?(char) then "roman"
           end
  return false unless script

  flags = mathml_mi_italics
  (/\p{Lower}/.match(char) && !flags[:"lower#{script}"]) ||
    (/\p{Upper}/.match(char) && !flags[:"upper#{script}"])
end
80
+
81
# Namespace URI of UnitsML, used to locate and to emit UnitsML markup.
UNITSML_NS = "https://schema.unitsml.org/unitsml/1.0"
  .freeze
82
+
83
# Returns the document's <misc-container>, creating it when absent. A new
# container is inserted immediately after the first <termdocsource>, or
# after <bibdata> when there is no <termdocsource>.
# NOTE(review): a document with neither element would raise here on nil --
# confirm callers always supply <bibdata>.
def add_misc_container(xmldoc)
  existing = xmldoc.at("//misc-container")
  return existing if existing

  anchor = xmldoc.at("//termdocsource") || xmldoc.at("//bibdata")
  anchor.next = "<misc-container/>"
  xmldoc.at("//misc-container")
end
91
+
92
# Gathers UnitsML definitions scattered through +xmldoc+ into a single
# <UnitsML> element inside the misc-container. Does nothing when the
# document contains no element in the UnitsML namespace.
def mathml_unitsML(xmldoc)
  return unless xmldoc.at(".//m:*", "m" => UNITSML_NS)

  container = add_misc_container(xmldoc)
  unitsml = container.add_child("<UnitsML xmlns='#{UNITSML_NS}'/>").first
  %w(Unit CountedItem Quantity Dimension Prefix).each do |tag|
    gather_unitsml(unitsml, xmldoc, tag)
  end
end
101
+
102
# Moves every UnitsML element named +tag+ out of +xmldoc+ and into a new
# "<tag>Set" child of +unitsml+. Elements are keyed by id, so of several
# sharing an id only the last one removed is kept. No set is created when
# no such element exists.
def gather_unitsml(unitsml, xmldoc, tag)
  by_id = {}
  xmldoc.xpath(".//m:#{tag}", "m" => UNITSML_NS).each do |node|
    by_id[node["id"]] = node.remove
  end
  return if by_id.empty?

  set = unitsml.add_child("<#{tag}Set/>").first
  by_id.each_value { |node| set << node }
end
112
+
113
# Options handed to Asciimath2UnitsML::Conv.new by mathml_cleanup.
def asciimath2unitsml_options
  options = {}
  options[:multiplier] = :space
  options
end
116
+
117
# Merges two MathML mathvariant values into one.
#
# The merged value is looked up by +outer+ first and +inner+ second, e.g.
# outer "bold" with inner "italic" gives "bold-italic". Any pair with no
# listed combination, and any unlisted +outer+, returns +inner+ unchanged.
#
# MathML spells the bold sans-serif variant "bold-sans-serif". The
# non-standard spelling "sans-serif-bold" is still accepted where it was
# recognised before, so existing input keeps working.
#
# @param inner [String] first mathvariant value
# @param outer [String] second mathvariant value
# @return [String] the merged mathvariant, or +inner+
def mathvariant_override(inner, outer)
  case outer
  when "bold"
    case inner
    when "normal" then "bold"
    when "italic" then "bold-italic"
    when "fraktur" then "bold-fraktur"
    when "script" then "bold-script"
    when "sans-serif" then "bold-sans-serif"
    when "sans-serif-italic" then "sans-serif-bold-italic"
    else inner
    end
  when "italic"
    case inner
    when "normal" then "italic"
    when "bold" then "bold-italic"
    when "sans-serif" then "sans-serif-italic"
    when "bold-sans-serif" then "sans-serif-bold-italic"
    else inner
    end
  when "bold-italic"
    case inner
    when "normal", "bold", "italic" then "bold-italic"
    when "sans-serif", "bold-sans-serif", "sans-serif-italic"
      "sans-serif-bold-italic"
    else inner
    end
  when "fraktur"
    case inner
    when "normal" then "fraktur"
    when "bold" then "bold-fraktur"
    else inner
    end
  when "bold-fraktur"
    case inner
    when "normal", "fraktur" then "bold-fraktur"
    else inner
    end
  when "script"
    case inner
    when "normal" then "script"
    when "bold" then "bold-script"
    else inner
    end
  when "bold-script"
    case inner
    when "normal", "script" then "bold-script"
    else inner
    end
  when "sans-serif"
    case inner
    when "normal" then "sans-serif"
    when "bold" then "bold-sans-serif"
    when "italic" then "sans-serif-italic"
    when "bold-italic" then "sans-serif-bold-italic"
    else inner
    end
  when "bold-sans-serif"
    case inner
    when "normal", "bold", "sans-serif" then "bold-sans-serif"
    when "italic", "bold-italic", "sans-serif-italic"
      "sans-serif-bold-italic"
    else inner
    end
  when "sans-serif-italic"
    case inner
    when "normal", "italic", "sans-serif" then "sans-serif-italic"
    # "bold-sans-serif" is the MathML spelling; "sans-serif-bold" is kept
    # only for backward compatibility.
    when "bold", "bold-italic", "bold-sans-serif", "sans-serif-bold"
      "sans-serif-bold-italic"
    else inner
    end
  when "sans-serif-bold-italic"
    case inner
    when "normal", "italic", "sans-serif", "sans-serif-italic",
         "bold", "bold-italic", "bold-sans-serif", "sans-serif-bold"
      "sans-serif-bold-italic"
    else inner
    end
  else inner
  end
end
198
+
199
# For every element under +math+ that carries a mathvariant attribute,
# rewrites the mathvariant of each of its styled descendants with the value
# mathvariant_override gives for the pair.
# NOTE(review): the enclosing element's value is passed first and the
# descendant's second, the reverse of mathvariant_override's (inner, outer)
# parameter names -- confirm this order is intended.
def mathml_mathvariant(math)
  styled = ".//*[@mathvariant]"
  math.xpath(styled).each do |outer|
    outer.xpath(styled).each do |inner|
      merged = mathvariant_override(outer["mathvariant"],
                                    inner["mathvariant"])
      inner["mathvariant"] = merged
    end
  end
end
207
+
208
# Post-processes every <stem type='MathML'> in +xmldoc+. In order: escaped
# markup is unescaped, the MathML namespace is set, spacing in <mtext> is
# preserved, the stem is passed to the UnitsML converter, nested
# mathvariants are merged and <mi> styling is adjusted. UnitsML definitions
# are consolidated once all stems are done.
def mathml_cleanup(xmldoc)
  converter = Asciimath2UnitsML::Conv.new(asciimath2unitsml_options)
  xmldoc.xpath("//stem[@type = 'MathML']").each do |stem|
    xml_unescape_mathml(stem)
    mathml_namespace(stem)
    mathml_preserve_space(stem)
    converter.MathML2UnitsML(stem)
    mathml_mathvariant(stem)
    mathml_italicise(stem)
  end
  mathml_unitsML(xmldoc)
end
220
+ end
221
+ end
222
+ end
@@ -0,0 +1,170 @@
1
+ require "set"
2
+ require "relaton_bib"
3
+
4
+ module Metanorma
5
+ module Standoc
6
+ module Cleanup
7
# Reorders the bibitems of every <references> element marked
# normative="false".
def biblio_reorder(xmldoc)
  xmldoc.xpath("//references[@normative = 'false']")
    .each { |refs| biblio_reorder1(refs) }
end
12
+
13
# Re-inserts the bibitems of +refs+ in the order given by sort_biblio,
# then recurses into nested <references> children.
#
# Notes following a bibitem are folded into it first so they travel with
# it, and are extracted again afterwards. Bibitems go back where the first
# one stood: after the element that preceded it, else before the first
# remaining child, else (when nothing but bibitems was present) as the
# children of the otherwise empty +refs+.
def biblio_reorder1(refs)
  fold_notes_into_biblio(refs)
  bib = sort_biblio(refs.xpath("./bibitem"))
  insert = refs.at("./bibitem")&.previous_element
  refs.xpath("./bibitem").each(&:remove)
  # Iterate in reverse: every item is placed at the same insertion point,
  # so the last one placed ends up first.
  bib.reverse.each do |b|
    xml = b.to_xml
    if insert
      insert.next = xml
    elsif (first = refs.children.first)
      first.add_previous_sibling(xml)
    else
      # Nothing left to anchor on: refs contained only bibitems.
      refs.add_child(xml)
    end
  end
  extract_notes_from_biblio(refs)
  refs.xpath("./references").each { |r| biblio_reorder1(r) }
end
25
+
26
# Moves each run of <note> elements directly following a bibitem inside
# that bibitem, tagging every moved note with an "appended" attribute.
def fold_notes_into_biblio(refs)
  refs.xpath("./bibitem").each do |item|
    while (note = item.next_element) && note.name == "note"
      note["appended"] = true
      item << note.remove
    end
  end
end
34
+
35
# Moves each note tagged "appended" out of its bibitem so that it follows
# the bibitem again, dropping the tag. Notes are processed last-first so
# they keep their original order.
def extract_notes_from_biblio(refs)
  refs.xpath("./bibitem").each do |item|
    item.xpath("./note[@appended]").reverse.each do |note|
      note.delete("appended")
      item.next = note
    end
  end
end
43
+
44
# Ordering applied to bibitems by biblio_reorder1. This implementation
# returns +bib+ as given, i.e. it keeps the existing order.
def sort_biblio(bib)
  bib
end
47
+
48
# Default presupposes that all citations in the bibliography are numbered
# consecutively, but that standards codes are preserved as is: only
# numeric references are renumbered.
#
# Every non-hidden bibitem in a non-normative <references> (under a
# bibliography, clause or annex) advances the counter; only a metanorma
# docidentifier of the form "[n]" is rewritten with the counter value.
def biblio_renumber(xmldoc)
  counter = 0
  containers = %w[bibliography clause annex]
    .map { |parent| "//#{parent}//references" }.join(" | ")
  xmldoc.xpath(containers).each do |refs|
    next if refs["normative"] == "true"

    refs.xpath("./bibitem[not(@hidden = 'true')]").each do |item|
      counter += 1
      docid = item.at("./docidentifier[@type = 'metanorma']")
      next unless docid && /^\[\d+\]$/.match?(docid.text)

      docid.children = "[#{counter}]"
    end
  end
end
66
+
67
# Moves every <ref> that is a child of a <p> out of the paragraph, placing
# it immediately before that paragraph.
def ref_cleanup(xmldoc)
  xmldoc.xpath("//p/ref").each do |ref|
    para = ref.parent
    detached = ref.remove
    para.previous = detached
  end
end
74
+
75
# Removes the prefatory content of the section located by the class's
# NORM_REF XPath: the elements between its title and its first bibitem,
# except boilerplate notes and their descendants. Does nothing when no such
# section exists.
def normref_cleanup(xmldoc)
  normref = xmldoc.at(self.class::NORM_REF)
  return unless normref

  after_title = normref.xpath("./title/following-sibling::*")
  before_bibitem = normref.xpath("./bibitem[1]/preceding-sibling::*")
  boilerplate =
    normref.xpath("./note[@type = 'boilerplate']/descendant-or-self::*")
  # Set intersection, then set difference.
  preface = (after_title & before_bibitem) - boilerplate
  preface.each(&:remove)
end
82
+
83
# Runs the bibliography clean-up passes over +xmldoc+ in order: reordering,
# nested <references> handling, renumbering, and removal of <ext> from
# bibitems.
def biblio_cleanup(xmldoc)
  biblio_reorder xmldoc
  biblio_nested xmldoc
  biblio_renumber xmldoc
  biblio_no_ext xmldoc
end
89
+
90
# Removes every <ext> child of a <bibitem>.
def biblio_no_ext(xmldoc)
  xmldoc.xpath("//bibitem/ext").each do |ext|
    ext.remove
  end
end
93
+
94
# Converts each <references> that contains nested <references> into a
# <clause>: the "normative" attribute is copied onto each direct
# <references> child and then removed from the new clause.
def biblio_nested(xmldoc)
  xmldoc.xpath("//references[references]").each do |outer|
    outer.name = "clause"
    normative = outer["normative"]
    outer.xpath("./references").each do |nested|
      nested["normative"] = normative
    end
    outer.delete("normative")
  end
end
101
+
102
# Formats a document identifier for citation. Identifiers of a type other
# than "metanorma" are delegated to @isodoc.docid_prefix. A purely numeric
# metanorma identifier is wrapped in square brackets; any other metanorma
# identifier is returned as is.
def format_ref(ref, type)
  return @isodoc.docid_prefix(type, ref) unless type == "metanorma"

  bare_number = /^\d+$/.match?(ref) && !/^\[.*\]$/.match?(ref)
  bare_number ? "[#{ref}]" : ref
end
108
+
109
# XPath, relative to a bibitem, selecting a publisher organisation that is
# ISO or IEC, matched by abbreviation or by full name.
ISO_PUBLISHER_XPATH = [
  "./contributor[role/@type = 'publisher']/organization[",
  [
    "abbreviation = 'ISO'",
    "abbreviation = 'IEC'",
    "name = 'International Organization for Standardization'",
    "name = 'International Electrotechnical Commission'",
  ].join(" or "),
  "]",
].join.freeze
114
+
115
# Records a citation label in @anchors for every top-level bibitem, keyed
# by the bibitem's id. The label is built by format_ref from the metanorma
# docidentifier or, failing that, from the first docidentifier whose type
# is not DOI. Bibitems with neither are skipped.
def reference_names(xmldoc)
  xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |ref|
    docid = ref.at("./docidentifier[@type = 'metanorma']") ||
      ref.at("./docidentifier[not(@type = 'DOI')]")
    next unless docid

    label = format_ref(docid.text, docid["type"])
    @anchors[ref["id"]] = { xref: label }
  end
end
124
+
125
# Termbase lookup hook. This implementation ignores both arguments and
# returns an empty string.
def fetch_termbase(_termbase, _id)
  ""
end
128
+
129
# Loads a bibitem from a local file named after +uri+.
#
# http(s) URIs are never read. Otherwise "<@localdir><uri>.rxl" is tried,
# then "<@localdir><uri>.xml". The file's <bibdata> element is re-parsed
# without its default namespace declaration, renamed to <bibitem>, given a
# <uri type="citation"> holding +uri+ before its first docidentifier, and
# stripped of one <ext> child.
#
# @return the bibitem element, or nil when no file, <bibdata> or
#   docidentifier is found
def read_local_bibitem(uri)
  return nil if %r{^https?://}.match?(uri)

  file = %w[rxl xml].map { |ext| "#{@localdir}#{uri}.#{ext}" }
    .find { |candidate| File.file?(candidate) }
  return nil unless file

  xml = Nokogiri::XML(File.read(file, encoding: "utf-8"))
  bibdata = xml.at("//*[local-name() = 'bibdata']")
  return nil unless bibdata

  unnamespaced = bibdata.to_xml
    .sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")
  ret = Nokogiri::XML(unnamespaced).root
  ret.name = "bibitem"
  docid = ret.at("./*[local-name() = 'docidentifier']")
  return nil unless docid

  docid.previous = %{<uri type="citation">#{uri}</uri>}
  ret.at("./*[local-name() = 'ext']")&.remove
  ret
end
145
+
146
# If the citation uri of a bibitem points to a local file, gets the bibitem
# from it: each bibitem having both a <formattedref> and a
# <uri type="citation"> is replaced by what read_local_bibitem returns for
# that uri, keeping the original id. Bibitems with no local file are left
# alone.
def fetch_local_bibitem(xmldoc)
  xmldoc.xpath("//bibitem[formattedref][uri[@type = 'citation']]")
    .each do |b|
    uri = b.at("./uri[@type = 'citation']")&.text
    local = read_local_bibitem(uri)
    next unless local

    local["id"] = b["id"]
    b.replace(local)
  end
end
156
+
157
# Strips the id attribute from every bibitem nested inside another bibitem.
def bibitem_nested_id(xmldoc)
  xmldoc.xpath("//bibitem//bibitem").each { |nested| nested.delete("id") }
end
162
+
163
# Runs the bibitem clean-up passes over +xmldoc+ in order: nested id
# removal, definition-list reference conversion, local bibitem loading.
def bibitem_cleanup(xmldoc)
  bibitem_nested_id xmldoc
  ref_dl_cleanup xmldoc
  fetch_local_bibitem xmldoc
end
168
+ end
169
+ end
170
+ end
@@ -0,0 +1,104 @@
1
+ require "set"
2
+ require "relaton_bib"
3
+
4
+ module Metanorma
5
+ module Standoc
6
+ module Cleanup
7
# Converts each <clause bibitem="true"> into a bibitem. The clause's
# definition list is extracted into a hash, validated, and rendered through
# RelatonBib::BibliographicItem; the result replaces the clause. The clause
# id is carried over unless it starts with "_". Clauses yielding no hash
# are skipped.
def ref_dl_cleanup(xmldoc)
  xmldoc.xpath("//clause[@bibitem = 'true']").each do |clause|
    bib = dl_bib_extract(clause)
    next unless bib

    validate_ref_dl(bib, clause)
    bibitemxml = RelatonBib::BibliographicItem.from_hash(bib).to_xml
    next unless bibitemxml

    bibitem = Nokogiri::XML(bibitemxml)
    id = clause["id"]
    bibitem.root["id"] = id if id && !/^_/.match?(id)
    clause.replace(bibitem.root)
  end
end
17
+
18
# Checks a reference hash extracted from clause +c+ and logs what is
# missing.
#
# The anchor is the hash's "id", else the clause id unless that starts with
# "_" (implicit ids are not accepted). With no anchor, an "Anchors" entry is
# logged and checking stops. Otherwise the anchor is appended to @refids and
# a "Bibliography" entry is logged for a missing title and for a missing
# docid.
def validate_ref_dl(bib, c)
  id = bib["id"]
  id ||= c["id"] unless /^_/.match?(c["id"]) # do not accept implicit id
  if id.nil? || id == false
    @log.add("Anchors", c,
             "The following reference is missing an anchor:\n#{c.to_xml}")
    return
  end
  @refids << id
  bib["title"] ||
    @log.add("Bibliography", c, "Reference #{id} is missing a title")
  bib["docid"] ||
    @log.add("Bibliography", c,
             "Reference #{id} is missing a document identifier (docid)")
end
33
+
34
# Wraps the content of the first <p> under bib[tag] in a <key> element,
# returned as a string. Returns nil when bib[tag] is absent.
# NOTE(review): a bib[tag] with no <p> would raise on nil -- confirm that
# cannot occur for the callers of this helper.
def extract_from_p(tag, bib, key)
  value = bib[tag]
  return unless value

  content = value.at("p").children
  "<#{key}>#{content}</#{key}>"
end
39
+
40
# If the content is a single paragraph, replaces it with its children; a
# single link in that paragraph is replaced with its uri. Any other content
# is serialised whole. The result is a stripped XML string.
def p_unwrap(para)
  elems = para.elements
  single_para = elems.size == 1 && elems[0].name == "p"
  return para.to_xml.strip unless single_para

  link_unwrap(elems[0]).children.to_xml.strip
end
50
+
51
# When the only element in +para+ is a <link>, replaces that link with its
# stripped target attribute. Returns +para+ in every case.
def link_unwrap(para)
  elems = para.elements
  return para unless elems.size == 1 && elems[0].name == "link"

  para.at("./link").replace(elems[0]["target"].strip)
  para
end
58
+
59
# Extracts the value of one definition (<dd>) of a reference definition
# list.
#
# Gives nil for an empty definition, a nested hash when the definition
# holds a <dl>, an array of unwrapped items when its only element is an
# <ol> or <ul>, and the unwrapped content otherwise. Except in the <dl>
# case the definition is removed from the tree.
def dd_bib_extract(dtd)
  return nil if dtd.children.empty?
  return dl_bib_extract(dtd) if dtd.at("./dl")

  elems = dtd.remove.elements
  single_list = elems.size == 1 && %w(ol ul).include?(elems[0].name)
  return p_unwrap(dtd) unless single_list

  elems[0].xpath("./li").map { |li| p_unwrap(li) }
end
73
+
74
# Stores +val+ in +bib+ under the dot-separated path +key+, delegating to
# Metanorma::Utils.set_nested_value with the path split on ".".
def add_to_hash(bib, key, val)
  path = key.split(".")
  Metanorma::Utils::set_nested_value(bib, path, val)
end
77
+
78
# Builds a reference hash from a definition list, with at most one level
# of unordered lists.
#
# Each <dt> (trailing colons dropped) names the key for the <dd> values
# that follow it. Sub-clauses titled contributor, relation or series are
# extracted recursively under that name. Unless +nested+, the title of +c+
# is removed and appended to the "title" entry, which is turned into an
# array if it is not one already.
#
# @return [Hash, nil] nil when +c+ has no <dl> child
def dl_bib_extract(c, nested = false)
  dl = c.at("./dl")
  return unless dl

  bib = {}
  key = ""
  dl.xpath("./dt | ./dd").each do |dtd|
    if dtd.name == "dt"
      key = dtd.text.sub(/:+$/, "")
    else
      add_to_hash(bib, key, dd_bib_extract(dtd))
    end
  end
  c.xpath("./clause").each do |sub|
    heading = sub.at("./title")&.text&.downcase&.strip
    next unless %w(contributor relation series).include?(heading)

    add_to_hash(bib, heading, dl_bib_extract(sub, true))
  end
  heading = nested ? nil : c.at("./title")
  if heading
    title = heading.remove.children.to_xml
    existing = bib["title"]
    bib["title"] = case existing
                   when Hash, String then [existing]
                   when nil, false then []
                   else existing
                   end
    bib["title"] << title unless title.empty?
  end
  bib
end
102
+ end
103
+ end
104
+ end