metanorma-standoc 1.11.4 → 2.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (150) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +3 -31
  3. data/.gitignore +23 -0
  4. data/Gemfile +0 -1
  5. data/lib/asciidoctor/standoc/base.rb +2 -145
  6. data/lib/asciidoctor/standoc/blocks.rb +2 -238
  7. data/lib/asciidoctor/standoc/blocks_notes.rb +2 -100
  8. data/lib/asciidoctor/standoc/cleanup.rb +2 -208
  9. data/lib/asciidoctor/standoc/cleanup_amend.rb +2 -53
  10. data/lib/asciidoctor/standoc/cleanup_block.rb +2 -172
  11. data/lib/asciidoctor/standoc/cleanup_boilerplate.rb +2 -212
  12. data/lib/asciidoctor/standoc/cleanup_footnotes.rb +2 -108
  13. data/lib/asciidoctor/standoc/cleanup_image.rb +2 -69
  14. data/lib/asciidoctor/standoc/cleanup_inline.rb +2 -189
  15. data/lib/asciidoctor/standoc/cleanup_maths.rb +2 -221
  16. data/lib/asciidoctor/standoc/cleanup_ref.rb +2 -169
  17. data/lib/asciidoctor/standoc/cleanup_ref_dl.rb +2 -103
  18. data/lib/asciidoctor/standoc/cleanup_reqt.rb +2 -110
  19. data/lib/asciidoctor/standoc/cleanup_section.rb +2 -184
  20. data/lib/asciidoctor/standoc/cleanup_section_names.rb +2 -91
  21. data/lib/asciidoctor/standoc/cleanup_symbols.rb +2 -47
  22. data/lib/asciidoctor/standoc/cleanup_table.rb +2 -67
  23. data/lib/asciidoctor/standoc/cleanup_terms.rb +2 -139
  24. data/lib/asciidoctor/standoc/cleanup_terms_designations.rb +2 -198
  25. data/lib/asciidoctor/standoc/cleanup_text.rb +2 -95
  26. data/lib/asciidoctor/standoc/cleanup_toc.rb +3 -0
  27. data/lib/asciidoctor/standoc/cleanup_xref.rb +2 -106
  28. data/lib/asciidoctor/standoc/converter.rb +2 -123
  29. data/lib/asciidoctor/standoc/datamodel/attributes_table_preprocessor.rb +2 -56
  30. data/lib/asciidoctor/standoc/datamodel/diagram_preprocessor.rb +2 -102
  31. data/lib/asciidoctor/standoc/datamodel/plantuml_renderer.rb +3 -404
  32. data/lib/asciidoctor/standoc/deprecated.rb +5 -0
  33. data/lib/asciidoctor/standoc/front.rb +2 -223
  34. data/lib/asciidoctor/standoc/front_contributor.rb +2 -191
  35. data/lib/asciidoctor/standoc/inline.rb +2 -231
  36. data/lib/asciidoctor/standoc/lists.rb +2 -119
  37. data/lib/asciidoctor/standoc/macros.rb +2 -203
  38. data/lib/asciidoctor/standoc/macros_form.rb +2 -62
  39. data/lib/asciidoctor/standoc/macros_note.rb +2 -44
  40. data/lib/asciidoctor/standoc/macros_plantuml.rb +2 -112
  41. data/lib/asciidoctor/standoc/macros_terms.rb +2 -180
  42. data/lib/asciidoctor/standoc/ref.rb +2 -251
  43. data/lib/asciidoctor/standoc/ref_sect.rb +2 -153
  44. data/lib/asciidoctor/standoc/ref_utility.rb +2 -0
  45. data/lib/asciidoctor/standoc/render.rb +2 -114
  46. data/lib/asciidoctor/standoc/reqt.rb +2 -89
  47. data/lib/asciidoctor/standoc/section.rb +2 -207
  48. data/lib/asciidoctor/standoc/table.rb +2 -84
  49. data/lib/asciidoctor/standoc/term_lookup_cleanup.rb +2 -178
  50. data/lib/asciidoctor/standoc/terms.rb +2 -159
  51. data/lib/asciidoctor/standoc/utils.rb +2 -100
  52. data/lib/asciidoctor/standoc/validate.rb +2 -157
  53. data/lib/asciidoctor/standoc/validate_section.rb +2 -54
  54. data/lib/isodoc/html/htmlstyle.css +25 -19
  55. data/lib/isodoc/html/htmlstyle.scss +7 -2
  56. data/lib/metanorma/standoc/base.rb +163 -0
  57. data/lib/{asciidoctor → metanorma}/standoc/basicdoc.rng +5 -3
  58. data/lib/{asciidoctor → metanorma}/standoc/biblio.rng +7 -5
  59. data/lib/metanorma/standoc/blocks.rb +239 -0
  60. data/lib/metanorma/standoc/blocks_notes.rb +101 -0
  61. data/lib/metanorma/standoc/cleanup.rb +157 -0
  62. data/lib/metanorma/standoc/cleanup_amend.rb +54 -0
  63. data/lib/metanorma/standoc/cleanup_block.rb +173 -0
  64. data/lib/metanorma/standoc/cleanup_boilerplate.rb +213 -0
  65. data/lib/metanorma/standoc/cleanup_footnotes.rb +109 -0
  66. data/lib/metanorma/standoc/cleanup_image.rb +184 -0
  67. data/lib/metanorma/standoc/cleanup_inline.rb +190 -0
  68. data/lib/metanorma/standoc/cleanup_maths.rb +222 -0
  69. data/lib/metanorma/standoc/cleanup_ref.rb +170 -0
  70. data/lib/metanorma/standoc/cleanup_ref_dl.rb +104 -0
  71. data/lib/metanorma/standoc/cleanup_reqt.rb +111 -0
  72. data/lib/metanorma/standoc/cleanup_section.rb +212 -0
  73. data/lib/metanorma/standoc/cleanup_section_names.rb +92 -0
  74. data/lib/metanorma/standoc/cleanup_symbols.rb +48 -0
  75. data/lib/metanorma/standoc/cleanup_table.rb +68 -0
  76. data/lib/metanorma/standoc/cleanup_terms.rb +140 -0
  77. data/lib/metanorma/standoc/cleanup_terms_designations.rb +199 -0
  78. data/lib/metanorma/standoc/cleanup_text.rb +74 -0
  79. data/lib/metanorma/standoc/cleanup_toc.rb +98 -0
  80. data/lib/metanorma/standoc/cleanup_xref.rb +114 -0
  81. data/lib/metanorma/standoc/converter.rb +126 -0
  82. data/lib/metanorma/standoc/datamodel/attributes_table_preprocessor.rb +57 -0
  83. data/lib/metanorma/standoc/datamodel/diagram_preprocessor.rb +103 -0
  84. data/lib/metanorma/standoc/datamodel/plantuml_renderer.rb +409 -0
  85. data/lib/metanorma/standoc/front.rb +224 -0
  86. data/lib/metanorma/standoc/front_contributor.rb +192 -0
  87. data/lib/metanorma/standoc/inline.rb +232 -0
  88. data/lib/{asciidoctor → metanorma}/standoc/isodoc.rng +108 -1
  89. data/lib/metanorma/standoc/lists.rb +120 -0
  90. data/lib/metanorma/standoc/macros.rb +205 -0
  91. data/lib/metanorma/standoc/macros_embed.rb +72 -0
  92. data/lib/metanorma/standoc/macros_form.rb +63 -0
  93. data/lib/metanorma/standoc/macros_note.rb +45 -0
  94. data/lib/metanorma/standoc/macros_plantuml.rb +113 -0
  95. data/lib/metanorma/standoc/macros_terms.rb +194 -0
  96. data/lib/metanorma/standoc/ref.rb +248 -0
  97. data/lib/metanorma/standoc/ref_sect.rb +153 -0
  98. data/lib/{asciidoctor/standoc/ref_date_id.rb → metanorma/standoc/ref_utility.rb} +45 -6
  99. data/lib/metanorma/standoc/render.rb +115 -0
  100. data/lib/metanorma/standoc/reqt.rb +90 -0
  101. data/lib/{asciidoctor → metanorma}/standoc/reqt.rng +0 -0
  102. data/lib/metanorma/standoc/section.rb +209 -0
  103. data/lib/metanorma/standoc/table.rb +85 -0
  104. data/lib/metanorma/standoc/term_lookup_cleanup.rb +181 -0
  105. data/lib/metanorma/standoc/terms.rb +163 -0
  106. data/lib/metanorma/standoc/utils.rb +101 -0
  107. data/lib/metanorma/standoc/validate.rb +163 -0
  108. data/lib/metanorma/standoc/validate_section.rb +55 -0
  109. data/lib/metanorma/standoc/version.rb +1 -1
  110. data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/model_representation.adoc.erb +0 -0
  111. data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/plantuml_representation.adoc.erb +0 -0
  112. data/lib/metanorma-standoc.rb +1 -1
  113. data/metanorma-standoc.gemspec +1 -1
  114. data/spec/assets/a1.adoc +8 -0
  115. data/spec/assets/a2.adoc +8 -0
  116. data/spec/assets/a3.adoc +9 -0
  117. data/spec/assets/a4.adoc +4 -0
  118. data/spec/{asciidoctor → metanorma}/base_spec.rb +453 -409
  119. data/spec/{asciidoctor → metanorma}/blank_spec.rb +1 -1
  120. data/spec/{asciidoctor → metanorma}/blocks_spec.rb +1 -1
  121. data/spec/{asciidoctor → metanorma}/cleanup_blocks_spec.rb +137 -1
  122. data/spec/{asciidoctor → metanorma}/cleanup_sections_spec.rb +1 -1
  123. data/spec/{asciidoctor → metanorma}/cleanup_spec.rb +8 -8
  124. data/spec/{asciidoctor → metanorma}/cleanup_terms_spec.rb +204 -204
  125. data/spec/{asciidoctor → metanorma}/datamodel/attributes_table_preprocessor_spec.rb +1 -1
  126. data/spec/{asciidoctor → metanorma}/datamodel/diagram_preprocessor_spec.rb +1 -1
  127. data/spec/{asciidoctor → metanorma}/inline_spec.rb +1 -1
  128. data/spec/{asciidoctor → metanorma}/isobib_cache_spec.rb +3 -3
  129. data/spec/{asciidoctor → metanorma}/lists_spec.rb +1 -1
  130. data/spec/{asciidoctor → metanorma}/macros_json2text_spec.rb +0 -0
  131. data/spec/{asciidoctor → metanorma}/macros_plantuml_spec.rb +3 -3
  132. data/spec/{asciidoctor → metanorma}/macros_spec.rb +158 -6
  133. data/spec/{asciidoctor → metanorma}/macros_yaml2text_spec.rb +0 -0
  134. data/spec/metanorma/refs_dl_spec.rb +863 -0
  135. data/spec/{asciidoctor → metanorma}/refs_spec.rb +580 -66
  136. data/spec/{asciidoctor → metanorma}/section_spec.rb +42 -17
  137. data/spec/{asciidoctor → metanorma}/table_spec.rb +1 -1
  138. data/spec/{asciidoctor → metanorma}/validate_spec.rb +2 -2
  139. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +46 -46
  140. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +10 -10
  141. data/spec/vcr_cassettes/hide_refs.yml +599 -0
  142. data/spec/vcr_cassettes/isobib_get_123.yml +12 -12
  143. data/spec/vcr_cassettes/isobib_get_123_1.yml +23 -23
  144. data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +85 -85
  145. data/spec/vcr_cassettes/isobib_get_123_2001.yml +12 -12
  146. data/spec/vcr_cassettes/isobib_get_124.yml +13 -13
  147. data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +14 -14
  148. data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +60 -50
  149. metadata +88 -32
  150. data/spec/asciidoctor/refs_dl_spec.rb +0 -864
@@ -0,0 +1,109 @@
1
+ require "date"
2
+ require "htmlentities"
3
+ require "json"
4
+
5
+ module Metanorma
6
+ module Standoc
7
+ module Cleanup
8
# Returns the footnote's children as markup with every ` id="..."`
# attribute removed. The renumbering helpers use the result as a
# deduplication key.
# If the children do not respond to +to_xml+, they are used as-is.
def footnote_content(fn)
  kids = fn.children
  markup = kids.respond_to?(:to_xml) ? kids.to_xml : kids
  markup.gsub(/ id="[^"]+"/, "")
end
12
+
13
# Moves footnotes into the preceding figure when the paragraph that
# immediately follows the figure starts with a footnote and contains no
# alphabetic text. Repeats until a pass moves nothing, so a run of
# consecutive footnote-only paragraphs is absorbed one per pass.
def figure_footnote_cleanup(xmldoc)
  query = "//figure/following-sibling::*[1][self::p and *[1][self::fn]]"
  loop do
    moved = false
    xmldoc.xpath(query).each do |para|
      has_text = para.children.any? do |node|
        node.text? && /[[:alpha:]]/.match(node.text)
      end
      next if has_text

      # only the first element child (the fn) is kept; the paragraph goes
      para.previous_element << para.first_element_child.remove
      para.remove
      moved = true
    end
    break unless moved
  end
end
29
+
30
# Assigns an alphabetic reference label to one table/figure footnote.
#
# fn   - footnote node (read and written through []/[]=)
# i    - count of distinct footnotes seen so far in this table/figure
# seen - Hash mapping footnote content to its assigned ordinal
#
# Footnotes with identical content share a label. Labels run a..z and
# then continue aa, ab, ... (String#succ), instead of falling off the
# end of the alphabet into punctuation after the 26th footnote.
# Marks the footnote with a "table" attribute so that the document-wide
# renumbering skips it. Returns the updated [i, seen] pair.
def table_footnote_renumber1(fn, i, seen)
  content = footnote_content(fn)
  unless seen[content]
    i += 1
    seen[content] = i
  end
  fn["reference"] =
    (seen[content] - 1).times.reduce("a") { |label, _| label.succ }
  fn["table"] = true
  [i, seen]
end
42
+
43
# Renumbers footnotes independently within every table and figure.
# Footnotes inside a <name> element are excluded. The counter and the
# content-to-ordinal map start afresh for each container.
def table_footnote_renumber(xmldoc)
  xmldoc.xpath("//table | //figure").each do |container|
    footnotes = container.xpath(".//fn[not(ancestor::name)]")
    footnotes.reduce([0, {}]) do |(count, seen), fn|
      table_footnote_renumber1(fn, count, seen)
    end
  end
end
52
+
53
# Assigns a numeric reference to one footnote, unless it carries a
# "table" attribute, in which case it is left untouched.
#
# fn   - footnote node (read and written through []/[]=)
# i    - count of distinct footnotes numbered so far
# seen - Hash mapping footnote content to its assigned number
#
# Footnotes with identical content share a number.
# Returns the updated [i, seen] pair.
def other_footnote_renumber1(fn, i, seen)
  return [i, seen] if fn["table"]

  key = footnote_content(fn)
  number = seen[key]
  unless number
    i += 1
    number = i
    seen[key] = number
  end
  fn["reference"] = number.to_s
  [i, seen]
end
66
+
67
# Numbers every footnote in the document in document order, sharing one
# counter and one content-to-number map across the whole document.
# Footnotes marked with a "table" attribute are skipped by the
# per-footnote helper.
def other_footnote_renumber(xmldoc)
  count = 0
  numbered = {}
  xmldoc.xpath("//fn").each do |footnote|
    count, numbered = other_footnote_renumber1(footnote, count, numbered)
  end
end
74
+
75
# Converts footnotes found inside bibdata titles into
# <note type="title-footnote"> elements and relocates them to just
# before bibdata/language.
# NOTE(review): assumes bibdata/language exists whenever a title
# footnote does; otherwise the insertion raises on nil.
def title_footnote_move(xmldoc)
  anchor = xmldoc.at("//bibdata/language")
  xmldoc.xpath("//bibdata/title//fn").each do |note|
    note.name = "note"
    note["type"] = "title-footnote"
    note.delete("reference")
    anchor.previous = note.remove
  end
end
84
+
85
# Resolves every <footnoteblock> into an <fn>. The footnoteblock's text
# is the id of the element holding the footnote body; that element is
# removed from the document and its children become the footnote's
# content. Unresolvable references are logged and replaced by "[ERROR]".
#
# The id is passed to XPath as a bound variable rather than interpolated
# into the expression, so an id containing an apostrophe can no longer
# produce a malformed query.
def footnote_block_cleanup(xmldoc)
  xmldoc.xpath("//footnoteblock").each do |f|
    f.name = "fn"
    anchor = f.text
    target = xmldoc.at_xpath("//*[@id = $id]", nil, { id: anchor })
    if target
      f.children = target.remove.children
    else
      @log.add("Crossreferences", f,
               "Could not resolve footnoteblock:[#{anchor}]")
      f.children = "[ERROR]"
    end
  end
end
97
+
98
# Runs the footnote passes in order: resolve footnote blocks, move title
# footnotes, renumber table/figure footnotes, renumber the remaining
# footnotes, and finally drop the temporary "table" marker attribute.
def footnote_cleanup(xmldoc)
  footnote_block_cleanup(xmldoc)
  title_footnote_move(xmldoc)
  table_footnote_renumber(xmldoc)
  other_footnote_renumber(xmldoc)
  xmldoc.xpath("//fn").each { |footnote| footnote.delete("table") }
end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,184 @@
1
+ module Metanorma
2
+ module Standoc
3
+ module Cleanup
4
# Runs the svgmap passes in order: de-duplicate SVG ids, move
# attributes from svgmap to its figure, rebuild svgmap content as figure
# plus targets, then hand over to Metanorma::Utils for the rewrite.
def svgmap_cleanup(xmldoc)
  %i[svg_uniqueids svgmap_moveattrs svgmap_populate].each do |pass|
    send(pass, xmldoc)
  end
  Metanorma::Utils::svgmap_rewrite(xmldoc, @localdir)
end
10
+
11
# True when +str+ has the shape of an underscore-prefixed UUID
# ("_xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", hex digits, either case).
#
# This module defines guid? in more than one file; the definitions are
# kept identical so that behaviour does not depend on load order.
# Anchored with \A/\z so that a GUID on one line of a multi-line string
# does not count. Returns a strict boolean; nil input gives false.
def guid?(str)
  /\A_\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\z/.match?(str)
end
15
+
16
# For each svgmap containing a figure: moves the svgmap's <name> to the
# front of the figure (if the figure has none), transfers the svgmap id
# to the figure when the figure's own id is an auto-generated GUID, and
# then moves the remaining transferable attributes across.
def svgmap_moveattrs(xmldoc)
  xmldoc.xpath("//svgmap").each do |svgmap|
    figure = svgmap.at(".//figure")
    next unless figure

    title = svgmap.at("./name")
    if title && !figure.at("./name")
      figure.children.first.previous = title.remove
    end
    if svgmap["id"] && guid?(figure["id"])
      figure["id"] = svgmap["id"]
      svgmap.delete("id")
    end
    svgmap_moveattrs1(svgmap, figure)
  end
end
28
+
29
# Moves a fixed list of attributes from +svgmap+ to +figure+.
# An attribute is moved only when the svgmap has it and the figure does
# not; it is deleted from the svgmap after being copied.
def svgmap_moveattrs1(svgmap, figure)
  movable = %w(unnumbered number subsequence keep-with-next
               keep-lines-together tag multilingual-rendering)
  movable.each do |attr|
    value = svgmap[attr]
    next if figure[attr] || !value

    figure[attr] = value
    svgmap.delete(attr)
  end
end
38
+
39
# Rebuilds each svgmap so that it contains only its figure followed by
# one <target> per list item. A target is emitted for a list item whose
# first eref/link/xref is followed by sibling nodes; those siblings
# supply the href and the reference itself becomes the target's content.
# NOTE(review): the href text is interpolated into markup unescaped —
# confirm it cannot contain '&' or '"'.
def svgmap_populate(xmldoc)
  xmldoc.xpath("//svgmap").each do |svgmap|
    snapshot = svgmap.dup
    svgmap.children.remove
    figure = snapshot.at(".//figure")
    svgmap << figure if figure
    snapshot.xpath(".//li").each do |item|
      ref = item.at(".//eref | .//link | .//xref")
      next unless ref

      trailing = ref.xpath("./following-sibling::node()")
      next if trailing.empty?

      svgmap <<
        %[<target href="#{svgmap_target(trailing)}">#{ref.to_xml}</target>]
    end
  end
end
52
+
53
# Derives the href text for an svgmap target from the nodes that follow
# the reference. Any <link> among them has its content replaced by its
# "target" attribute first; the combined text then loses one leading
# comma, semicolon or space and is stripped.
def svgmap_target(nodeset)
  links = nodeset.select { |node| node.name == "link" }
  links.each { |link| link.children = link["target"] }
  nodeset.text.sub(/^[,; ]/, "").strip
end
61
+
62
# When data-URI encoding is enabled (@datauriimage), inlines every
# image: SVG files are read into the document as markup, everything
# else has its src replaced by a data URI. SVG ids are then
# de-duplicated regardless of the setting. Returns +xmldoc+.
def img_cleanup(xmldoc)
  images = @datauriimage ? xmldoc.xpath("//image") : []
  images.each do |img|
    # SVG is not data-URI encoded: its ids still need de-duplicating
    next if read_in_if_svg(img, @localdir)

    img["src"] = Metanorma::Utils::datauri(img["src"], @localdir)
  end
  svg_uniqueids(xmldoc)
  xmldoc
end
74
+
75
# Replaces +img+ with the root element of the SVG file its src points
# at. Returns true when the replacement happened, false when the image
# has no src, the resolved path is not a file, or the file's MIME type
# is not image/svg+xml.
def read_in_if_svg(img, localdir)
  src = img["src"]
  return false unless src

  path = Metanorma::Utils::svgmap_rewrite0_path(src, localdir)
  return false unless File.file?(path)

  types = MIME::Types.type_for(path)
  return false unless types
  return false unless types.first == "image/svg+xml"

  svg = File.read(path, encoding: "utf-8")
  return false unless svg

  img.replace(Nokogiri::XML(svg).root.to_xml)
  true
end
86
+
87
+ IRI_TAG_PROPERTIES_MAP = {
88
+ clipPath: ["clip-path"],
89
+ "color-profile": nil,
90
+ cursor: nil,
91
+ filter: nil,
92
+ linearGradient: ["fill", "stroke"],
93
+ marker: ["marker", "marker-end", "marker-mid", "marker-start"],
94
+ mask: nil,
95
+ pattern: ["fill", "stroke"],
96
+ radialGradient: ["fill", "stroke"],
97
+ }.freeze
98
+
99
+ SVG_NS = "http://www.w3.org/2000/svg".freeze
100
+
101
# Makes ids unique across all SVG images in the document.
# Starts from the ids that occur more than once, then processes each
# namespaced <svg> in document order; the id table returned by each
# image is carried into the next one.
def svg_uniqueids(xmldoc)
  id_attrs = xmldoc.xpath("//m:svg//*/@id | //svg/@id", "m" => SVG_NS)
  counts = id_attrs.map(&:text).group_by(&:itself)
    .transform_values(&:count)
  # only ids used at least twice need rewriting
  ids = counts.reject { |_, occurrences| occurrences < 2 }
  xmldoc.xpath("//m:svg", "m" => SVG_NS).each_with_index do |svg, idx|
    ids = svg_uniqueids1(svg, idx, ids)
  end
end
110
+
111
# Lists the attribute names that may carry a url(#id) reference to one
# of the given id-bearing elements. Each element whose tag is a key of
# IRI_TAG_PROPERTIES_MAP contributes its mapped properties, or its own
# tag name when the map holds nil. "style" is appended whenever the list
# is non-empty; otherwise an empty array is returned.
def svg_iri_properties(id_elems)
  tag_names = id_elems.map(&:name)
    .select { |name| IRI_TAG_PROPERTIES_MAP.key?(name.to_sym) }.uniq
  properties = tag_names.flat_map do |name|
    IRI_TAG_PROPERTIES_MAP[name.to_sym] || [name]
  end.uniq
  return [] if properties.empty?

  properties << "style"
end
122
+
123
# Processes one SVG image: rewrites its clashing ids and references,
# then returns +ids+ merged with every id seen on this image so that
# the same id in a later image is treated as a clash.
# The id-bearing elements are collected before the rewrite and their
# ids are read after it.
def svg_uniqueids1(svg, idx, ids)
  id_elems = svg.xpath(".//*[@id] | ./@id/..")
  svg_uniqueids2(svg, svg_iri_properties(id_elems), idx, ids)
  seen = id_elems.map do |elem|
    id = elem["id"]
    [id + (ids[id] ? "_inject_#{idx}" : ""), true]
  end.to_h
  ids.merge(seen)
end
131
+
132
# Walks every element of one SVG image and applies the rewrites:
# <style> elements get their url(#id) references updated in their text,
# other elements with attributes get the IRI-carrying attributes
# updated; every element then has its href links and its own id updated.
def svg_uniqueids2(svg, iri_properties, idx, ids)
  svg.traverse do |node|
    next unless node.element?

    if node.name == "style" then svg_styleupdate(node, idx, ids)
    elsif !node.attributes.empty?
      svg_attrupdate(node, iri_properties, idx, ids)
    end
    svg_linkupdate(node, idx, ids)
    svg_idupdate(node, idx, ids)
  end
end
145
+
146
# Rewrites url(#id) references in +text+ whose id is a key of +ids+,
# appending "_inject_<idx>" to the id. References to other ids are left
# exactly as written.
#
# text - CSS or attribute text possibly containing url(#...) references
# idx  - index of the SVG image being processed
# ids  - Hash whose keys are the ids to rewrite
#
# The id may be bare, double-quoted or single-quoted (CSS allows both
# quote styles); rewritten references are always emitted unquoted.
def svg_update_url(text, idx, ids)
  text.gsub(/url\(["']?#([a-zA-Z][\w:.-]*)["']?\)/) do |reference|
    id = Regexp.last_match(1)
    ids[id] ? "url(##{id}_inject_#{idx})" : reference
  end
end
153
+
154
# Replaces the content of a <style> element with its text after
# url(#id) references to clashing ids have been rewritten.
def svg_styleupdate(elem, idx, ids)
  rewritten = svg_update_url(elem.text, idx, ids)
  elem.children = rewritten
end
157
+
158
# Rewrites url(#id) references in each of the listed attributes that
# the element actually has; attributes it lacks are skipped.
def svg_attrupdate(elem, iri_properties, idx, ids)
  iri_properties.each do |property|
    current = elem[property]
    elem[property] = svg_update_url(current, idx, ids) if current
  end
end
165
+
166
# Appends "_inject_<idx>" to same-document links (xlink:href / href of
# the form "#id") whose id is a key of +ids+.
#
# The attribute is written back from the stripped value, so surrounding
# whitespace can no longer end up between the id and the suffix.
def svg_linkupdate(elem, idx, ids)
  %w(xlink:href href).each do |ref|
    iri = elem[ref]&.strip
    next unless iri&.start_with?("#")
    next unless ids[iri[1..-1]]

    elem[ref] = "#{iri}_inject_#{idx}"
  end
end
175
+
176
# Appends "_inject_<idx>" to the element's id when that id is a key of
# +ids+. Elements without an id, or with a non-clashing id, are left
# alone.
def svg_idupdate(elem, idx, ids)
  id = elem["id"]
  return unless id
  return unless ids[id]

  elem["id"] = id + "_inject_#{idx}"
end
182
+ end
183
+ end
184
+ end
@@ -0,0 +1,190 @@
1
+ require "metanorma-utils"
2
+ require "digest"
3
+
4
+ module Metanorma
5
+ module Standoc
6
+ module Cleanup
7
# True when no non-whitespace text precedes the first child element of
# +elem+ (also true when +elem+ has no element children and no
# non-whitespace text at all).
def empty_text_before_first_element(elem)
  decisive = elem.children.find do |child|
    (child.text? && /\S/.match(child.text)) || child.element?
  end
  return true unless decisive

  !(decisive.text? && /\S/.match(decisive.text))
end
14
+
15
# Tidies the first child of +elem+ when it is a text node: a node
# holding non-whitespace loses a space at the start of each line, and a
# whitespace-only node is removed altogether.
#
# Does nothing when +elem+ has no children. That case is reachable after
# a bookmark that was its parent's only child has been removed, and
# previously raised NoMethodError on nil.
def strip_initial_space(elem)
  first = elem.children[0]
  return unless first&.text?

  if /\S/.match?(first.text)
    first.content = first.text.gsub(/^ /, "")
  else
    first.remove
  end
end
24
+
25
# Converts leading bookmarks into ids, first for list items and then for
# definition terms.
def bookmark_cleanup(xmldoc)
  %i[li_bookmark_cleanup dt_bookmark_cleanup].each do |pass|
    send(pass, xmldoc)
  end
end
29
+
30
# Removes +bookmark+ from the document, gives its id to +elem+, and
# tidies leading whitespace in the bookmark's former parent.
def bookmark_to_id(elem, bookmark)
  container = bookmark.parent
  detached = bookmark.remove
  elem["id"] = detached["id"]
  strip_initial_space(container)
end
35
+
36
# For each list item whose first element is a paragraph that opens with
# a bookmark (no non-whitespace text before it), turns that bookmark
# into the list item's id.
def li_bookmark_cleanup(xmldoc)
  leading = "./*[1][local-name() = 'p']/*[1][local-name() = 'bookmark']"
  xmldoc.xpath("//li[descendant::bookmark]").each do |item|
    next unless item.at(leading)

    para = item.elements[0]
    next unless empty_text_before_first_element(para)

    bookmark_to_id(item, para.elements[0])
  end
end
45
+
46
# For each definition term that opens with a bookmark — either inside a
# leading paragraph or directly as the term's first element, with no
# non-whitespace text before it — turns that bookmark into the term's
# id. The paragraph form is tried first.
def dt_bookmark_cleanup(xmldoc)
  in_para = "./*[1][local-name() = 'p']/*[1][local-name() = 'bookmark']"
  direct = "./*[1][local-name() = 'bookmark']"
  xmldoc.xpath("//dt[descendant::bookmark]").each do |term|
    if term.at(in_para) &&
        empty_text_before_first_element(term.elements[0])
      bookmark_to_id(term, term.elements[0].elements[0])
    elsif term.at(direct) && empty_text_before_first_element(term)
      bookmark_to_id(term, term.elements[0])
    end
  end
end
58
+
59
# Processes every <concept> that has no <termxref> child: drops an empty
# <refterm>, then resolves the concept's key into a reference element.
def concept_cleanup(xmldoc)
  xmldoc.xpath("//concept[not(termxref)]").each do |concept|
    refterm = concept.at("./refterm")
    refterm.remove if refterm&.text&.empty?
    concept_cleanup1(concept)
  end
end
66
+
67
# Resolves the "key" attribute of a concept-like element into a child
# reference and then deletes the attribute.
# Whitespace-only content is cleared first and any locality is split off
# the key. A key containing ":" becomes a termbase reference, a key
# accepted by refid? becomes an eref, anything else becomes an xref.
def concept_cleanup1(elem)
  blank = elem&.children&.text&.strip&.empty?
  elem.children.remove if blank
  key_extract_locality(elem)
  key = elem["key"]
  if /:/.match?(key)
    concept_termbase_cleanup(elem)
  elsif refid?(key)
    concept_eref_cleanup(elem)
  else
    concept_xref_cleanup(elem)
  end
  elem.delete("key")
end
76
+
77
# Processes every <related> that has no <termxref> child: its <refterm>
# is replaced by a <preferred> wrapping the term expression built from
# the refterm's content, then the key is resolved into a reference.
#
# A <related> without a <refterm> no longer raises NoMethodError on nil;
# the replacement is skipped and key resolution still runs, matching the
# nil-tolerant handling in concept_cleanup.
def related_cleanup(xmldoc)
  xmldoc.xpath("//related[not(termxref)]").each do |related|
    term = related.at("./refterm")
    term&.replace("<preferred>#{term_expr(term.children.to_xml)}"\
                  "</preferred>")
    concept_cleanup1(related)
  end
end
85
+
86
# Splits a "key" attribute of the form "<key>,<locality>" at its first
# comma: the part after the comma is added as a <locality> child and the
# attribute keeps only the part before it. Keys without a comma (or a
# missing key) are left alone.
def key_extract_locality(elem)
  key = elem["key"]
  return unless /,/.match?(key)

  locality = key.sub(/^[^,]+,/, "")
  elem.add_child("<locality>#{locality}</locality>")
  elem["key"] = key.sub(/,.*$/, "")
end
92
+
93
# Adds a <termref> child for a key of the form "<termbase>:<target>"
# (split at the first colon). The content of any <xrefrender> child is
# moved into the termref.
def concept_termbase_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children
  termbase, key = elem["key"].split(/:/, 2)
  open_tag = %(<termref base="#{termbase}" target="#{key}">)
  elem.add_child("#{open_tag}#{rendering&.to_xml}</termref>")
end
99
+
100
# Adds an <xref> child targeting the element's key. The content of any
# <xrefrender> child is moved into the xref.
def concept_xref_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children
  markup = %(<xref target="#{elem['key']}">#{rendering&.to_xml}</xref>)
  elem.add_child(markup)
end
104
+
105
# Adds an <eref> child citing the element's key. Any <locality> child is
# removed and its content placed inside the eref, where
# extract_localities then processes it; the content of any <xrefrender>
# child is appended to the eref afterwards.
def concept_eref_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children&.to_xml
  locality = elem&.at("./locality")&.remove&.children&.to_xml
  elem.add_child "<eref bibitemid='#{elem['key']}'>#{locality}</eref>"
  # the eref just added is the last element child
  extract_localities(elem.elements[-1])
  elem.elements[-1].add_child(rendering) if rendering
end
112
+
113
# Normalises an xref target. A target of the form "<prefix>#<suffix>"
# has the characters listed in Metanorma::Utils::NAMECHAR replaced by
# "_" on each side of the first "#" separately, keeping the "#". Any
# other target is passed to Metanorma::Utils.to_ncname.
def to_xreftarget(str)
  parts = /^(?<pref>[^#]+)#(?<suff>.+)$/.match(str)
  return Metanorma::Utils::to_ncname(str) unless parts

  prefix = parts[:pref].gsub(%r([#{Metanorma::Utils::NAMECHAR}])o, "_")
  suffix = parts[:suff].gsub(%r([#{Metanorma::Utils::NAMECHAR}])o, "_")
  "#{prefix}##{suffix}"
end
121
+
122
+ IDREF = "//*/@id | //review/@from | //review/@to | "\
123
+ "//callout/@target | //citation/@bibitemid | "\
124
+ "//eref/@bibitemid".freeze
125
+
126
# Runs the identifier passes in order: normalise ids and id references,
# normalise xref targets, then replace GUID ids with content hashes.
def anchor_cleanup(elem)
  %i[anchor_cleanup1 xreftarget_cleanup contenthash_id_cleanup]
    .each { |pass| send(pass, elem) }
end
131
+
132
# Normalises every attribute selected by IDREF with
# Metanorma::Utils.to_ncname. Each changed value is logged together with
# a childless copy of the owning element and the original value.
def anchor_cleanup1(elem)
  elem.xpath(IDREF).each do |attr|
    normalised = Metanorma::Utils::to_ncname(attr.value)
    original = attr.value
    next if normalised == original

    attr.value = normalised
    shell = attr.parent.dup
    shell.children.remove
    @log.add("Anchors", attr.parent,
             "normalised identifier in #{shell} from #{original}")
  end
end
143
+
144
# Normalises the target attribute of every xref with to_xreftarget.
# Each changed value is logged together with a childless copy of the
# xref and the original value.
def xreftarget_cleanup(elem)
  elem.xpath("//xref/@target").each do |attr|
    normalised = to_xreftarget(attr.value)
    original = attr.value
    next if normalised == original

    attr.value = normalised
    shell = attr.parent.dup
    shell.children.remove
    @log.add("Anchors", attr.parent,
             "normalised identifier in #{shell} from #{original}")
  end
end
155
+
156
# True when +str+ has the shape of an underscore-prefixed UUID
# ("_xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", hex digits, either case).
#
# This module defines guid? in more than one file; the definitions are
# kept identical so that behaviour does not depend on load order.
# Anchored with \A/\z so that a GUID on one line of a multi-line string
# does not count. Returns a strict boolean; nil input gives false.
def guid?(str)
  /\A_\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\z/.match?(str)
end
160
+
161
# Replaces GUID ids with content-derived ids and then repoints the
# references that used the old ids.
def contenthash_id_cleanup(doc)
  replacements = contenthash_id_make(doc)
  contenthash_id_update_refs(doc, replacements)
end
165
+
166
# Replaces every GUID-shaped id in the document with the content hash of
# its element. Returns a Hash mapping each old id to its replacement.
def contenthash_id_make(doc)
  doc.xpath("//*[@id]").each_with_object({}) do |node, mapping|
    old_id = node["id"]
    next unless guid?(old_id)

    # the hash is computed while the node still carries its old id
    mapping[old_id] = contenthash(node)
    node["id"] = mapping[old_id]
  end
end
174
+
175
# Repoints id references to the replacement ids in +ids+. Covers
# review@from/@to, callout@target, eref@bibitemid, citation@bibitemid
# and xref@target/@to; attributes whose value is not a key of +ids+ are
# left unchanged.
def contenthash_id_update_refs(doc, ids)
  [%w(review from), %w(review to), %w(callout target), %w(eref bibitemid),
   %w(citation bibitemid), %w(xref target), %w(xref to)].each do |tag, attr|
    doc.xpath("//#{tag}").each do |node|
      replacement = ids[node[attr]]
      node[attr] = replacement if replacement
    end
  end
end
183
+
184
# Builds a GUID-shaped identifier ("_" plus 8-4-4-4-12 hex digits) from
# the MD5 digest of the element's path and text.
def contenthash(elem)
  digest = Digest::MD5.hexdigest("#{elem.path}////#{elem.text}")
  groups = [digest[0, 8], digest[8, 4], digest[12, 4], digest[16, 4],
            digest[20, 12]]
  "_#{groups.join('-')}"
end
188
+ end
189
+ end
190
+ end
@@ -0,0 +1,222 @@
1
+ require "nokogiri"
2
+ require "pathname"
3
+ require "html2doc"
4
+ require "asciimath2unitsml"
5
+ require_relative "./cleanup_block"
6
+ require_relative "./cleanup_footnotes"
7
+ require_relative "./cleanup_ref"
8
+ require_relative "./cleanup_ref_dl"
9
+ require_relative "./cleanup_boilerplate"
10
+ require_relative "./cleanup_section"
11
+ require_relative "./cleanup_terms"
12
+ require_relative "./cleanup_inline"
13
+ require_relative "./cleanup_amend"
14
+ require "relaton_iev"
15
+
16
+ module Metanorma
17
+ module Standoc
18
+ module Cleanup
19
# Converts AsciiMath stems in serialised XML to MathML.
# Each <stem type="AsciiMath"> has its content entity-decoded and is
# re-tagged as <amathstem> so Html2Doc can convert it by delimiter; any
# resulting math element not already inside a stem is wrapped in
# <stem type='MathML'>. Returns the re-serialised document.
def asciimath2mathml(text)
  marked = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
    "<amathstem>#{HTMLEntities.new.decode(Regexp.last_match(1))}</amathstem>"
  end
  converted = Html2Doc.asciimath_to_mathml(marked,
                                           ["<amathstem>", "</amathstem>"])
  doc = Nokogiri::XML(converted)
  bare_math = doc.xpath("//*[local-name() = 'math'][not(parent::stem)]")
  bare_math.each { |math| math.wrap("<stem type='MathML'></stem>") }
  doc.to_xml
end
31
+
32
# Turns escaped MathML held as text inside +xml+ back into markup.
# Skipped when the element already has element children. The five XML
# entities are unescaped (&amp; last) and namespace prefixes are
# stripped from opening and closing tags before the result is parsed
# back in as the element's children.
def xml_unescape_mathml(xml)
  return if xml.children.any?(&:element?)

  substitutions = [
    [/&lt;/, "<"], [/&gt;/, ">"], [/&quot;/, '"'], [/&apos;/, "'"],
    [/&amp;/, "&"],
    [/<[^: \r\n\t\/]+:/, "<"], [/<\/[^ \r\n\t:]+:/, "</"]
  ]
  math = substitutions.reduce(xml.text) do |markup, (pattern, replacement)|
    markup.gsub(pattern, replacement)
  end
  xml.children = math
end
40
+
41
+ MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
42
+
43
# Protects edge whitespace in every MathML <mtext>: a whitespace
# character at the start or end of a line of its markup is replaced by a
# non-breaking space character reference.
def mathml_preserve_space(math)
  math.xpath(".//m:mtext", "m" => MATHML_NS).each do |mtext|
    markup = mtext.children.to_xml
    padded = markup.gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
    mtext.children = padded
  end
end
49
+
50
# Puts each un-namespaced <math> child of +stem+ into the MathML
# namespace by setting its default namespace.
def mathml_namespace(stem)
  stem.xpath("./math").each do |math|
    math.default_namespace = MATHML_NS
  end
end
53
+
54
# Per-class italicisation flags for single-character <mi>: upper/lower
# case Greek and Roman. All four are enabled here.
def mathml_mi_italics
  {
    uppergreek: true,
    upperroman: true,
    lowergreek: true,
    lowerroman: true,
  }
end
58
+
59
# Presupposes that a multi-character <mi> is upright and that a
# single-character <mi> takes the MathML default italic. Marks as
# mathvariant="normal" each <mi> (with no mathvariant ancestor) whose
# entity-decoded text satisfies mi_italicise?.
def mathml_italicise(xml)
  candidates = xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
                         "m" => MATHML_NS)
  candidates.each do |mi|
    decoded = HTMLEntities.new.decode(mi.text)
    next unless mi_italicise?(decoded)

    mi["mathvariant"] = "normal"
  end
end
67
+
68
# True when +char+ is a single Greek or Latin letter whose class
# (upper/lower case Greek/Roman) has its flag switched off in
# mathml_mi_italics. Multi-character strings and characters that are
# neither Greek nor Latin give false.
#
# Always returns true or false; the previous form leaked nil and
# MatchData through the boolean operators.
def mi_italicise?(char)
  return false if char.length > 1

  script = if /\p{Greek}/.match?(char) then "greek"
           elsif /\p{Latin}/.match?(char) then "roman"
           end
  return false unless script

  flags = mathml_mi_italics
  (/\p{Lower}/.match?(char) && !flags[:"lower#{script}"]) ||
    (/\p{Upper}/.match?(char) && !flags[:"upper#{script}"])
end
80
+
81
+ UNITSML_NS = "https://schema.unitsml.org/unitsml/1.0".freeze
82
+
83
# Returns the document's <misc-container>, creating an empty one when
# absent. A new container is inserted immediately after the first
# <termdocsource> if there is one, otherwise after <bibdata>.
def add_misc_container(xmldoc)
  existing = xmldoc.at("//misc-container")
  return existing if existing

  anchor = xmldoc.at("//termdocsource") || xmldoc.at("//bibdata")
  anchor.next = "<misc-container/>"
  xmldoc.at("//misc-container")
end
91
+
92
# Gathers UnitsML definitions scattered through the document into a
# single <UnitsML> element inside the misc-container. Does nothing when
# the document holds no element in the UnitsML namespace.
def mathml_unitsML(xmldoc)
  return unless xmldoc.at(".//m:*", "m" => UNITSML_NS)

  container = add_misc_container(xmldoc)
  unitsml = container.add_child("<UnitsML xmlns='#{UNITSML_NS}'/>").first
  %w(Unit CountedItem Quantity Dimension Prefix).each do |tag|
    gather_unitsml(unitsml, xmldoc, tag)
  end
end
101
+
102
# Removes every UnitsML <tag> element from the document and, if any
# were found, re-attaches them under a new <tagSet> child of +unitsml+.
# Elements are keyed by id, so for a repeated id only the last one
# removed is re-attached.
def gather_unitsml(unitsml, xmldoc, tag)
  by_id = {}
  xmldoc.xpath(".//m:#{tag}", "m" => UNITSML_NS).each do |node|
    by_id[node["id"]] = node.remove
  end
  return if by_id.empty?

  set = unitsml.add_child("<#{tag}Set/>").first
  by_id.each_value { |node| set << node }
end
112
+
113
# Options handed to the Asciimath2UnitsML converter: the unit
# multiplier is :space.
def asciimath2unitsml_options
  options = {}
  options[:multiplier] = :space
  options
end
116
+
117
# Combination table for mathvariant_override: outer variant => inner
# variant => combined variant. Pairs not listed leave the inner variant
# unchanged.
#
# "sans-serif-bold" is not a MathML mathvariant value; it is kept as a
# legacy alias alongside the correct "bold-sans-serif" so that existing
# input combines as before.
MATHVARIANT_COMBINATIONS = {
  "bold" => {
    "normal" => "bold", "italic" => "bold-italic",
    "fraktur" => "bold-fraktur", "script" => "bold-script",
    "sans-serif" => "bold-sans-serif",
    "sans-serif-italic" => "sans-serif-bold-italic"
  },
  "italic" => {
    "normal" => "italic", "bold" => "bold-italic",
    "sans-serif" => "sans-serif-italic",
    "bold-sans-serif" => "sans-serif-bold-italic"
  },
  "bold-italic" => {
    "normal" => "bold-italic", "bold" => "bold-italic",
    "italic" => "bold-italic",
    "sans-serif" => "sans-serif-bold-italic",
    "bold-sans-serif" => "sans-serif-bold-italic",
    "sans-serif-italic" => "sans-serif-bold-italic"
  },
  "fraktur" => { "normal" => "fraktur", "bold" => "bold-fraktur" },
  "bold-fraktur" => {
    "normal" => "bold-fraktur", "fraktur" => "bold-fraktur"
  },
  "script" => { "normal" => "script", "bold" => "bold-script" },
  "bold-script" => {
    "normal" => "bold-script", "script" => "bold-script"
  },
  "sans-serif" => {
    "normal" => "sans-serif", "bold" => "bold-sans-serif",
    "italic" => "sans-serif-italic",
    "bold-italic" => "sans-serif-bold-italic"
  },
  "bold-sans-serif" => {
    "normal" => "bold-sans-serif", "bold" => "bold-sans-serif",
    "sans-serif" => "bold-sans-serif",
    "italic" => "sans-serif-bold-italic",
    "bold-italic" => "sans-serif-bold-italic",
    "sans-serif-italic" => "sans-serif-bold-italic"
  },
  "sans-serif-italic" => {
    "normal" => "sans-serif-italic", "italic" => "sans-serif-italic",
    "sans-serif" => "sans-serif-italic",
    "bold" => "sans-serif-bold-italic",
    "bold-italic" => "sans-serif-bold-italic",
    "bold-sans-serif" => "sans-serif-bold-italic",
    "sans-serif-bold" => "sans-serif-bold-italic"
  },
  "sans-serif-bold-italic" => {
    "normal" => "sans-serif-bold-italic",
    "italic" => "sans-serif-bold-italic",
    "sans-serif" => "sans-serif-bold-italic",
    "sans-serif-italic" => "sans-serif-bold-italic",
    "bold" => "sans-serif-bold-italic",
    "bold-italic" => "sans-serif-bold-italic",
    "bold-sans-serif" => "sans-serif-bold-italic",
    "sans-serif-bold" => "sans-serif-bold-italic"
  },
}.freeze

# Combines two mathvariant values into the variant that expresses both.
#
# inner - the variant to be adjusted
# outer - the variant it is combined with
#
# Returns the combined variant when the pair is compatible (for example
# "italic" combined with "bold" gives "bold-italic"); otherwise returns
# +inner+ unchanged, including when either value is nil or unknown.
def mathvariant_override(inner, outer)
  MATHVARIANT_COMBINATIONS.fetch(outer, {}).fetch(inner, inner)
end
198
+
199
# Combines nested mathvariant attributes: for every element carrying
# mathvariant, each descendant that also carries one has its value
# replaced by the combination computed by mathvariant_override.
# NOTE(review): the arguments are passed as (outer, inner) while
# mathvariant_override declares (inner, outer) — confirm this order is
# intended before changing either side.
def mathml_mathvariant(math)
  math.xpath(".//*[@mathvariant]").each do |outer|
    outer_variant_holder = outer
    outer.xpath(".//*[@mathvariant]").each do |inner|
      combined = mathvariant_override(outer_variant_holder["mathvariant"],
                                      inner["mathvariant"])
      inner["mathvariant"] = combined
    end
  end
end
207
+
208
# Normalises every MathML stem in the document. One UnitsML converter is
# built up front and reused; each stem is unescaped, namespaced,
# whitespace-protected, converted for UnitsML, and has its mathvariants
# and italics adjusted, in that order. UnitsML definitions are gathered
# once all stems are done.
def mathml_cleanup(xmldoc)
  converter = Asciimath2UnitsML::Conv.new(asciimath2unitsml_options)
  xmldoc.xpath("//stem[@type = 'MathML']").each do |stem|
    xml_unescape_mathml(stem)
    mathml_namespace(stem)
    mathml_preserve_space(stem)
    converter.MathML2UnitsML(stem)
    mathml_mathvariant(stem)
    mathml_italicise(stem)
  end
  mathml_unitsML(xmldoc)
end
220
+ end
221
+ end
222
+ end