metanorma-standoc 1.7.3 → 1.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +11 -41
- data/.rubocop.yml +1 -1
- data/lib/asciidoctor/standoc/base.rb +34 -45
- data/lib/asciidoctor/standoc/basicdoc.rng +5 -3
- data/lib/asciidoctor/standoc/blocks.rb +20 -11
- data/lib/asciidoctor/standoc/blocks_notes.rb +2 -2
- data/lib/asciidoctor/standoc/cleanup.rb +29 -77
- data/lib/asciidoctor/standoc/cleanup_block.rb +39 -27
- data/lib/asciidoctor/standoc/cleanup_boilerplate.rb +56 -0
- data/lib/asciidoctor/standoc/cleanup_inline.rb +26 -29
- data/lib/asciidoctor/standoc/cleanup_maths.rb +123 -0
- data/lib/asciidoctor/standoc/cleanup_ref.rb +4 -98
- data/lib/asciidoctor/standoc/cleanup_ref_dl.rb +94 -0
- data/lib/asciidoctor/standoc/cleanup_section.rb +5 -0
- data/lib/asciidoctor/standoc/cleanup_terms.rb +2 -2
- data/lib/asciidoctor/standoc/converter.rb +0 -1
- data/lib/asciidoctor/standoc/front.rb +2 -2
- data/lib/asciidoctor/standoc/inline.rb +8 -16
- data/lib/asciidoctor/standoc/isodoc.rng +108 -2
- data/lib/asciidoctor/standoc/lists.rb +2 -2
- data/lib/asciidoctor/standoc/macros_plantuml.rb +1 -1
- data/lib/asciidoctor/standoc/ref_sect.rb +2 -2
- data/lib/asciidoctor/standoc/reqt.rb +6 -1
- data/lib/asciidoctor/standoc/section.rb +12 -80
- data/lib/asciidoctor/standoc/table.rb +1 -1
- data/lib/asciidoctor/standoc/terms.rb +125 -0
- data/lib/asciidoctor/standoc/utils.rb +2 -120
- data/lib/asciidoctor/standoc/validate.rb +16 -1
- data/lib/metanorma/standoc/version.rb +1 -1
- data/metanorma-standoc.gemspec +4 -4
- data/spec/{asciidoctor-standoc → asciidoctor}/base_spec.rb +30 -1
- data/spec/{asciidoctor-standoc → asciidoctor}/blocks_spec.rb +0 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/cleanup_sections_spec.rb +0 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/cleanup_spec.rb +408 -6
- data/spec/{asciidoctor-standoc → asciidoctor}/datamodel/attributes_table_preprocessor_spec.rb +0 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/datamodel/diagram_preprocessor_spec.rb +0 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/inline_spec.rb +2 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/isobib_cache_spec.rb +4 -4
- data/spec/{asciidoctor-standoc → asciidoctor}/lists_spec.rb +0 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/macros_json2text_spec.rb +0 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/macros_lutaml_spec.rb +0 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/macros_plantuml_spec.rb +0 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/macros_spec.rb +0 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/macros_yaml2text_spec.rb +0 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/refs_dl_spec.rb +13 -1
- data/spec/{asciidoctor-standoc → asciidoctor}/refs_spec.rb +175 -153
- data/spec/{asciidoctor-standoc → asciidoctor}/section_spec.rb +22 -3
- data/spec/{asciidoctor-standoc → asciidoctor}/table_spec.rb +0 -0
- data/spec/{asciidoctor-standoc → asciidoctor}/validate_spec.rb +20 -0
- data/spec/fixtures/action_schemaexpg1.svg +122 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +47 -47
- data/spec/vcr_cassettes/isobib_get_123.yml +14 -14
- data/spec/vcr_cassettes/isobib_get_123_1.yml +27 -27
- data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +36 -36
- data/spec/vcr_cassettes/isobib_get_123_2001.yml +13 -13
- data/spec/vcr_cassettes/isobib_get_124.yml +13 -13
- data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +94 -30
- data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +45 -45
- metadata +49 -46
- data/lib/asciidoctor/standoc/log.rb +0 -59
@@ -1,6 +1,5 @@
|
|
1
1
|
require "date"
|
2
2
|
require "htmlentities"
|
3
|
-
require "json"
|
4
3
|
require "open-uri"
|
5
4
|
|
6
5
|
module Asciidoctor
|
@@ -8,23 +7,21 @@ module Asciidoctor
|
|
8
7
|
module Cleanup
|
9
8
|
def para_cleanup(xmldoc)
|
10
9
|
["//p[not(ancestor::bibdata)]", "//ol[not(ancestor::bibdata)]",
|
11
|
-
"//ul[not(ancestor::bibdata)]",
|
10
|
+
"//ul[not(ancestor::bibdata)]", "//quote[not(ancestor::bibdata)]",
|
12
11
|
"//note[not(ancestor::bibitem or ancestor::table or ancestor::bibdata)]"
|
13
12
|
].each { |w| inject_id(xmldoc, w) }
|
14
13
|
end
|
15
14
|
|
16
15
|
def inject_id(xmldoc, path)
|
17
16
|
xmldoc.xpath(path).each do |x|
|
18
|
-
x["id"] ||= Utils::anchor_or_uuid
|
17
|
+
x["id"] ||= Metanorma::Utils::anchor_or_uuid
|
19
18
|
end
|
20
19
|
end
|
21
20
|
|
22
21
|
def dl1_table_cleanup(xmldoc)
|
23
22
|
q = "//table/following-sibling::*[1][self::dl]"
|
24
23
|
xmldoc.xpath(q).each do |s|
|
25
|
-
|
26
|
-
s.previous_element << s.remove
|
27
|
-
end
|
24
|
+
s["key"] == "true" and s.previous_element << s.remove
|
28
25
|
end
|
29
26
|
end
|
30
27
|
|
@@ -32,8 +29,7 @@ module Asciidoctor
|
|
32
29
|
def dl2_table_cleanup(xmldoc)
|
33
30
|
q = "//table/following-sibling::*[1][self::p]"
|
34
31
|
xmldoc.xpath(q).each do |s|
|
35
|
-
if s.text =~ /^\s*key[^a-z]*$/i && !s.next_element.nil? &&
|
36
|
-
s.next_element.name == "dl"
|
32
|
+
if s.text =~ /^\s*key[^a-z]*$/i && !s.next_element.nil? && s.next_element.name == "dl"
|
37
33
|
s.next_element["key"] = "true"
|
38
34
|
s.previous_element << s.next_element.remove
|
39
35
|
s.remove
|
@@ -92,17 +88,14 @@ module Asciidoctor
|
|
92
88
|
def formula_cleanup_where1(x)
|
93
89
|
q = "//formula/following-sibling::*[1][self::dl]"
|
94
90
|
x.xpath(q).each do |s|
|
95
|
-
|
96
|
-
s.previous_element << s.remove
|
97
|
-
end
|
91
|
+
s["key"] == "true" and s.previous_element << s.remove
|
98
92
|
end
|
99
93
|
end
|
100
94
|
|
101
95
|
def formula_cleanup_where2(x)
|
102
96
|
q = "//formula/following-sibling::*[1][self::p]"
|
103
97
|
x.xpath(q).each do |s|
|
104
|
-
if s.text =~ /^\s*where[^a-z]*$/i && !s.next_element.nil? &&
|
105
|
-
s.next_element.name == "dl"
|
98
|
+
if s.text =~ /^\s*where[^a-z]*$/i && !s.next_element.nil? && s.next_element.name == "dl"
|
106
99
|
s.next_element["key"] = "true"
|
107
100
|
s.previous_element << s.next_element.remove
|
108
101
|
s.remove
|
@@ -113,9 +106,7 @@ module Asciidoctor
|
|
113
106
|
def figure_dl_cleanup1(xmldoc)
|
114
107
|
q = "//figure/following-sibling::*[self::dl]"
|
115
108
|
xmldoc.xpath(q).each do |s|
|
116
|
-
|
117
|
-
s.previous_element << s.remove
|
118
|
-
end
|
109
|
+
s["key"] == "true" and s.previous_element << s.remove
|
119
110
|
end
|
120
111
|
end
|
121
112
|
|
@@ -123,8 +114,7 @@ module Asciidoctor
|
|
123
114
|
def figure_dl_cleanup2(xmldoc)
|
124
115
|
q = "//figure/following-sibling::*[self::p]"
|
125
116
|
xmldoc.xpath(q).each do |s|
|
126
|
-
if s.text =~ /^\s*key[^a-z]*$/i && !s.next_element.nil? &&
|
127
|
-
s.next_element.name == "dl"
|
117
|
+
if s.text =~ /^\s*key[^a-z]*$/i && !s.next_element.nil? && s.next_element.name == "dl"
|
128
118
|
s.next_element["key"] = "true"
|
129
119
|
s.previous_element << s.next_element.remove
|
130
120
|
s.remove
|
@@ -135,8 +125,7 @@ module Asciidoctor
|
|
135
125
|
# examples containing only figures become subfigures of figures
|
136
126
|
def subfigure_cleanup(xmldoc)
|
137
127
|
xmldoc.xpath("//example[figure]").each do |e|
|
138
|
-
next unless e.elements.map { |m| m.name }.
|
139
|
-
reject { |m| %w(name figure).include? m }.empty?
|
128
|
+
next unless e.elements.map { |m| m.name }.reject { |m| %w(name figure).include? m }.empty?
|
140
129
|
e.name = "figure"
|
141
130
|
end
|
142
131
|
end
|
@@ -148,12 +137,10 @@ module Asciidoctor
|
|
148
137
|
subfigure_cleanup(xmldoc)
|
149
138
|
end
|
150
139
|
|
151
|
-
ELEMS_ALLOW_NOTES =
|
152
|
-
%w[p formula ul ol dl figure].freeze
|
140
|
+
ELEMS_ALLOW_NOTES = %w[p formula ul ol dl figure].freeze
|
153
141
|
|
154
142
|
# if a note is at the end of a section, it is left alone
|
155
|
-
# if a note is followed by a non-note block,
|
156
|
-
# it is moved inside its preceding block if it is not delimited
|
143
|
+
# if a note is followed by a non-note block, it is moved inside its preceding block if it is not delimited
|
157
144
|
# (so there was no way of making that block include the note)
|
158
145
|
def note_cleanup(xmldoc)
|
159
146
|
q = "//note[following-sibling::*[not(local-name() = 'note')]]"
|
@@ -185,7 +172,7 @@ module Asciidoctor
|
|
185
172
|
def requirement_descriptions(x)
|
186
173
|
x.xpath("//requirement | //recommendation | //permission").each do |r|
|
187
174
|
r.children.each do |e|
|
188
|
-
unless e.element? && (
|
175
|
+
unless e.element? && (reqt_subpart(e.name) ||
|
189
176
|
%w(requirement recommendation permission).include?(e.name))
|
190
177
|
t = Nokogiri::XML::Element.new("description", x)
|
191
178
|
e.before(t)
|
@@ -201,9 +188,34 @@ module Asciidoctor
|
|
201
188
|
n = d.next.remove
|
202
189
|
d << n.children
|
203
190
|
end
|
204
|
-
r.xpath("./description[normalize-space(.)='']").each
|
205
|
-
|
191
|
+
r.xpath("./description[normalize-space(.)='']").each { |d| d.replace("\n") }
|
192
|
+
end
|
193
|
+
|
194
|
+
def svgmap_cleanup(xmldoc)
|
195
|
+
svgmap_populate(xmldoc)
|
196
|
+
Metanorma::Utils::svgmap_rewrite(xmldoc, @localdir)
|
197
|
+
end
|
198
|
+
|
199
|
+
def svgmap_populate(xmldoc)
|
200
|
+
xmldoc.xpath("//svgmap").each do |s|
|
201
|
+
s1 = s.dup
|
202
|
+
s.children.remove
|
203
|
+
f = s1.at(".//figure") and s << f
|
204
|
+
s1.xpath(".//li").each do |li|
|
205
|
+
t = li&.at(".//eref | .//link | .//xref") or next
|
206
|
+
href = t.xpath("./following-sibling::node()")
|
207
|
+
next if href.empty?
|
208
|
+
s << %[<target href="#{svgmap_target(href)}">#{t.to_xml}</target>]
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
def svgmap_target(nodeset)
|
214
|
+
nodeset.each do |n|
|
215
|
+
next unless n.name == "link"
|
216
|
+
n.children = n["target"]
|
206
217
|
end
|
218
|
+
nodeset.text.sub(/^[,; ]/, "").strip
|
207
219
|
end
|
208
220
|
end
|
209
221
|
end
|
@@ -106,6 +106,7 @@ module Asciidoctor
|
|
106
106
|
def bibdata_cleanup(xmldoc)
|
107
107
|
bibdata_anchor_cleanup(xmldoc)
|
108
108
|
bibdata_docidentifier_cleanup(xmldoc)
|
109
|
+
biblio_indirect_erefs(xmldoc, @internal_eref_namespaces&.uniq)
|
109
110
|
end
|
110
111
|
|
111
112
|
def bibdata_anchor_cleanup(xmldoc)
|
@@ -122,6 +123,61 @@ module Asciidoctor
|
|
122
123
|
ins = ins.next
|
123
124
|
end
|
124
125
|
end
|
126
|
+
|
127
|
+
def gather_indirect_erefs(xmldoc, prefix)
|
128
|
+
xmldoc.xpath("//eref[@type = '#{prefix}']").each_with_object({}) do |e, m|
|
129
|
+
e.delete("type")
|
130
|
+
m[e["bibitemid"]] = true
|
131
|
+
end.keys
|
132
|
+
end
|
133
|
+
|
134
|
+
def insert_indirect_biblio(xmldoc, refs, prefix)
|
135
|
+
ins = xmldoc.at("bibliography") or
|
136
|
+
xmldoc.root << "<bibliography/>" and ins = xmldoc.at("bibliography")
|
137
|
+
ins = ins.add_child("<references hidden='true' normative='false'/>").first
|
138
|
+
refs.each do |x|
|
139
|
+
ins << <<~END
|
140
|
+
<bibitem id="#{x}" type="internal">
|
141
|
+
<docidentifier type="repository">#{x.sub(/^#{prefix}_/, "#{prefix}/")}</docidentifier>
|
142
|
+
</bibitem>
|
143
|
+
END
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
def indirect_eref_to_xref(e, id)
|
148
|
+
loc = e&.at("./localityStack[locality[@type = 'anchor']]")&.remove&.text ||
|
149
|
+
e&.at("./locality[@type = 'anchor']")&.remove&.text || id
|
150
|
+
e.name = "xref"
|
151
|
+
e.delete("bibitemid")
|
152
|
+
e.delete("citeas")
|
153
|
+
e["target"] = loc
|
154
|
+
unless e.document.at("//*[@id = '#{loc}']")
|
155
|
+
e.children = %(** Missing target #{loc})
|
156
|
+
e["target"] = id
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
def resolve_local_indirect_erefs(xmldoc, refs, prefix)
|
161
|
+
refs.each_with_object([]) do |r, m|
|
162
|
+
id = r.sub(/^#{prefix}_/, "")
|
163
|
+
if n = xmldoc.at("//*[@id = '#{id}']") and n.at("./ancestor-or-self::*[@type = '#{prefix}']")
|
164
|
+
xmldoc.xpath("//eref[@bibitemid = '#{r}']").each do |e|
|
165
|
+
indirect_eref_to_xref(e, id)
|
166
|
+
end
|
167
|
+
else
|
168
|
+
m << r
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def biblio_indirect_erefs(xmldoc, prefixes)
|
174
|
+
prefixes&.each do |prefix|
|
175
|
+
refs = gather_indirect_erefs(xmldoc, prefix)
|
176
|
+
refs = resolve_local_indirect_erefs(xmldoc, refs, prefix)
|
177
|
+
refs.empty? and next
|
178
|
+
insert_indirect_biblio(xmldoc, refs, prefix)
|
179
|
+
end
|
180
|
+
end
|
125
181
|
end
|
126
182
|
end
|
127
183
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require "metanorma-utils"
|
2
|
+
|
1
3
|
module Asciidoctor
|
2
4
|
module Standoc
|
3
5
|
module Cleanup
|
@@ -72,16 +74,19 @@ module Asciidoctor
|
|
72
74
|
|
73
75
|
def xref_to_eref(x)
|
74
76
|
x["bibitemid"] = x["target"]
|
75
|
-
x["citeas"] = @anchors&.dig(x["target"], :xref)
|
77
|
+
unless x["citeas"] = @anchors&.dig(x["target"], :xref)
|
78
|
+
@internal_eref_namespaces.include?(x["type"]) or
|
76
79
|
@log.add("Crossreferences", x,
|
77
|
-
"#{x['target']} does not have a corresponding anchor ID "
|
78
|
-
|
80
|
+
"#{x['target']} does not have a corresponding anchor ID in the bibliography!")
|
81
|
+
end
|
79
82
|
x.delete("target")
|
80
83
|
extract_localities(x) unless x.children.empty?
|
81
84
|
end
|
82
85
|
|
83
86
|
def xref_cleanup(xmldoc)
|
84
87
|
xmldoc.xpath("//xref").each do |x|
|
88
|
+
/:/.match(x["target"]) and xref_to_internal_eref(x)
|
89
|
+
next unless x.name == "xref"
|
85
90
|
if refid? x["target"]
|
86
91
|
x.name = "eref"
|
87
92
|
xref_to_eref(x)
|
@@ -91,6 +96,18 @@ module Asciidoctor
|
|
91
96
|
end
|
92
97
|
end
|
93
98
|
|
99
|
+
def xref_to_internal_eref(x)
|
100
|
+
a = x["target"].split(":", 3)
|
101
|
+
unless a.size < 2 || a[0].empty? || a[1].empty?
|
102
|
+
x["target"] = "#{a[0]}_#{a[1]}"
|
103
|
+
a.size > 2 and x.children = %{anchor="#{a[2..-1].join("")}",#{x&.children&.text}}
|
104
|
+
x["type"] = a[0]
|
105
|
+
@internal_eref_namespaces << a[0]
|
106
|
+
x.name = "eref"
|
107
|
+
xref_to_eref(x)
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
94
111
|
def quotesource_cleanup(xmldoc)
|
95
112
|
xmldoc.xpath("//quote/source | //terms/source").each do |x|
|
96
113
|
xref_to_eref(x)
|
@@ -139,29 +156,11 @@ module Asciidoctor
|
|
139
156
|
extract_localities(x.first_element_child)
|
140
157
|
end
|
141
158
|
|
142
|
-
NAMECHAR = "\u0000-\u0022\u0024\u002c\u002f\u003a-\u0040\\u005b-\u005e"\
|
143
|
-
"\u0060\u007b-\u00b6\u00b8-\u00bf\u00d7\u00f7\u037e\u2000-\u200b"\
|
144
|
-
"\u200e-\u203e\u2041-\u206f\u2190-\u2bff\u2ff0-\u3000".freeze
|
145
|
-
#"\ud800-\uf8ff\ufdd0-\ufdef\ufffe-\uffff".freeze
|
146
|
-
NAMESTARTCHAR = "\\u002d\u002e\u0030-\u0039\u00b7\u0300-\u036f"\
|
147
|
-
"\u203f-\u2040".freeze
|
148
|
-
|
149
|
-
def to_ncname(s)
|
150
|
-
start = s[0]
|
151
|
-
ret1 = %r([#{NAMECHAR}#]).match(start) ? "_" :
|
152
|
-
(%r([#{NAMESTARTCHAR}#]).match(start) ? "_#{start}" : start)
|
153
|
-
ret2 = s[1..-1] || ""
|
154
|
-
ret = (ret1 || "") + ret2.gsub(%r([#{NAMECHAR}#]), "_")
|
155
|
-
ret
|
156
|
-
end
|
157
|
-
|
158
|
-
module_function :to_ncname
|
159
|
-
|
160
159
|
def to_xreftarget(s)
|
161
|
-
return to_ncname(s) unless /^[^#]+#.+$/.match(s)
|
160
|
+
return Metanorma::Utils::to_ncname(s) unless /^[^#]+#.+$/.match(s)
|
162
161
|
/^(?<pref>[^#]+)#(?<suff>.+)$/ =~ s
|
163
|
-
pref = pref.gsub(%r([#{NAMECHAR}]), "_")
|
164
|
-
suff = suff.gsub(%r([#{NAMECHAR}]), "_")
|
162
|
+
pref = pref.gsub(%r([#{Metanorma::Utils::NAMECHAR}]), "_")
|
163
|
+
suff = suff.gsub(%r([#{Metanorma::Utils::NAMECHAR}]), "_")
|
165
164
|
"#{pref}##{suff}"
|
166
165
|
end
|
167
166
|
|
@@ -175,12 +174,11 @@ module Asciidoctor
|
|
175
174
|
|
176
175
|
def anchor_cleanup1(x)
|
177
176
|
x.xpath(IDREF).each do |s|
|
178
|
-
if (ret = to_ncname(s.value)) != (orig = s.value)
|
177
|
+
if (ret = Metanorma::Utils::to_ncname(s.value)) != (orig = s.value)
|
179
178
|
s.value = ret
|
180
179
|
output = s.parent.dup
|
181
180
|
output.children.remove
|
182
|
-
@log.add("Anchors", s.parent, "normalised identifier in #{output} "
|
183
|
-
"from #{orig}")
|
181
|
+
@log.add("Anchors", s.parent, "normalised identifier in #{output} from #{orig}")
|
184
182
|
end
|
185
183
|
end
|
186
184
|
end
|
@@ -191,8 +189,7 @@ module Asciidoctor
|
|
191
189
|
s.value = ret
|
192
190
|
output = s.parent.dup
|
193
191
|
output.children.remove
|
194
|
-
@log.add("Anchors", s.parent, "normalised identifier in #{output} "
|
195
|
-
"from #{orig}")
|
192
|
+
@log.add("Anchors", s.parent, "normalised identifier in #{output} from #{orig}")
|
196
193
|
end
|
197
194
|
end
|
198
195
|
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "pathname"
|
3
|
+
require "open-uri"
|
4
|
+
require "html2doc"
|
5
|
+
require "asciimath2unitsml"
|
6
|
+
require_relative "./cleanup_block.rb"
|
7
|
+
require_relative "./cleanup_footnotes.rb"
|
8
|
+
require_relative "./cleanup_ref.rb"
|
9
|
+
require_relative "./cleanup_ref_dl.rb"
|
10
|
+
require_relative "./cleanup_boilerplate.rb"
|
11
|
+
require_relative "./cleanup_section.rb"
|
12
|
+
require_relative "./cleanup_terms.rb"
|
13
|
+
require_relative "./cleanup_inline.rb"
|
14
|
+
require_relative "./cleanup_amend.rb"
|
15
|
+
require "relaton_iev"
|
16
|
+
|
17
|
+
module Asciidoctor
|
18
|
+
module Standoc
|
19
|
+
module Cleanup
|
20
|
+
def asciimath2mathml(text)
|
21
|
+
text = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do |m|
|
22
|
+
"<amathstem>#{HTMLEntities.new.decode($1)}</amathstem>"
|
23
|
+
end
|
24
|
+
text = Html2Doc.asciimath_to_mathml(text, ["<amathstem>", "</amathstem>"])
|
25
|
+
x = Nokogiri::XML(text)
|
26
|
+
x.xpath("//*[local-name() = 'math'][not(parent::stem)]").each do |y|
|
27
|
+
y.wrap("<stem type='MathML'></stem>")
|
28
|
+
end
|
29
|
+
x.to_xml
|
30
|
+
end
|
31
|
+
|
32
|
+
def xml_unescape_mathml(x)
|
33
|
+
return if x.children.any? { |y| y.element? }
|
34
|
+
math = x.text.gsub(/&lt;/, "<").gsub(/&gt;/, ">").gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&amp;/, "&").
|
35
|
+
gsub(/<[^: \r\n\t\/]+:/, "<").gsub(/<\/[^ \r\n\t:]+:/, "</")
|
36
|
+
x.children = math
|
37
|
+
end
|
38
|
+
|
39
|
+
MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
|
40
|
+
|
41
|
+
def mathml_preserve_space(m)
|
42
|
+
m.xpath(".//m:mtext", "m" => MATHML_NS).each do |x|
|
43
|
+
x.children = x.children.to_xml.gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def mathml_namespace(stem)
|
48
|
+
stem.xpath("./math", ).each { |x| x.default_namespace = MATHML_NS }
|
49
|
+
end
|
50
|
+
|
51
|
+
def mathml_mi_italics
|
52
|
+
{ uppergreek: true, upperroman: true,
|
53
|
+
lowergreek: true, lowerroman: true }
|
54
|
+
end
|
55
|
+
|
56
|
+
# presuppose multichar mi upright, singlechar mi MathML default italic
|
57
|
+
def mathml_italicise(x)
|
58
|
+
x.xpath(".//m:mi[not(ancestor::*[@mathvariant])]", "m" => MATHML_NS).each do |i|
|
59
|
+
char = HTMLEntities.new.decode(i.text)
|
60
|
+
i["mathvariant"] = "normal" if mi_italicise?(char)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def mi_italicise?(c)
|
65
|
+
return false if c.length > 1
|
66
|
+
if /\p{Greek}/.match(c)
|
67
|
+
/\p{Lower}/.match(c) && !mathml_mi_italics[:lowergreek] ||
|
68
|
+
/\p{Upper}/.match(c) && !mathml_mi_italics[:uppergreek]
|
69
|
+
elsif /\p{Latin}/.match(c)
|
70
|
+
/\p{Lower}/.match(c) && !mathml_mi_italics[:lowerroman] ||
|
71
|
+
/\p{Upper}/.match(c) && !mathml_mi_italics[:upperroman]
|
72
|
+
else
|
73
|
+
false
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
UNITSML_NS = "http://unitsml.nist.gov/2005".freeze
|
78
|
+
|
79
|
+
def add_misc_container(xmldoc)
|
80
|
+
unless ins = xmldoc.at("//misc-container")
|
81
|
+
a = xmldoc.at("//termdocsource") || xmldoc.at("//bibdata")
|
82
|
+
a.next = "<misc-container/>"
|
83
|
+
ins = xmldoc.at("//misc-container")
|
84
|
+
end
|
85
|
+
ins
|
86
|
+
end
|
87
|
+
|
88
|
+
def mathml_unitsML(xmldoc)
|
89
|
+
return unless xmldoc.at(".//m:*", "m" => UNITSML_NS)
|
90
|
+
misc = add_misc_container(xmldoc)
|
91
|
+
unitsml = misc.add_child("<UnitsML xmlns='#{UNITSML_NS}'/>").first
|
92
|
+
%w(Unit CountedItem Quantity Dimension Prefix).each do |t|
|
93
|
+
gather_unitsml(unitsml, xmldoc, t)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def gather_unitsml(unitsml, xmldoc, t)
|
98
|
+
tags = xmldoc.xpath(".//m:#{t}", "m" => UNITSML_NS).each_with_object({}) do |x, m|
|
99
|
+
m[x["id"]] = x.remove
|
100
|
+
end
|
101
|
+
return if tags.empty?
|
102
|
+
set = unitsml.add_child("<#{t}Set/>").first
|
103
|
+
tags.values.each { |v| set << v }
|
104
|
+
end
|
105
|
+
|
106
|
+
def asciimath2unitsml_options
|
107
|
+
{ multiplier: :space }
|
108
|
+
end
|
109
|
+
|
110
|
+
def mathml_cleanup(xmldoc)
|
111
|
+
unitsml = Asciimath2UnitsML::Conv.new(asciimath2unitsml_options)
|
112
|
+
xmldoc.xpath("//stem[@type = 'MathML']").each do |x|
|
113
|
+
xml_unescape_mathml(x)
|
114
|
+
mathml_namespace(x)
|
115
|
+
mathml_preserve_space(x)
|
116
|
+
mathml_italicise(x)
|
117
|
+
unitsml.MathML2UnitsML(x)
|
118
|
+
end
|
119
|
+
mathml_unitsML(xmldoc)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|