metanorma-standoc 1.11.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +3 -31
- data/.gitignore +23 -0
- data/Gemfile +2 -0
- data/lib/asciidoctor/standoc/base.rb +2 -241
- data/lib/asciidoctor/standoc/blocks.rb +2 -238
- data/lib/asciidoctor/standoc/blocks_notes.rb +2 -100
- data/lib/asciidoctor/standoc/cleanup.rb +2 -207
- data/lib/asciidoctor/standoc/cleanup_amend.rb +2 -53
- data/lib/asciidoctor/standoc/cleanup_block.rb +2 -173
- data/lib/asciidoctor/standoc/cleanup_boilerplate.rb +2 -204
- data/lib/asciidoctor/standoc/cleanup_footnotes.rb +2 -108
- data/lib/asciidoctor/standoc/cleanup_image.rb +2 -69
- data/lib/asciidoctor/standoc/cleanup_inline.rb +2 -179
- data/lib/asciidoctor/standoc/cleanup_maths.rb +2 -221
- data/lib/asciidoctor/standoc/cleanup_ref.rb +2 -169
- data/lib/asciidoctor/standoc/cleanup_ref_dl.rb +2 -93
- data/lib/asciidoctor/standoc/cleanup_reqt.rb +2 -110
- data/lib/asciidoctor/standoc/cleanup_section.rb +2 -184
- data/lib/asciidoctor/standoc/cleanup_section_names.rb +2 -91
- data/lib/asciidoctor/standoc/cleanup_symbols.rb +2 -47
- data/lib/asciidoctor/standoc/cleanup_table.rb +2 -67
- data/lib/asciidoctor/standoc/cleanup_terms.rb +2 -113
- data/lib/asciidoctor/standoc/cleanup_terms_designations.rb +2 -161
- data/lib/asciidoctor/standoc/cleanup_text.rb +2 -95
- data/lib/asciidoctor/standoc/cleanup_toc.rb +3 -0
- data/lib/asciidoctor/standoc/cleanup_xref.rb +2 -106
- data/lib/asciidoctor/standoc/converter.rb +2 -123
- data/lib/asciidoctor/standoc/datamodel/attributes_table_preprocessor.rb +2 -56
- data/lib/asciidoctor/standoc/datamodel/diagram_preprocessor.rb +2 -102
- data/lib/asciidoctor/standoc/datamodel/plantuml_renderer.rb +3 -404
- data/lib/asciidoctor/standoc/deprecated.rb +5 -0
- data/lib/asciidoctor/standoc/front.rb +2 -219
- data/lib/asciidoctor/standoc/front_contributor.rb +2 -191
- data/lib/asciidoctor/standoc/inline.rb +2 -229
- data/lib/asciidoctor/standoc/lists.rb +2 -119
- data/lib/asciidoctor/standoc/macros.rb +2 -203
- data/lib/asciidoctor/standoc/macros_form.rb +2 -62
- data/lib/asciidoctor/standoc/macros_note.rb +2 -44
- data/lib/asciidoctor/standoc/macros_plantuml.rb +2 -112
- data/lib/asciidoctor/standoc/macros_terms.rb +2 -180
- data/lib/asciidoctor/standoc/ref.rb +2 -225
- data/lib/asciidoctor/standoc/ref_sect.rb +2 -143
- data/lib/asciidoctor/standoc/ref_utility.rb +2 -0
- data/lib/asciidoctor/standoc/render.rb +3 -0
- data/lib/asciidoctor/standoc/reqt.rb +2 -89
- data/lib/asciidoctor/standoc/section.rb +2 -190
- data/lib/asciidoctor/standoc/table.rb +2 -84
- data/lib/asciidoctor/standoc/term_lookup_cleanup.rb +2 -178
- data/lib/asciidoctor/standoc/terms.rb +2 -153
- data/lib/asciidoctor/standoc/utils.rb +2 -116
- data/lib/asciidoctor/standoc/validate.rb +2 -157
- data/lib/asciidoctor/standoc/validate_section.rb +2 -54
- data/lib/isodoc/html/htmlstyle.css +20 -11
- data/lib/isodoc/html/htmlstyle.scss +11 -11
- data/lib/metanorma/standoc/base.rb +149 -0
- data/lib/{asciidoctor → metanorma}/standoc/basicdoc.rng +0 -0
- data/lib/{asciidoctor → metanorma}/standoc/biblio.rng +0 -0
- data/lib/metanorma/standoc/blocks.rb +239 -0
- data/lib/metanorma/standoc/blocks_notes.rb +101 -0
- data/lib/metanorma/standoc/cleanup.rb +146 -0
- data/lib/metanorma/standoc/cleanup_amend.rb +54 -0
- data/lib/metanorma/standoc/cleanup_block.rb +173 -0
- data/lib/metanorma/standoc/cleanup_boilerplate.rb +213 -0
- data/lib/metanorma/standoc/cleanup_footnotes.rb +109 -0
- data/lib/metanorma/standoc/cleanup_image.rb +70 -0
- data/lib/metanorma/standoc/cleanup_inline.rb +190 -0
- data/lib/metanorma/standoc/cleanup_maths.rb +222 -0
- data/lib/metanorma/standoc/cleanup_ref.rb +170 -0
- data/lib/metanorma/standoc/cleanup_ref_dl.rb +104 -0
- data/lib/metanorma/standoc/cleanup_reqt.rb +111 -0
- data/lib/metanorma/standoc/cleanup_section.rb +212 -0
- data/lib/metanorma/standoc/cleanup_section_names.rb +92 -0
- data/lib/metanorma/standoc/cleanup_symbols.rb +48 -0
- data/lib/metanorma/standoc/cleanup_table.rb +68 -0
- data/lib/metanorma/standoc/cleanup_terms.rb +140 -0
- data/lib/metanorma/standoc/cleanup_terms_designations.rb +199 -0
- data/lib/metanorma/standoc/cleanup_text.rb +96 -0
- data/lib/metanorma/standoc/cleanup_toc.rb +98 -0
- data/lib/metanorma/standoc/cleanup_xref.rb +107 -0
- data/lib/metanorma/standoc/converter.rb +124 -0
- data/lib/metanorma/standoc/datamodel/attributes_table_preprocessor.rb +57 -0
- data/lib/metanorma/standoc/datamodel/diagram_preprocessor.rb +103 -0
- data/lib/metanorma/standoc/datamodel/plantuml_renderer.rb +409 -0
- data/lib/metanorma/standoc/front.rb +224 -0
- data/lib/metanorma/standoc/front_contributor.rb +192 -0
- data/lib/metanorma/standoc/inline.rb +232 -0
- data/lib/{asciidoctor → metanorma}/standoc/isodoc.rng +90 -18
- data/lib/metanorma/standoc/lists.rb +120 -0
- data/lib/metanorma/standoc/macros.rb +204 -0
- data/lib/metanorma/standoc/macros_form.rb +63 -0
- data/lib/metanorma/standoc/macros_note.rb +45 -0
- data/lib/metanorma/standoc/macros_plantuml.rb +113 -0
- data/lib/metanorma/standoc/macros_terms.rb +181 -0
- data/lib/metanorma/standoc/ref.rb +243 -0
- data/lib/metanorma/standoc/ref_sect.rb +153 -0
- data/lib/metanorma/standoc/ref_utility.rb +129 -0
- data/lib/metanorma/standoc/render.rb +115 -0
- data/lib/metanorma/standoc/reqt.rb +90 -0
- data/lib/{asciidoctor → metanorma}/standoc/reqt.rng +0 -0
- data/lib/metanorma/standoc/section.rb +209 -0
- data/lib/metanorma/standoc/table.rb +85 -0
- data/lib/metanorma/standoc/term_lookup_cleanup.rb +179 -0
- data/lib/metanorma/standoc/terms.rb +160 -0
- data/lib/metanorma/standoc/utils.rb +101 -0
- data/lib/metanorma/standoc/validate.rb +158 -0
- data/lib/metanorma/standoc/validate_section.rb +55 -0
- data/lib/metanorma/standoc/version.rb +1 -1
- data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/model_representation.adoc.erb +0 -0
- data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/plantuml_representation.adoc.erb +0 -0
- data/lib/metanorma-standoc.rb +1 -1
- data/metanorma-standoc.gemspec +4 -4
- data/spec/{asciidoctor → metanorma}/base_spec.rb +73 -8
- data/spec/{asciidoctor → metanorma}/blank_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/blocks_spec.rb +49 -20
- data/spec/{asciidoctor → metanorma}/cleanup_blocks_spec.rb +25 -1
- data/spec/{asciidoctor → metanorma}/cleanup_sections_spec.rb +2 -2
- data/spec/{asciidoctor → metanorma}/cleanup_spec.rb +9 -9
- data/spec/{asciidoctor → metanorma}/cleanup_terms_spec.rb +528 -91
- data/spec/{asciidoctor → metanorma}/datamodel/attributes_table_preprocessor_spec.rb +22 -22
- data/spec/{asciidoctor → metanorma}/datamodel/diagram_preprocessor_spec.rb +17 -17
- data/spec/{asciidoctor → metanorma}/inline_spec.rb +175 -6
- data/spec/{asciidoctor → metanorma}/isobib_cache_spec.rb +5 -9
- data/spec/{asciidoctor → metanorma}/lists_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/macros_json2text_spec.rb +0 -0
- data/spec/{asciidoctor → metanorma}/macros_plantuml_spec.rb +3 -3
- data/spec/{asciidoctor → metanorma}/macros_spec.rb +8 -8
- data/spec/{asciidoctor → metanorma}/macros_yaml2text_spec.rb +0 -0
- data/spec/metanorma/refs_dl_spec.rb +863 -0
- data/spec/{asciidoctor → metanorma}/refs_spec.rb +1277 -687
- data/spec/{asciidoctor → metanorma}/section_spec.rb +90 -3
- data/spec/{asciidoctor → metanorma}/table_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/validate_spec.rb +2 -2
- data/spec/spec_helper.rb +0 -1
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +179 -179
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
- data/spec/vcr_cassettes/isobib_get_123.yml +13 -13
- data/spec/vcr_cassettes/isobib_get_123_1.yml +98 -98
- data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +111 -111
- data/spec/vcr_cassettes/isobib_get_123_2001.yml +13 -13
- data/spec/vcr_cassettes/isobib_get_124.yml +14 -14
- data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +14 -14
- data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +46 -46
- metadata +89 -38
- data/lib/asciidoctor/standoc/ref_date_id.rb +0 -62
- data/spec/asciidoctor/refs_dl_spec.rb +0 -864
@@ -0,0 +1,190 @@
|
|
1
|
+
require "metanorma-utils"
|
2
|
+
require "digest"
|
3
|
+
|
4
|
+
module Metanorma
|
5
|
+
module Standoc
|
6
|
+
module Cleanup
|
7
|
+
# Reports whether only whitespace precedes the first child element of +elem+.
# Returns false at the first text child containing a non-whitespace character,
# true at the first element child, and true if the children run out.
def empty_text_before_first_element(elem)
  elem.children.each do |node|
    if node.text? && /\S/.match?(node.text)
      return false
    elsif node.element?
      return true
    end
  end
  true
end
|
14
|
+
|
15
|
+
# Tidies the first child of +elem+ when that child is a text node:
# a text node with visible content loses a leading space at the start of each
# line; a whitespace-only text node is removed altogether.
# Does nothing when +elem+ has no children (e.g. after its only child, a
# bookmark, has just been detached) or when the first child is not text.
def strip_initial_space(elem)
  first = elem.children[0]
  return unless first&.text?

  if /\S/.match?(first.text)
    first.content = first.text.gsub(/^ /, "")
  else
    first.remove
  end
end
|
24
|
+
|
25
|
+
# Runs the list-item bookmark cleanup followed by the definition-term
# bookmark cleanup on +xmldoc+; returns the result of the last step.
def bookmark_cleanup(xmldoc)
  %i[li_bookmark_cleanup dt_bookmark_cleanup]
    .map { |step| send(step, xmldoc) }
    .last
end
|
29
|
+
|
30
|
+
# Detaches +bookmark+ from the tree, copies its id onto +elem+, then tidies
# leading whitespace in the bookmark's former parent.
def bookmark_to_id(elem, bookmark)
  container = bookmark.parent
  detached = bookmark.remove
  elem["id"] = detached["id"]
  strip_initial_space(container)
end
|
35
|
+
|
36
|
+
# For every <li> whose first element is a <p> that itself starts with a
# <bookmark> (no visible text before it), moves the bookmark id onto the <li>.
def li_bookmark_cleanup(xmldoc)
  leading_bookmark = "./*[1][local-name() = 'p']/" \
                     "*[1][local-name() = 'bookmark']"
  xmldoc.xpath("//li[descendant::bookmark]").each do |item|
    next unless item.at(leading_bookmark)

    para = item.elements[0]
    next unless empty_text_before_first_element(para)

    bookmark_to_id(item, para.elements[0])
  end
end
|
45
|
+
|
46
|
+
# For every <dt> containing a bookmark, moves the bookmark id onto the <dt>
# when the bookmark is the leading content, either inside a first <p> child
# or directly as the first child element.
def dt_bookmark_cleanup(xmldoc)
  in_para = "./*[1][local-name() = 'p']/" \
            "*[1][local-name() = 'bookmark']"
  direct = "./*[1][local-name() = 'bookmark']"
  xmldoc.xpath("//dt[descendant::bookmark]").each do |term|
    first = term.elements[0]
    if term.at(in_para) && empty_text_before_first_element(first)
      bookmark_to_id(term, first.elements[0])
    elsif term.at(direct) && empty_text_before_first_element(term)
      bookmark_to_id(term, first)
    end
  end
end
|
58
|
+
|
59
|
+
# Processes each <concept> that has no <termxref> child: drops an empty
# <refterm>, then hands the concept to concept_cleanup1.
def concept_cleanup(xmldoc)
  xmldoc.xpath("//concept[not(termxref)]").each do |concept|
    refterm = concept.at("./refterm")
    refterm.remove if refterm && refterm.text&.empty?
    concept_cleanup1(concept)
  end
end
|
66
|
+
|
67
|
+
# Resolves the "key" attribute of a concept-like element into a child
# reference element, then deletes the attribute.
# A key containing ":" is treated as a termbase reference, a key accepted by
# refid? as a bibliographic reference, anything else as an internal xref.
def concept_cleanup1(elem)
  blank = elem&.children&.text&.strip&.empty?
  elem.children.remove if blank
  key_extract_locality(elem)
  key = elem["key"] # read after the locality has been split off
  if /:/.match?(key)
    concept_termbase_cleanup(elem)
  elsif refid?(key)
    concept_eref_cleanup(elem)
  else
    concept_xref_cleanup(elem)
  end
  elem.delete("key")
end
|
76
|
+
|
77
|
+
# Processes each <related> that has no <termxref> child: its <refterm> is
# replaced by a <preferred> wrapping term_expr of the refterm's markup, then
# the element goes through concept_cleanup1.
# A <related> without a <refterm> skips the replacement instead of raising,
# matching the nil-tolerant handling in concept_cleanup.
def related_cleanup(xmldoc)
  xmldoc.xpath("//related[not(termxref)]").each do |x|
    if (term = x.at("./refterm"))
      term.replace("<preferred>#{term_expr(term.children.to_xml)}"\
                   "</preferred>")
    end
    concept_cleanup1(x)
  end
end
|
85
|
+
|
86
|
+
# Splits a "key" attribute of the form "target,locality": the part after the
# first comma becomes a <locality> child, and the key keeps only the part
# before it. Keys without a comma are left alone.
def key_extract_locality(elem)
  raw = elem["key"]
  return unless /,/.match?(raw)

  locality = raw.sub(/^[^,]+,/, "")
  elem.add_child("<locality>#{locality}</locality>")
  elem["key"] = raw.sub(/,.*$/, "")
end
|
92
|
+
|
93
|
+
# Turns a "termbase:id" key into a <termref base=… target=…> child, moving
# the content of any <xrefrender> child into it.
def concept_termbase_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children
  termbase, key = elem["key"].split(/:/, 2)
  opening = %(<termref base="#{termbase}" target="#{key}">)
  elem.add_child("#{opening}#{rendering&.to_xml}</termref>")
end
|
99
|
+
|
100
|
+
# Adds an <xref> child targeting the element's key, moving the content of
# any <xrefrender> child into it.
def concept_xref_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children
  target = elem["key"]
  elem.add_child(%(<xref target="#{target}">#{rendering&.to_xml}</xref>))
end
|
104
|
+
|
105
|
+
# Adds an <eref> child citing the element's key. The content of any
# <locality> child is placed inside the eref and passed through
# extract_localities; the content of any <xrefrender> child is appended last.
def concept_eref_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children&.to_xml
  locality = elem&.at("./locality")&.remove&.children&.to_xml
  elem.add_child "<eref bibitemid='#{elem['key']}'>#{locality}</eref>"
  extract_localities(elem.elements[-1])
  return unless rendering

  elem.elements[-1].add_child(rendering)
end
|
112
|
+
|
113
|
+
# Normalises an xref target. A target of the form "prefix#suffix" has every
# character matched by Metanorma::Utils::NAMECHAR replaced with "_" on each
# side of the first "#"; any other target goes through
# Metanorma::Utils.to_ncname.
def to_xreftarget(str)
  parts = /^(?<pref>[^#]+)#(?<suff>.+)$/.match(str)
  return Metanorma::Utils::to_ncname(str) unless parts

  invalid = %r([#{Metanorma::Utils::NAMECHAR}])o
  [parts[:pref], parts[:suff]].map { |side| side.gsub(invalid, "_") }.join("#")
end
|
121
|
+
|
122
|
+
# XPath union selecting every attribute that holds an identifier or a
# reference to one, as consumed by anchor_cleanup1.
IDREF = [
  "//*/@id", "//review/@from", "//review/@to",
  "//callout/@target", "//citation/@bibitemid", "//eref/@bibitemid"
].join(" | ").freeze
|
125
|
+
|
126
|
+
# Runs, in order: identifier normalisation, xref target normalisation, and
# replacement of GUID-style ids with content hashes. Returns the result of
# the last step.
def anchor_cleanup(elem)
  %i[anchor_cleanup1 xreftarget_cleanup contenthash_id_cleanup]
    .map { |step| send(step, elem) }
    .last
end
|
131
|
+
|
132
|
+
# Rewrites every attribute selected by IDREF to the value returned by
# Metanorma::Utils.to_ncname, logging each attribute that actually changed.
def anchor_cleanup1(elem)
  elem.xpath(IDREF).each do |attr|
    original = attr.value
    normalised = Metanorma::Utils::to_ncname(original)
    next if normalised == original

    attr.value = normalised
    # Log a childless copy of the owning element so the message stays short.
    shell = attr.parent.dup
    shell.children.remove
    @log.add("Anchors", attr.parent,
             "normalised identifier in #{shell} from #{original}")
  end
end
|
143
|
+
|
144
|
+
# Rewrites every xref target attribute to the value returned by
# to_xreftarget, logging each attribute that actually changed.
def xreftarget_cleanup(elem)
  elem.xpath("//xref/@target").each do |attr|
    original = attr.value
    normalised = to_xreftarget(original)
    next if normalised == original

    attr.value = normalised
    # Log a childless copy of the owning element so the message stays short.
    shell = attr.parent.dup
    shell.children.remove
    @log.add("Anchors", attr.parent,
             "normalised identifier in #{shell} from #{original}")
  end
end
|
155
|
+
|
156
|
+
# True when +str+ is an underscore followed by a lower-case hexadecimal GUID
# in 8-4-4-4-12 grouping.
def guid?(str)
  pattern = /^_[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12}$/
  pattern.match?(str)
end
|
160
|
+
|
161
|
+
# Replaces GUID-style ids in +doc+ with content hashes, then repoints the
# references that used the old ids.
def contenthash_id_cleanup(doc)
  contenthash_id_update_refs(doc, contenthash_id_make(doc))
end
|
165
|
+
|
166
|
+
# Gives every element whose id passes guid? a new id computed by contenthash.
# Returns a hash mapping each replaced id to its new value.
def contenthash_id_make(doc)
  mapping = {}
  doc.xpath("//*[@id]").each do |node|
    old_id = node["id"]
    next unless guid?(old_id)

    new_id = contenthash(node)
    mapping[old_id] = new_id
    node["id"] = new_id
  end
  mapping
end
|
174
|
+
|
175
|
+
# Repoints referencing attributes at renamed ids. +ids+ maps old id to new id;
# an attribute whose value is not a key of +ids+ is left unchanged.
def contenthash_id_update_refs(doc, ids)
  referencing = [%w(review from), %w(review to), %w(callout target),
                 %w(eref bibitemid), %w(citation bibitemid),
                 %w(xref target), %w(xref to)]
  referencing.each do |tag, attr|
    doc.xpath("//#{tag}").each do |node|
      replacement = ids[node[attr]]
      node[attr] = replacement if replacement
    end
  end
end
|
183
|
+
|
184
|
+
# Builds a GUID-shaped id ("_" plus 8-4-4-4-12 hex groups) from the MD5
# digest of the element's path and text.
def contenthash(elem)
  digest = Digest::MD5.hexdigest("#{elem.path}////#{elem.text}")
  groups = [digest[0, 8], digest[8, 4], digest[12, 4],
            digest[16, 4], digest[20, 12]]
  "_#{groups.join('-')}"
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
@@ -0,0 +1,222 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "pathname"
|
3
|
+
require "html2doc"
|
4
|
+
require "asciimath2unitsml"
|
5
|
+
require_relative "./cleanup_block"
|
6
|
+
require_relative "./cleanup_footnotes"
|
7
|
+
require_relative "./cleanup_ref"
|
8
|
+
require_relative "./cleanup_ref_dl"
|
9
|
+
require_relative "./cleanup_boilerplate"
|
10
|
+
require_relative "./cleanup_section"
|
11
|
+
require_relative "./cleanup_terms"
|
12
|
+
require_relative "./cleanup_inline"
|
13
|
+
require_relative "./cleanup_amend"
|
14
|
+
require "relaton_iev"
|
15
|
+
|
16
|
+
module Metanorma
|
17
|
+
module Standoc
|
18
|
+
module Cleanup
|
19
|
+
# Converts AsciiMath stems in an XML string to MathML.
# Each AsciiMath <stem> is re-marked as <amathstem> with its entities decoded,
# the text is passed to Html2Doc.asciimath_to_mathml, and any resulting
# <math> element not already inside a <stem> is wrapped in a MathML stem.
# Returns the serialised XML.
def asciimath2mathml(text)
  marked = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
    decoded = HTMLEntities.new.decode(Regexp.last_match(1))
    "<amathstem>#{decoded}</amathstem>"
  end
  converted = Html2Doc.asciimath_to_mathml(marked,
                                           ["<amathstem>", "</amathstem>"])
  doc = Nokogiri::XML(converted)
  doc.xpath("//*[local-name() = 'math'][not(parent::stem)]").each do |math|
    math.wrap("<stem type='MathML'></stem>")
  end
  doc.to_xml
end
|
31
|
+
|
32
|
+
# Replaces the children of +xml+ with markup recovered from its escaped text.
# Skipped when +xml+ already has element children.
# The five predefined XML entities (lt, gt, quot, apos, amp) are unescaped in
# a single pass, then namespace prefixes are stripped from opening and
# closing tags.
# The entity pattern is written as an alternation rather than as literal
# entity text so that entity-decoding of this source file cannot turn the
# substitutions into no-ops.
def xml_unescape_mathml(xml)
  return if xml.children.any?(&:element?)

  entities = { "lt" => "<", "gt" => ">", "quot" => '"',
               "apos" => "'", "amp" => "&" }
  math = xml.text
    .gsub(/&(lt|gt|quot|apos|amp);/) { entities[Regexp.last_match(1)] }
    .gsub(/<[^: \r\n\t\/]+:/, "<").gsub(/<\/[^ \r\n\t:]+:/, "</")
  xml.children = math
end
|
40
|
+
|
41
|
+
# Namespace URI applied to and used for querying MathML elements.
MATHML_NS = 'http://www.w3.org/1998/Math/MathML'.freeze
|
42
|
+
|
43
|
+
# Replaces a whitespace character at the start or end of a line inside each
# MathML <mtext> with a non-breaking space (U+00A0).
# The character is spelled as an escape so it stays visible and survives
# whitespace normalisation or entity decoding of this source file.
def mathml_preserve_space(math)
  nbsp = "\u00A0"
  math.xpath(".//m:mtext", "m" => MATHML_NS).each do |mtext|
    mtext.children = mtext.children.to_xml
      .gsub(/^\s/, nbsp).gsub(/\s$/, nbsp)
  end
end
|
49
|
+
|
50
|
+
# Sets the MathML namespace as default namespace on each <math> child of
# +stem+.
def mathml_namespace(stem)
  stem.xpath("./math").each do |math|
    math.default_namespace = MATHML_NS
  end
end
|
53
|
+
|
54
|
+
# Per-class italicisation flags for single-character <mi>, keyed by
# :uppergreek, :upperroman, :lowergreek and :lowerroman; all true here.
def mathml_mi_italics
  %i[uppergreek upperroman lowergreek lowerroman]
    .map { |letter_class| [letter_class, true] }
    .to_h
end
|
58
|
+
|
59
|
+
# presuppose multichar mi upright, singlechar mi MathML default italic
|
60
|
+
# Marks <mi> elements with mathvariant="normal" when mi_italicise? returns
# true for their decoded text. Only <mi> with no ancestor carrying a
# mathvariant attribute are considered.
def mathml_italicise(xml)
  candidates = xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
                         "m" => MATHML_NS)
  candidates.each do |mi|
    char = HTMLEntities.new.decode(mi.text)
    next unless mi_italicise?(char)

    mi["mathvariant"] = "normal"
  end
end
|
67
|
+
|
68
|
+
# Truthy when +char+ is a single Greek or Latin letter whose class
# (upper/lower x greek/roman) is switched off in mathml_mi_italics.
# Multi-character strings and other scripts give false.
def mi_italicise?(char)
  return false if char.length > 1

  script = if /\p{Greek}/.match?(char) then "greek"
           elsif /\p{Latin}/.match?(char) then "roman"
           end
  return false unless script

  (/\p{Lower}/.match(char) && !mathml_mi_italics[:"lower#{script}"]) ||
    (/\p{Upper}/.match(char) && !mathml_mi_italics[:"upper#{script}"])
end
|
80
|
+
|
81
|
+
# Namespace URI used for creating and querying UnitsML elements.
UNITSML_NS = 'https://schema.unitsml.org/unitsml/1.0'.freeze
|
82
|
+
|
83
|
+
# Returns the document's <misc-container>, creating one first if absent.
# A new container is inserted right after <termdocsource> if present,
# otherwise right after <bibdata>.
def add_misc_container(xmldoc)
  existing = xmldoc.at("//misc-container")
  return existing if existing

  anchor = xmldoc.at("//termdocsource") || xmldoc.at("//bibdata")
  anchor.next = "<misc-container/>"
  xmldoc.at("//misc-container")
end
|
91
|
+
|
92
|
+
# When the document contains any UnitsML-namespaced element, adds a <UnitsML>
# element to the misc-container and gathers Unit, CountedItem, Quantity,
# Dimension and Prefix elements into it, in that order.
def mathml_unitsML(xmldoc)
  return unless xmldoc.at(".//m:*", "m" => UNITSML_NS)

  container = add_misc_container(xmldoc)
  root = container.add_child("<UnitsML xmlns='#{UNITSML_NS}'/>").first
  %w(Unit CountedItem Quantity Dimension Prefix).each do |kind|
    gather_unitsml(root, xmldoc, kind)
  end
end
|
101
|
+
|
102
|
+
# Detaches every UnitsML element named +tag+ from +xmldoc+ and re-attaches
# them under a new <{tag}Set> child of +unitsml+. Elements are keyed by id,
# so a later element with the same id replaces an earlier one.
# Returns nil without creating the set when none were found.
def gather_unitsml(unitsml, xmldoc, tag)
  by_id = {}
  xmldoc.xpath(".//m:#{tag}", "m" => UNITSML_NS).each do |node|
    by_id[node["id"]] = node.remove
  end
  return if by_id.empty?

  set = unitsml.add_child("<#{tag}Set/>").first
  by_id.each_value { |node| set << node }
end
|
112
|
+
|
113
|
+
# Options handed to Asciimath2UnitsML::Conv.new by mathml_cleanup.
def asciimath2unitsml_options
  options = {}
  options[:multiplier] = :space
  options
end
|
116
|
+
|
117
|
+
# Combination table for mathvariant_override: first key is the +outer+
# argument, second key the +inner+ argument, value the merged variant.
# "sans-serif-bold" is kept as an accepted alias next to the MathML spelling
# "bold-sans-serif".
MATHVARIANT_COMBINATIONS = {
  "bold" => {
    "normal" => "bold", "italic" => "bold-italic",
    "fraktur" => "bold-fraktur", "script" => "bold-script",
    "sans-serif" => "bold-sans-serif",
    "sans-serif-italic" => "sans-serif-bold-italic"
  },
  "italic" => {
    "normal" => "italic", "bold" => "bold-italic",
    "sans-serif" => "sans-serif-italic",
    "bold-sans-serif" => "sans-serif-bold-italic"
  },
  "bold-italic" => {
    "normal" => "bold-italic", "bold" => "bold-italic",
    "italic" => "bold-italic",
    "sans-serif" => "sans-serif-bold-italic",
    "bold-sans-serif" => "sans-serif-bold-italic",
    "sans-serif-italic" => "sans-serif-bold-italic"
  },
  "fraktur" => { "normal" => "fraktur", "bold" => "bold-fraktur" },
  "bold-fraktur" => { "normal" => "bold-fraktur",
                      "fraktur" => "bold-fraktur" },
  "script" => { "normal" => "script", "bold" => "bold-script" },
  "bold-script" => { "normal" => "bold-script", "script" => "bold-script" },
  "sans-serif" => {
    "normal" => "sans-serif", "bold" => "bold-sans-serif",
    "italic" => "sans-serif-italic",
    "bold-italic" => "sans-serif-bold-italic"
  },
  "bold-sans-serif" => {
    "normal" => "bold-sans-serif", "bold" => "bold-sans-serif",
    "sans-serif" => "bold-sans-serif",
    "italic" => "sans-serif-bold-italic",
    "bold-italic" => "sans-serif-bold-italic",
    "sans-serif-italic" => "sans-serif-bold-italic"
  },
  "sans-serif-italic" => {
    "normal" => "sans-serif-italic", "italic" => "sans-serif-italic",
    "sans-serif" => "sans-serif-italic",
    "bold" => "sans-serif-bold-italic",
    "bold-italic" => "sans-serif-bold-italic",
    "bold-sans-serif" => "sans-serif-bold-italic",
    "sans-serif-bold" => "sans-serif-bold-italic"
  },
  "sans-serif-bold-italic" => {
    "normal" => "sans-serif-bold-italic",
    "italic" => "sans-serif-bold-italic",
    "sans-serif" => "sans-serif-bold-italic",
    "sans-serif-italic" => "sans-serif-bold-italic",
    "bold" => "sans-serif-bold-italic",
    "bold-italic" => "sans-serif-bold-italic",
    "bold-sans-serif" => "sans-serif-bold-italic",
    "sans-serif-bold" => "sans-serif-bold-italic"
  }
}.freeze

# Merges two mathvariant values into one.
# Returns the combined variant when the (+outer+, +inner+) pair is listed in
# MATHVARIANT_COMBINATIONS, and +inner+ unchanged otherwise.
# Previously the "sans-serif-italic" and "sans-serif-bold-italic" cases only
# recognised the non-standard spelling "sans-serif-bold", so a
# "bold-sans-serif" +inner+ fell through uncombined.
def mathvariant_override(inner, outer)
  MATHVARIANT_COMBINATIONS.dig(outer, inner) || inner
end
|
198
|
+
|
199
|
+
# For every element carrying mathvariant, recomputes the mathvariant of each
# descendant that also carries one via mathvariant_override.
# NOTE(review): the enclosing element's value is passed as the first argument
# and the descendant's as the second, while mathvariant_override names its
# parameters (inner, outer) -- confirm this ordering is intended.
def mathml_mathvariant(math)
  math.xpath(".//*[@mathvariant]").each do |outer|
    enclosing = outer["mathvariant"]
    outer.xpath(".//*[@mathvariant]").each do |inner|
      inner["mathvariant"] =
        mathvariant_override(enclosing, inner["mathvariant"])
    end
  end
end
|
207
|
+
|
208
|
+
# Runs the MathML cleanup steps on every MathML <stem>: unescape, namespace,
# space preservation, UnitsML conversion, mathvariant merging, italicisation.
# Afterwards collects UnitsML definitions for the whole document.
def mathml_cleanup(xmldoc)
  converter = Asciimath2UnitsML::Conv.new(asciimath2unitsml_options)
  xmldoc.xpath("//stem[@type = 'MathML']").each do |stem|
    xml_unescape_mathml(stem)
    mathml_namespace(stem)
    mathml_preserve_space(stem)
    converter.MathML2UnitsML(stem)
    mathml_mathvariant(stem)
    mathml_italicise(stem)
  end
  mathml_unitsML(xmldoc)
end
|
220
|
+
end
|
221
|
+
end
|
222
|
+
end
|
@@ -0,0 +1,170 @@
|
|
1
|
+
require "set"
|
2
|
+
require "relaton_bib"
|
3
|
+
|
4
|
+
module Metanorma
|
5
|
+
module Standoc
|
6
|
+
module Cleanup
|
7
|
+
# Reorders the entries of every <references> marked normative="false".
def biblio_reorder(xmldoc)
  xmldoc.xpath("//references[@normative = 'false']")
    .each { |refs| biblio_reorder1(refs) }
end
|
12
|
+
|
13
|
+
# Re-inserts the <bibitem> children of +refs+ in the order given by
# sort_biblio, keeping each item's trailing notes with it, then recurses into
# nested <references>.
def biblio_reorder1(refs)
  fold_notes_into_biblio(refs)
  sorted = sort_biblio(refs.xpath("./bibitem"))
  # Element preceding the first bibitem: insertion point, if any.
  anchor = refs&.at("./bibitem")&.previous_element
  refs.xpath("./bibitem").each(&:remove)
  # Inserting in reverse at a fixed point yields the sorted order.
  sorted.reverse.each do |item|
    if anchor
      anchor.next = item.to_xml
    else
      refs.children.first.add_previous_sibling item.to_xml
    end
  end
  extract_notes_from_biblio(refs)
  refs.xpath("./references").each { |nested| biblio_reorder1(nested) }
end
|
25
|
+
|
26
|
+
# Moves each run of <note> elements directly following a <bibitem> inside
# that bibitem, tagging every moved note with an "appended" attribute.
def fold_notes_into_biblio(refs)
  refs.xpath("./bibitem").each do |item|
    while (follower = item&.next_element) && follower.name == "note"
      follower["appended"] = true
      item << follower.remove
    end
  end
end
|
34
|
+
|
35
|
+
# Moves notes tagged "appended" back out of each <bibitem>, placing them
# right after it in their original order, and drops the tag.
def extract_notes_from_biblio(refs)
  refs.xpath("./bibitem").each do |item|
    appended = item.xpath("./note[@appended]")
    appended.reverse.each do |note|
      note.delete("appended")
      item.next = note
    end
  end
end
|
43
|
+
|
44
|
+
# Ordering applied to bibliography entries by biblio_reorder1.
# This implementation returns +bib+ unchanged.
def sort_biblio(bib)
  bib
end
|
47
|
+
|
48
|
+
# default presuppose that all citations in biblio numbered
|
49
|
+
# consecutively, but that standards codes are preserved as is:
|
50
|
+
# only numeric references are renumbered
|
51
|
+
# Renumbers bracketed numeric metanorma identifiers ("[n]") across all
# references that are not normative="true". The counter advances for every
# non-hidden bibitem, whether or not its identifier is rewritten.
def biblio_renumber(xmldoc)
  counter = 0
  scope = "//bibliography//references | //clause//references | "\
          "//annex//references"
  xmldoc.xpath(scope).each do |refs|
    next if refs["normative"] == "true"

    refs.xpath("./bibitem[not(@hidden = 'true')]").each do |item|
      counter += 1
      docid = item.at("./docidentifier[@type = 'metanorma']")
      next unless docid && /^\[\d+\]$/.match?(docid.text)

      docid.children = "[#{counter}]"
    end
  end
end
|
66
|
+
|
67
|
+
# move ref before p
|
68
|
+
# Moves every <ref> that is a child of a <p> to just before that <p>.
def ref_cleanup(xmldoc)
  xmldoc.xpath("//p/ref").each do |ref|
    para = ref.parent
    detached = ref.remove
    para.previous = detached
  end
end
|
74
|
+
|
75
|
+
# In the section matched by the class's NORM_REF XPath, removes the elements
# lying between the title and the first bibitem, except boilerplate notes
# and their descendants. Does nothing when the section is absent.
def normref_cleanup(xmldoc)
  refs = xmldoc.at(self.class::NORM_REF)
  return unless refs

  after_title = refs.xpath("./title/following-sibling::*")
  before_first_bibitem = refs.xpath("./bibitem[1]/preceding-sibling::*")
  boilerplate =
    refs.xpath("./note[@type = 'boilerplate']/descendant-or-self::*")
  ((after_title & before_first_bibitem) - boilerplate).each(&:remove)
end
|
82
|
+
|
83
|
+
# Runs the bibliography cleanup steps in order: reorder, nest, renumber,
# strip <ext>. Returns the result of the last step.
def biblio_cleanup(xmldoc)
  %i[biblio_reorder biblio_nested biblio_renumber biblio_no_ext]
    .map { |step| send(step, xmldoc) }
    .last
end
|
89
|
+
|
90
|
+
# Removes every <ext> child of a <bibitem>.
def biblio_no_ext(xmldoc)
  xmldoc.xpath("//bibitem/ext").each do |ext|
    ext.remove
  end
end
|
93
|
+
|
94
|
+
# Turns every <references> that contains <references> into a <clause>,
# copying its "normative" value onto the child references and removing the
# attribute from the renamed element.
def biblio_nested(xmldoc)
  xmldoc.xpath("//references[references]").each do |wrapper|
    wrapper.name = "clause"
    normative = wrapper["normative"]
    wrapper.xpath("./references").each do |child|
      child["normative"] = normative
    end
    wrapper.delete("normative")
  end
end
|
101
|
+
|
102
|
+
# Formats a document identifier for display.
# Types other than "metanorma" are delegated to @isodoc.docid_prefix.
# A metanorma identifier consisting only of digits is wrapped in square
# brackets; any other metanorma identifier is returned as is.
def format_ref(ref, type)
  return @isodoc.docid_prefix(type, ref) unless type == "metanorma"

  bare_number = /^\d+$/.match(ref) && !/^\[.*\]$/.match(ref)
  bare_number ? "[#{ref}]" : ref
end
|
108
|
+
|
109
|
+
# XPath (relative to a bibitem) selecting a publisher organization that is
# ISO or IEC, matched by abbreviation or by full name.
ISO_PUBLISHER_XPATH = format(
  "./contributor[role/@type = 'publisher']/organization[%s]",
  ["abbreviation = 'ISO'", "abbreviation = 'IEC'",
   "name = 'International Organization for Standardization'",
   "name = 'International Electrotechnical Commission'"].join(" or ")
).freeze
|
114
|
+
|
115
|
+
# Records in @anchors, under each top-level bibitem's id, the formatted
# identifier used to cite it. The metanorma identifier is preferred,
# falling back to the first identifier whose type is not DOI; bibitems with
# neither are skipped.
def reference_names(xmldoc)
  xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |bibitem|
    # isopub = bibitem.at(ISO_PUBLISHER_XPATH)
    docid = bibitem.at("./docidentifier[@type = 'metanorma']") ||
      bibitem.at("./docidentifier[not(@type = 'DOI')]")
    next unless docid

    label = format_ref(docid.text, docid["type"])
    @anchors[bibitem["id"]] = { xref: label }
  end
end
|
124
|
+
|
125
|
+
# Termbase lookup; this implementation ignores both arguments and returns an
# empty string.
def fetch_termbase(_termbase, _id)
  ""
end
|
128
|
+
|
129
|
+
# Loads a bibitem from a local file named by +uri+ (relative to @localdir),
# trying "<uri>.rxl" then "<uri>.xml".
# The file's <bibdata> is renamed to <bibitem> with its default namespace
# declaration stripped, a citation <uri> is inserted before its first
# docidentifier, and its <ext> is removed.
# Returns nil for http(s) URIs, when no file exists, or when the file lacks
# a <bibdata> or a docidentifier.
def read_local_bibitem(uri)
  return nil if %r{^https?://}.match?(uri)

  file = %w(rxl xml).map { |ext| "#{@localdir}#{uri}.#{ext}" }
    .find { |candidate| File.file?(candidate) }
  return nil unless file

  doc = Nokogiri::XML(File.read(file, encoding: "utf-8"))
  bibdata = doc.at("//*[local-name() = 'bibdata']")
  return nil unless bibdata

  stripped = bibdata.to_xml
    .sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")
  bibitem = Nokogiri::XML(stripped).root
  bibitem.name = "bibitem"
  docid = bibitem.at("./*[local-name() = 'docidentifier']")
  return nil unless docid

  docid.previous = %{<uri type="citation">#{uri}</uri>}
  bibitem&.at("./*[local-name() = 'ext']")&.remove
  bibitem
end
|
145
|
+
|
146
|
+
# if citation uri points to local file, get bibitem from it
|
147
|
+
# Replaces each bibitem that has a formattedref and a citation URI with the
# bibitem read from the local file the URI names, keeping the original id.
# Bibitems for which read_local_bibitem returns nil are left untouched.
def fetch_local_bibitem(xmldoc)
  xmldoc.xpath("//bibitem[formattedref][uri[@type = 'citation']]")
    .each do |stub|
    uri = stub&.at("./uri[@type = 'citation']")&.text
    local = read_local_bibitem(uri)
    next unless local

    local["id"] = stub["id"]
    stub.replace(local)
  end
end
|
156
|
+
|
157
|
+
# Deletes the id attribute from every bibitem nested inside another bibitem.
def bibitem_nested_id(xmldoc)
  xmldoc.xpath("//bibitem//bibitem").each { |nested| nested.delete("id") }
end
|
162
|
+
|
163
|
+
# Runs, in order: nested id removal, definition-list reference conversion,
# and local bibitem loading. Returns the result of the last step.
def bibitem_cleanup(xmldoc)
  %i[bibitem_nested_id ref_dl_cleanup fetch_local_bibitem]
    .map { |step| send(step, xmldoc) }
    .last
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require "set"
|
2
|
+
require "relaton_bib"
|
3
|
+
|
4
|
+
module Metanorma
|
5
|
+
module Standoc
|
6
|
+
module Cleanup
|
7
|
+
# Replaces each clause flagged bibitem="true" with a bibitem built from the
# definition list it contains. Clauses yielding no hash or no XML are left
# alone. The clause id is carried over only when it does not start with "_".
def ref_dl_cleanup(xmldoc)
  xmldoc.xpath("//clause[@bibitem = 'true']").each do |clause|
    bib = dl_bib_extract(clause)
    next unless bib

    validate_ref_dl(bib, clause)
    xml = RelatonBib::BibliographicItem.from_hash(bib).to_xml
    next unless xml

    bibitem = Nokogiri::XML(xml)
    explicit_id = clause["id"] && !/^_/.match(clause["id"])
    bibitem.root["id"] = clause["id"] if explicit_id
    clause.replace(bibitem.root)
  end
end
|
17
|
+
|
18
|
+
# Logs problems with a definition-list reference and records its id in
# @refids. The id comes from the hash, or else from the clause unless the
# clause id starts with "_" (implicit ids are not accepted). Without an id
# an "Anchors" entry is logged and validation stops; otherwise missing title
# and docid are each logged under "Bibliography".
def validate_ref_dl(bib, c)
  id = bib["id"] || (/^_/.match?(c["id"]) ? nil : c["id"])
  unless id
    @log.add("Anchors", c,
             "The following reference is missing an anchor:\n" + c.to_xml)
    return
  end
  @refids << id
  unless bib["title"]
    @log.add("Bibliography", c, "Reference #{id} is missing a title")
  end
  bib["docid"] ||
    @log.add("Bibliography", c,
             "Reference #{id} is missing a document identifier (docid)")
end
|
33
|
+
|
34
|
+
# Wraps the children of the first <p> found in bib[tag] in a <key> element,
# returned as a string. Returns nil when bib[tag] is absent.
def extract_from_p(tag, bib, key)
  entry = bib[tag]
  return unless entry

  content = entry.at("p").children
  "<#{key}>#{content}</#{key}>"
end
|
39
|
+
|
40
|
+
# if the content is a single paragraph, replace it with its children
|
41
|
+
# single links replaced with uri
|
42
|
+
# Serialises +para+. When its only child element is a <p>, returns that
# paragraph's children (after link_unwrap) instead of the whole element.
# The result is stripped of surrounding whitespace.
def p_unwrap(para)
  only = para.elements
  single_para = only.size == 1 && only[0].name == "p"
  return para.to_xml.strip unless single_para

  link_unwrap(only[0]).children.to_xml.strip
end
|
50
|
+
|
51
|
+
# When the only child element of +para+ is a <link>, replaces that link with
# its stripped target value. Returns +para+ in every case.
def link_unwrap(para)
  kids = para.elements
  return para unless kids.size == 1 && kids[0].name == "link"

  para.at("./link").replace(kids[0]["target"].strip)
  para
end
|
58
|
+
|
59
|
+
# Extracts the value held by a <dt>/<dd> node:
# nil when it has no children; a nested hash when it contains a <dl>;
# an array of unwrapped items when its single child element is an <ol> or
# <ul>; otherwise its unwrapped content as a string.
# The node is detached from the document in the last two cases.
def dd_bib_extract(dtd)
  return nil if dtd.children.empty?
  return dl_bib_extract(dtd) if dtd.at("./dl")

  elems = dtd.remove.elements
  list = elems.size == 1 && %w(ol ul).include?(elems[0].name)
  return p_unwrap(dtd) unless list

  elems[0].xpath("./li").map { |li| p_unwrap(li) }
end
|
73
|
+
|
74
|
+
# Stores +val+ in +bib+ under the dot-separated path +key+, via
# Metanorma::Utils.set_nested_value.
def add_to_hash(bib, key, val)
  path = key.split(".")
  Metanorma::Utils::set_nested_value(bib, path, val)
end
|
77
|
+
|
78
|
+
# definition list, with at most one level of unordered lists
|
79
|
+
# Builds a hash from the definition list directly under +c+: each <dt> text
# (trailing colons removed) becomes the key for the following <dd> values.
# Subclauses titled contributor, relation or series are extracted
# recursively under that key. Unless +nested+, the clause title is removed
# from the tree and appended to the "title" array.
# Returns nil when +c+ has no <dl> child.
def dl_bib_extract(c, nested = false)
  dl = c.at("./dl")
  return unless dl

  bib = {}
  key = ""
  dl.xpath("./dt | ./dd").each do |dtd|
    if dtd.name == "dt"
      key = dtd.text.sub(/:+$/, "")
    else
      add_to_hash(bib, key, dd_bib_extract(dtd))
    end
  end
  c.xpath("./clause").each do |sub|
    heading = sub&.at("./title")&.text&.downcase&.strip
    next unless %w(contributor relation series).include?(heading)

    add_to_hash(bib, heading, dl_bib_extract(sub, true))
  end
  if !nested && c.at("./title")
    title = c.at("./title").remove.children.to_xml
    # Normalise any existing title to an array before appending.
    bib["title"] = case bib["title"]
                   when Hash, String then [bib["title"]]
                   when nil, false then []
                   else bib["title"]
                   end
    bib["title"] << title unless title.empty?
  end
  bib
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|