metanorma-standoc 2.1.4 → 2.2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/metanorma/standoc/base.rb +1 -0
- data/lib/metanorma/standoc/blocks.rb +3 -7
- data/lib/metanorma/standoc/cleanup.rb +4 -2
- data/lib/metanorma/standoc/cleanup_biblio.rb +204 -0
- data/lib/metanorma/standoc/cleanup_block.rb +46 -4
- data/lib/metanorma/standoc/cleanup_maths.rb +2 -15
- data/lib/metanorma/standoc/cleanup_ref.rb +22 -13
- data/lib/metanorma/standoc/cleanup_reqt.rb +37 -4
- data/lib/metanorma/standoc/cleanup_symbols.rb +1 -1
- data/lib/metanorma/standoc/cleanup_terms.rb +6 -2
- data/lib/metanorma/standoc/cleanup_text.rb +10 -8
- data/lib/metanorma/standoc/cleanup_xref.rb +1 -2
- data/lib/metanorma/standoc/converter.rb +2 -0
- data/lib/metanorma/standoc/front.rb +1 -1
- data/lib/metanorma/standoc/inline.rb +8 -4
- data/lib/metanorma/standoc/isodoc.rng +16 -1
- data/lib/metanorma/standoc/macros.rb +1 -180
- data/lib/metanorma/standoc/macros_inline.rb +194 -0
- data/lib/metanorma/standoc/ref_sect.rb +2 -2
- data/lib/metanorma/standoc/ref_utility.rb +5 -6
- data/lib/metanorma/standoc/reqt.rb +5 -5
- data/lib/metanorma/standoc/reqt.rng +1 -1
- data/lib/metanorma/standoc/section.rb +2 -0
- data/lib/metanorma/standoc/term_lookup_cleanup.rb +1 -1
- data/lib/metanorma/standoc/utils.rb +1 -1
- data/lib/metanorma/standoc/validate.rb +1 -69
- data/lib/metanorma/standoc/validate_table.rb +91 -0
- data/lib/metanorma/standoc/version.rb +1 -1
- data/metanorma-standoc.gemspec +2 -3
- data/spec/metanorma/{refs_dl_spec.rb → biblio_spec.rb} +84 -1
- data/spec/metanorma/blocks_spec.rb +68 -8
- data/spec/metanorma/cleanup_blocks_spec.rb +107 -27
- data/spec/metanorma/inline_spec.rb +4 -0
- data/spec/metanorma/isobib_cache_spec.rb +6 -4
- data/spec/metanorma/macros_spec.rb +6 -2
- data/spec/metanorma/refs_spec.rb +261 -232
- data/spec/metanorma/validate_spec.rb +106 -7
- data/spec/vcr_cassettes/bsi16341.yml +63 -51
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +62 -62
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
- data/spec/vcr_cassettes/hide_refs.yml +58 -58
- data/spec/vcr_cassettes/isobib_get_123.yml +12 -12
- data/spec/vcr_cassettes/isobib_get_123_1.yml +22 -22
- data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +33 -33
- data/spec/vcr_cassettes/isobib_get_123_2.yml +295 -0
- data/spec/vcr_cassettes/isobib_get_123_2001.yml +11 -11
- data/spec/vcr_cassettes/isobib_get_124.yml +12 -12
- data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +24 -30
- data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +46 -46
- data/spec/vcr_cassettes/std-link.yml +14 -72
- metadata +9 -6
- data/lib/metanorma/standoc/cleanup_ref_dl.rb +0 -113
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e6a6d6a98133ba0cf81631287aff2157906da7d0978899abe5c00c78f0da6b2d
|
4
|
+
data.tar.gz: 81c33e16f952e56dcff81bd0485ead5ba998839b307e30eb8de500bf2448a600
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b36c1f1766233c69f9b5a63298d8f7cd8745cac7f7186f210500bc23a27983b3202a724ebd799f6d305b928377ed5d2ef28dcb43aac4e9c91f65b9300a98ac19
|
7
|
+
data.tar.gz: e4b1be10928b81269486083791e7dd23fab26b2697d1b1a82fa607dfa4c6f46a00bdc4373403605c2807a993ab2f4015f34f643607610d10344cfbd95a6a83b3
|
@@ -15,8 +15,7 @@ module Metanorma
|
|
15
15
|
attr_code(id_attr(node).merge(
|
16
16
|
unnumbered: node.option?("unnumbered") ? "true" : nil,
|
17
17
|
number: node.attr("number"),
|
18
|
-
subsequence: node.attr("subsequence")
|
19
|
-
))
|
18
|
+
subsequence: node.attr("subsequence")))
|
20
19
|
end
|
21
20
|
|
22
21
|
def formula_attrs(node)
|
@@ -105,9 +104,8 @@ module Metanorma
|
|
105
104
|
|
106
105
|
def svgmap_attrs(node)
|
107
106
|
attr_code(id_attr(node)
|
108
|
-
.merge(id: node.id,
|
107
|
+
.merge(id: node.id, number: node.attr("number"),
|
109
108
|
unnumbered: node.option?("unnumbered") ? "true" : nil,
|
110
|
-
number: node.attr("number"),
|
111
109
|
subsequence: node.attr("subsequence"))
|
112
110
|
.merge(keep_attrs(node)))
|
113
111
|
end
|
@@ -225,7 +223,6 @@ module Metanorma
|
|
225
223
|
filename: node.attr("filename"))))
|
226
224
|
end
|
227
225
|
|
228
|
-
# NOTE: html escaping is performed by Nokogiri
|
229
226
|
def listing(node)
|
230
227
|
fragment = ::Nokogiri::XML::Builder.new do |xml|
|
231
228
|
xml.sourcecode **listing_attrs(node) do |s|
|
@@ -238,11 +235,10 @@ module Metanorma
|
|
238
235
|
end
|
239
236
|
|
240
237
|
def pass(node)
|
241
|
-
c = HTMLEntities.new
|
242
238
|
noko do |xml|
|
243
239
|
xml.passthrough **attr_code(formats:
|
244
240
|
node.attr("format") || "metanorma") do |p|
|
245
|
-
p << c.encode(c.decode(node.content), :basic, :hexadecimal)
|
241
|
+
p << @c.encode(@c.decode(node.content), :basic, :hexadecimal)
|
246
242
|
end
|
247
243
|
end
|
248
244
|
end
|
@@ -5,7 +5,7 @@ require_relative "./cleanup_block"
|
|
5
5
|
require_relative "./cleanup_table"
|
6
6
|
require_relative "./cleanup_footnotes"
|
7
7
|
require_relative "./cleanup_ref"
|
8
|
-
require_relative "./
|
8
|
+
require_relative "./cleanup_biblio"
|
9
9
|
require_relative "./cleanup_boilerplate"
|
10
10
|
require_relative "./cleanup_section"
|
11
11
|
require_relative "./cleanup_terms"
|
@@ -28,6 +28,8 @@ module Metanorma
|
|
28
28
|
passthrough_cleanup(xmldoc)
|
29
29
|
sections_cleanup(xmldoc)
|
30
30
|
obligations_cleanup(xmldoc)
|
31
|
+
para_index_cleanup(xmldoc)
|
32
|
+
block_index_cleanup(xmldoc)
|
31
33
|
table_cleanup(xmldoc)
|
32
34
|
formula_cleanup(xmldoc)
|
33
35
|
form_cleanup(xmldoc)
|
@@ -51,7 +53,7 @@ module Metanorma
|
|
51
53
|
termdef_cleanup(xmldoc)
|
52
54
|
RelatonIev::iev_cleanup(xmldoc, @bibdb)
|
53
55
|
element_name_cleanup(xmldoc)
|
54
|
-
|
56
|
+
term_index_cleanup(xmldoc)
|
55
57
|
bpart_cleanup(xmldoc)
|
56
58
|
quotesource_cleanup(xmldoc)
|
57
59
|
callout_cleanup(xmldoc)
|
@@ -0,0 +1,204 @@
|
|
1
|
+
require "set"
|
2
|
+
require "relaton_bib"
|
3
|
+
|
4
|
+
module Metanorma
|
5
|
+
module Standoc
|
6
|
+
module Cleanup
|
7
|
+
def ref_dl_cleanup(xmldoc)
|
8
|
+
xmldoc.xpath("//clause[@bibitem = 'true']").each do |c|
|
9
|
+
bib = dl_bib_extract(c) or next
|
10
|
+
validate_ref_dl(bib, c)
|
11
|
+
bibitemxml = RelatonBib::BibliographicItem.from_hash(bib).to_xml or next
|
12
|
+
bibitem = Nokogiri::XML(bibitemxml)
|
13
|
+
bibitem.root["id"] = c["id"] if c["id"] && !/^_/.match(c["id"])
|
14
|
+
c.replace(bibitem.root)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# do not accept implicit id
|
19
|
+
def validate_ref_dl(bib, clause)
|
20
|
+
id = bib["id"]
|
21
|
+
id ||= clause["id"] unless /^_/.match?(clause["id"])
|
22
|
+
unless id
|
23
|
+
@log.add("Anchors", clause,
|
24
|
+
"The following reference is missing an anchor:\n"\
|
25
|
+
"#{clause.to_xml}")
|
26
|
+
return
|
27
|
+
end
|
28
|
+
@refids << id
|
29
|
+
validate_ref_dl1(bib, id, clause)
|
30
|
+
end
|
31
|
+
|
32
|
+
def validate_ref_dl1(bib, id, clause)
|
33
|
+
bib["title"] or
|
34
|
+
@log.add("Bibliography", clause, "Reference #{id} is missing a title")
|
35
|
+
bib["docid"] or
|
36
|
+
@log.add("Bibliography", clause,
|
37
|
+
"Reference #{id} is missing a document identifier (docid)")
|
38
|
+
end
|
39
|
+
|
40
|
+
def extract_from_p(tag, bib, key)
|
41
|
+
return unless bib[tag]
|
42
|
+
|
43
|
+
"<#{key}>#{bib[tag].at('p').children}</#{key}>"
|
44
|
+
end
|
45
|
+
|
46
|
+
# if the content is a single paragraph, replace it with its children
|
47
|
+
# single links replaced with uri
|
48
|
+
def p_unwrap(para)
|
49
|
+
elems = para.elements
|
50
|
+
if elems.size == 1 && elems[0].name == "p"
|
51
|
+
link_unwrap(elems[0]).children.to_xml.strip
|
52
|
+
else
|
53
|
+
para.to_xml.strip
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def link_unwrap(para)
|
58
|
+
elems = para.elements
|
59
|
+
if elems.size == 1 && elems[0].name == "link"
|
60
|
+
para.at("./link").replace(elems[0]["target"].strip)
|
61
|
+
end
|
62
|
+
para
|
63
|
+
end
|
64
|
+
|
65
|
+
def dd_bib_extract(dtd)
|
66
|
+
return nil if dtd.children.empty?
|
67
|
+
|
68
|
+
dtd.at("./dl") and return dl_bib_extract(dtd)
|
69
|
+
elems = dtd.remove.elements
|
70
|
+
return p_unwrap(dtd) unless elems.size == 1 &&
|
71
|
+
%w(ol ul).include?(elems[0].name)
|
72
|
+
|
73
|
+
elems[0].xpath("./li").each_with_object([]) do |li, ret|
|
74
|
+
ret << p_unwrap(li)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def add_to_hash(bib, key, val)
|
79
|
+
Metanorma::Utils::set_nested_value(bib, key.split("."), val)
|
80
|
+
end
|
81
|
+
|
82
|
+
# definition list, with at most one level of unordered lists
|
83
|
+
def dl_bib_extract(clause, nested = false)
|
84
|
+
dl = clause.at("./dl") or return
|
85
|
+
key = ""
|
86
|
+
bib = dl.xpath("./dt | ./dd").each_with_object({}) do |dtd, m|
|
87
|
+
(dtd.name == "dt" and key = dtd.text.sub(/:+$/, "")) or
|
88
|
+
add_to_hash(m, key, dd_bib_extract(dtd))
|
89
|
+
end
|
90
|
+
clause.xpath("./clause").each do |c1|
|
91
|
+
key = c1&.at("./title")&.text&.downcase&.strip
|
92
|
+
next unless %w(contributor relation series).include? key
|
93
|
+
|
94
|
+
add_to_hash(bib, key, dl_bib_extract(c1, true))
|
95
|
+
end
|
96
|
+
dl_bib_extract_title(bib, clause, nested)
|
97
|
+
end
|
98
|
+
|
99
|
+
def dl_bib_extract_title(bib, clause, nested)
|
100
|
+
(!nested && clause.at("./title")) or return bib
|
101
|
+
title = clause.at("./title").remove.children.to_xml
|
102
|
+
bib["title"] = [bib["title"]] if bib["title"].is_a?(Hash) ||
|
103
|
+
bib["title"].is_a?(String)
|
104
|
+
bib["title"] ||= []
|
105
|
+
bib["title"] << title if !title.empty?
|
106
|
+
bib
|
107
|
+
end
|
108
|
+
|
109
|
+
# ---
|
110
|
+
|
111
|
+
def formattedref_spans(xmldoc)
|
112
|
+
xmldoc.xpath("//bibitem[formattedref//span]").each do |b|
|
113
|
+
spans_to_bibitem(b, spans_preprocess(extract_content(b)))
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
def extract_content(bib)
|
118
|
+
extract_docid(bib) + extract_spans(bib)
|
119
|
+
end
|
120
|
+
|
121
|
+
def extract_spans(bib)
|
122
|
+
bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
|
123
|
+
keys = s["class"].split(".", 2)
|
124
|
+
m << { key: keys[0], type: keys[1],
|
125
|
+
val: s.children.to_xml }
|
126
|
+
(s["class"] == "type" and s.remove) or s.replace(s.children)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
def extract_docid(bib)
|
131
|
+
bib.xpath("./docidentifier").each_with_object([]) do |d, m|
|
132
|
+
m << { key: "docid", type: d["type"], val: d.text }
|
133
|
+
d.remove
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
def spans_preprocess(spans)
|
138
|
+
ret = { contributor: [], docid: [], uri: [], date: [] }
|
139
|
+
spans.each do |s|
|
140
|
+
case s[:key]
|
141
|
+
when "uri", "docid"
|
142
|
+
ret[s[:key].to_sym] << { type: s[:type], val: s[:val] }
|
143
|
+
when "pubyear" then ret[:date] << { type: "published", val: s[:val] }
|
144
|
+
when "pubplace", "title", "type" then ret[s[:key].to_sym] = s[:val]
|
145
|
+
when "publisher"
|
146
|
+
ret[:contributor] << { role: "publisher", entity: "organization",
|
147
|
+
name: s[:val] }
|
148
|
+
when "surname", "initials", "givenname"
|
149
|
+
ret[:contributor] = spans_preprocess_contrib(s, ret[:contributor])
|
150
|
+
end
|
151
|
+
end
|
152
|
+
ret
|
153
|
+
end
|
154
|
+
|
155
|
+
def spans_preprocess_contrib(span, contrib)
|
156
|
+
spans_preprocess_new_contrib?(span, contrib) and
|
157
|
+
contrib << { role: span[:type] || "author", entity: "person" }
|
158
|
+
contrib[-1][span[:key].to_sym] = span[:val]
|
159
|
+
contrib
|
160
|
+
end
|
161
|
+
|
162
|
+
def spans_preprocess_new_contrib?(span, contrib)
|
163
|
+
contrib.empty? ||
|
164
|
+
(if span[:key] == "surname" then contrib[-1][:surname]
|
165
|
+
else (contrib[-1][:initials] || contrib[-1][:givenname])
|
166
|
+
end) ||
|
167
|
+
contrib[-1][:role] != (span[:type] || "author")
|
168
|
+
end
|
169
|
+
|
170
|
+
def spans_to_bibitem(bib, spans)
|
171
|
+
ret = ""
|
172
|
+
spans[:title] and ret += "<title>#{spans[:title]}</title>"
|
173
|
+
spans[:uri].each { |s| ret += span_to_docid(s, "uri") }
|
174
|
+
spans[:docid].each { |s| ret += span_to_docid(s, "docidentifier") }
|
175
|
+
spans[:date].each { |s| ret += span_to_docid(s, "date") }
|
176
|
+
spans[:contributor].each { |s| ret += span_to_contrib(s) }
|
177
|
+
spans[:pubplace] and ret += "<place>#{spans[:place]}</place>"
|
178
|
+
spans[:type] and bib["type"] = spans[:type]
|
179
|
+
bib << ret
|
180
|
+
end
|
181
|
+
|
182
|
+
def span_to_docid(span, key)
|
183
|
+
if span[:type]
|
184
|
+
"<#{key} type='#{span[:type]}'>#{span[:val]}</#{key}>"
|
185
|
+
else
|
186
|
+
"<#{key}>#{span[:val]}</#{key}>"
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
def span_to_contrib(span)
|
191
|
+
e = if span[:entity] == "organization"
|
192
|
+
"<organization><name>#{span[:name]}</name></organization>"
|
193
|
+
else
|
194
|
+
pre = (span[:initials] and
|
195
|
+
"<initial>#{span[:initials]}</initial>") ||
|
196
|
+
"<forename>#{span[:givenname]}</forename>"
|
197
|
+
"<person><name>#{pre}<surname>#{span[:surname]}</surname></name>"\
|
198
|
+
"</person>"
|
199
|
+
end
|
200
|
+
"<contributor><role type='#{span[:role]}'/>#{e}</contributor>"
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
@@ -45,7 +45,8 @@ module Metanorma
|
|
45
45
|
|
46
46
|
def figure_dl_cleanup1(xmldoc)
|
47
47
|
q = "//figure/following-sibling::*[self::dl]"
|
48
|
-
|
48
|
+
q1 = "//figure/figure/following-sibling::*[self::dl]"
|
49
|
+
(xmldoc.xpath(q) - xmldoc.xpath(q1)).each do |s|
|
49
50
|
s["key"] == "true" and s.previous_element << s.remove
|
50
51
|
end
|
51
52
|
end
|
@@ -65,8 +66,9 @@ module Metanorma
|
|
65
66
|
# examples containing only figures become subfigures of figures
|
66
67
|
def subfigure_cleanup(xmldoc)
|
67
68
|
xmldoc.xpath("//example[figure]").each do |e|
|
68
|
-
next unless e.elements.
|
69
|
-
%w(name figure).include?
|
69
|
+
next unless e.elements.reject do |m|
|
70
|
+
%w(name figure index note).include?(m.name) ||
|
71
|
+
(m.name == "dl" && m["key"] == "true")
|
70
72
|
end.empty?
|
71
73
|
|
72
74
|
e.name = "figure"
|
@@ -84,9 +86,9 @@ module Metanorma
|
|
84
86
|
|
85
87
|
def figure_cleanup(xmldoc)
|
86
88
|
figure_footnote_cleanup(xmldoc)
|
89
|
+
subfigure_cleanup(xmldoc)
|
87
90
|
figure_dl_cleanup1(xmldoc)
|
88
91
|
figure_dl_cleanup2(xmldoc)
|
89
|
-
subfigure_cleanup(xmldoc)
|
90
92
|
single_subfigure_cleanup(xmldoc)
|
91
93
|
end
|
92
94
|
|
@@ -178,6 +180,46 @@ module Metanorma
|
|
178
180
|
end
|
179
181
|
end
|
180
182
|
end
|
183
|
+
|
184
|
+
def block_index_cleanup(xmldoc)
|
185
|
+
xmldoc.xpath("//quote | //td | //th | //formula | //li | //dt | "\
|
186
|
+
"//dd | //example | //note | //figure | //sourcecode | "\
|
187
|
+
"//admonition | //termnote | //termexample | //form | "\
|
188
|
+
"//requirement | //recommendation | //permission | "\
|
189
|
+
"//imagemap | //svgmap").each do |b|
|
190
|
+
b.xpath("./p[indexterm]").each do |p|
|
191
|
+
indexterm_para?(p) or next
|
192
|
+
p.replace(p.children)
|
193
|
+
end
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
def indexterm_para?(para)
|
198
|
+
p = para.dup
|
199
|
+
p.xpath("./index").each(&:remove)
|
200
|
+
p.text.strip.empty?
|
201
|
+
end
|
202
|
+
|
203
|
+
def include_indexterm?(elem)
|
204
|
+
return false if elem.nil?
|
205
|
+
|
206
|
+
!%w(image literal sourcecode).include?(elem.name)
|
207
|
+
end
|
208
|
+
|
209
|
+
def para_index_cleanup(xmldoc)
|
210
|
+
xmldoc.xpath("//p[index]").select { |p| indexterm_para?(p) }
|
211
|
+
.each do |p|
|
212
|
+
para_index_cleanup1(p, p.previous_element, p.next_element)
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
def para_index_cleanup1(para, prev, foll)
|
217
|
+
if include_indexterm?(prev)
|
218
|
+
prev << para.remove.children
|
219
|
+
elsif include_indexterm?(foll) && !foll.children.empty?
|
220
|
+
foll.children.first.previous = para.remove.children
|
221
|
+
end
|
222
|
+
end
|
181
223
|
end
|
182
224
|
end
|
183
225
|
end
|
@@ -1,24 +1,11 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
require "pathname"
|
3
|
-
require "html2doc"
|
4
1
|
require "asciimath2unitsml"
|
5
|
-
require_relative "./cleanup_block"
|
6
|
-
require_relative "./cleanup_footnotes"
|
7
|
-
require_relative "./cleanup_ref"
|
8
|
-
require_relative "./cleanup_ref_dl"
|
9
|
-
require_relative "./cleanup_boilerplate"
|
10
|
-
require_relative "./cleanup_section"
|
11
|
-
require_relative "./cleanup_terms"
|
12
|
-
require_relative "./cleanup_inline"
|
13
|
-
require_relative "./cleanup_amend"
|
14
|
-
require "relaton_iev"
|
15
2
|
|
16
3
|
module Metanorma
|
17
4
|
module Standoc
|
18
5
|
module Cleanup
|
19
6
|
def asciimath2mathml(text)
|
20
7
|
text = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
|
21
|
-
"<amathstem>#{
|
8
|
+
"<amathstem>#{@c.decode($1)}</amathstem>"
|
22
9
|
end
|
23
10
|
text = Html2Doc.new({})
|
24
11
|
.asciimath_to_mathml(text, ["<amathstem>", "</amathstem>"])
|
@@ -60,7 +47,7 @@ module Metanorma
|
|
60
47
|
def mathml_italicise(xml)
|
61
48
|
xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
|
62
49
|
"m" => MATHML_NS).each do |i|
|
63
|
-
char =
|
50
|
+
char = @c.decode(i.text)
|
64
51
|
i["mathvariant"] = "normal" if mi_italicise?(char)
|
65
52
|
end
|
66
53
|
end
|
@@ -153,29 +153,28 @@ module Metanorma
|
|
153
153
|
|
154
154
|
def reference_names(xmldoc)
|
155
155
|
xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |ref|
|
156
|
-
docid = ref
|
157
|
-
ref.at("./docidentifier[@primary = 'true'][@language = '#{@lang}']") ||
|
158
|
-
ref.at("./docidentifier[@primary = 'true'][not(@language)]") ||
|
159
|
-
ref.at("./docidentifier[@primary = 'true']") ||
|
160
|
-
ref.at("./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']") ||
|
161
|
-
ref.at("./docidentifier[not(@type = 'DOI')][not(@language)]") ||
|
162
|
-
ref.at("./docidentifier[not(@type = 'DOI')]") or next
|
156
|
+
docid = select_docid(ref) or next
|
163
157
|
reference = format_ref(docid.children.to_xml, docid["type"])
|
164
158
|
@anchors[ref["id"]] = { xref: reference }
|
165
159
|
end
|
166
160
|
end
|
167
161
|
|
162
|
+
def select_docid(ref)
|
163
|
+
ref.at("./docidentifier[@type = 'metanorma']") ||
|
164
|
+
ref.at("./docidentifier[@primary = 'true'][@language = '#{@lang}']") ||
|
165
|
+
ref.at("./docidentifier[@primary = 'true'][not(@language)]") ||
|
166
|
+
ref.at("./docidentifier[@primary = 'true']") ||
|
167
|
+
ref.at("./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']") ||
|
168
|
+
ref.at("./docidentifier[not(@type = 'DOI')][not(@language)]") ||
|
169
|
+
ref.at("./docidentifier[not(@type = 'DOI')]")
|
170
|
+
end
|
171
|
+
|
168
172
|
def fetch_termbase(_termbase, _id)
|
169
173
|
""
|
170
174
|
end
|
171
175
|
|
172
176
|
def read_local_bibitem(uri)
|
173
|
-
|
174
|
-
|
175
|
-
file = "#{@localdir}#{uri}.rxl"
|
176
|
-
File.file?(file) or file = "#{@localdir}#{uri}.xml"
|
177
|
-
File.file?(file) or return nil
|
178
|
-
xml = Nokogiri::XML(File.read(file, encoding: "utf-8"))
|
177
|
+
xml = read_local_bibitem_file(uri) or return nil
|
179
178
|
ret = xml.at("//*[local-name() = 'bibdata']") or return nil
|
180
179
|
ret = Nokogiri::XML(ret.to_xml
|
181
180
|
.sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")).root
|
@@ -186,6 +185,15 @@ module Metanorma
|
|
186
185
|
ret
|
187
186
|
end
|
188
187
|
|
188
|
+
def read_local_bibitem_file(uri)
|
189
|
+
return nil if %r{^https?://}.match?(uri)
|
190
|
+
|
191
|
+
file = "#{@localdir}#{uri}.rxl"
|
192
|
+
File.file?(file) or file = "#{@localdir}#{uri}.xml"
|
193
|
+
File.file?(file) or return nil
|
194
|
+
Nokogiri::XML(File.read(file, encoding: "utf-8"))
|
195
|
+
end
|
196
|
+
|
189
197
|
# if citation uri points to local file, get bibitem from it
|
190
198
|
def fetch_local_bibitem(xmldoc)
|
191
199
|
xmldoc.xpath("//bibitem[formattedref][uri[@type = 'citation']]")
|
@@ -206,6 +214,7 @@ module Metanorma
|
|
206
214
|
def bibitem_cleanup(xmldoc)
|
207
215
|
bibitem_nested_id(xmldoc)
|
208
216
|
ref_dl_cleanup(xmldoc)
|
217
|
+
formattedref_spans(xmldoc)
|
209
218
|
fetch_local_bibitem(xmldoc)
|
210
219
|
end
|
211
220
|
end
|
@@ -5,10 +5,19 @@ module Metanorma
|
|
5
5
|
requirement_metadata(xmldoc)
|
6
6
|
requirement_inherit(xmldoc)
|
7
7
|
requirement_descriptions(xmldoc)
|
8
|
+
requirement_identifier(xmldoc)
|
8
9
|
end
|
9
10
|
|
10
11
|
REQRECPER = "//requirement | //recommendation | //permission".freeze
|
11
12
|
|
13
|
+
def requirement_identifier(xmldoc)
|
14
|
+
xmldoc.xpath(REQRECPER).each do |r|
|
15
|
+
r.xpath("./identifier[link] | ./inherit[link]").each do |i|
|
16
|
+
i.children = i.at("./link/@target").text
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
12
21
|
def requirement_inherit(xmldoc)
|
13
22
|
xmldoc.xpath(REQRECPER).each do |r|
|
14
23
|
ins = requirement_inherit_insert(r)
|
@@ -76,8 +85,16 @@ module Metanorma
|
|
76
85
|
end
|
77
86
|
end
|
78
87
|
|
88
|
+
def requirement_metadata1_attrs
|
89
|
+
%w(obligation model type)
|
90
|
+
end
|
91
|
+
|
79
92
|
def requirement_metadata1_tags
|
80
|
-
%w(
|
93
|
+
%w(identifier subject inherit)
|
94
|
+
end
|
95
|
+
|
96
|
+
def requirement_metadata_component_tags
|
97
|
+
[]
|
81
98
|
end
|
82
99
|
|
83
100
|
def requirement_metadata1(reqt, dlist, ins)
|
@@ -85,20 +102,21 @@ module Metanorma
|
|
85
102
|
reqt.children.first.previous = " "
|
86
103
|
ins = reqt.children.first
|
87
104
|
end
|
88
|
-
|
105
|
+
requirement_metadata1_attrs.each do |a|
|
89
106
|
dl_to_attrs(reqt, dlist, a)
|
90
107
|
end
|
91
108
|
requirement_metadata1_tags.each do |a|
|
92
109
|
ins = dl_to_elems(ins, reqt, dlist, a)
|
93
110
|
end
|
94
|
-
reqt_dl_to_classif(ins, reqt, dlist)
|
111
|
+
ins = reqt_dl_to_classif(ins, reqt, dlist)
|
112
|
+
reqt_dl_to_classif1(ins, reqt, dlist)
|
95
113
|
end
|
96
114
|
|
97
115
|
def reqt_dl_to_classif(ins, reqt, dlist)
|
98
116
|
if a = reqt.at("./classification[last()]") then ins = a end
|
99
117
|
dlist.xpath("./dt[text()='classification']").each do |e|
|
100
118
|
val = e.at("./following::dd/p") || e.at("./following::dd")
|
101
|
-
req_classif_parse(val.
|
119
|
+
req_classif_parse(val.children.to_xml).each do |r|
|
102
120
|
ins.next = "<classification><tag>#{r[0]}</tag>"\
|
103
121
|
"<value>#{r[1]}</value></classification>"
|
104
122
|
ins = ins.next
|
@@ -106,6 +124,21 @@ module Metanorma
|
|
106
124
|
end
|
107
125
|
ins
|
108
126
|
end
|
127
|
+
|
128
|
+
def reqt_dl_to_classif1(ins, reqt, dlist)
|
129
|
+
if a = reqt.at("./classification[last()]") then ins = a end
|
130
|
+
dlist.xpath("./dt").each do |e|
|
131
|
+
next if (requirement_metadata1_attrs + requirement_metadata1_tags +
|
132
|
+
requirement_metadata_component_tags + %w(classification))
|
133
|
+
.include?(e.text)
|
134
|
+
|
135
|
+
val = e.at("./following::dd/p") || e.at("./following::dd")
|
136
|
+
ins.next = "<classification><tag>#{e.text}</tag>"\
|
137
|
+
"<value>#{val.children.to_xml}</value></classification>"
|
138
|
+
ins = ins.next
|
139
|
+
end
|
140
|
+
ins
|
141
|
+
end
|
109
142
|
end
|
110
143
|
end
|
111
144
|
end
|
@@ -11,7 +11,7 @@ module Metanorma
|
|
11
11
|
n.replace(grkletters(MathML2AsciiMath.m2a(n.to_xml)))
|
12
12
|
end
|
13
13
|
ret = Nokogiri::XML(key.to_xml)
|
14
|
-
|
14
|
+
@c.decode(ret.text.downcase)
|
15
15
|
.gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
|
16
16
|
.gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
|
17
17
|
.gsub(/[0-9]+/, "þ\\0")
|
@@ -100,9 +100,13 @@ module Metanorma
|
|
100
100
|
end
|
101
101
|
end
|
102
102
|
|
103
|
+
def termlookup_cleanup(xmldoc)
|
104
|
+
Metanorma::Standoc::TermLookupCleanup.new(xmldoc, @log).call
|
105
|
+
end
|
106
|
+
|
103
107
|
def termdef_cleanup(xmldoc)
|
104
108
|
termdef_unnest_cleanup(xmldoc)
|
105
|
-
|
109
|
+
termlookup_cleanup(xmldoc)
|
106
110
|
term_nonverbal_designations(xmldoc)
|
107
111
|
term_dl_to_metadata(xmldoc)
|
108
112
|
term_termsource_to_designation(xmldoc)
|
@@ -117,7 +121,7 @@ module Metanorma
|
|
117
121
|
termdocsource_cleanup(xmldoc)
|
118
122
|
end
|
119
123
|
|
120
|
-
def
|
124
|
+
def term_index_cleanup(xmldoc)
|
121
125
|
return unless @index_terms
|
122
126
|
|
123
127
|
xmldoc.xpath("//preferred").each do |p|
|
@@ -7,7 +7,7 @@ module Metanorma
|
|
7
7
|
text = text.gsub(/\s+<fn /, "<fn ")
|
8
8
|
%w(passthrough passthrough-inline).each do |v|
|
9
9
|
text.gsub!(%r{<#{v}\s+formats="metanorma">([^<]*)
|
10
|
-
</#{v}>}mx) {
|
10
|
+
</#{v}>}mx) { @c.decode($1) }
|
11
11
|
end
|
12
12
|
text
|
13
13
|
end
|
@@ -34,11 +34,13 @@ module Metanorma
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
+
IGNORE_QUOTES_ELEMENTS =
|
38
|
+
%w(pre tt sourcecode stem figure bibdata passthrough identifier).freeze
|
39
|
+
|
37
40
|
def uninterrupt_quotes_around_xml_skip(elem)
|
38
41
|
!(/\A['"]/.match?(elem.text) &&
|
39
|
-
elem.previous.
|
40
|
-
|
41
|
-
.empty? &&
|
42
|
+
elem.previous.path.split(%r{/})[1..-2]
|
43
|
+
.intersection(IGNORE_QUOTES_ELEMENTS).empty? &&
|
42
44
|
((elem.previous.text.strip.empty? &&
|
43
45
|
!empty_tag_with_text_content?(elem.previous)) ||
|
44
46
|
elem.previous.name == "index"))
|
@@ -49,7 +51,7 @@ module Metanorma
|
|
49
51
|
/\S\Z/.match?(prev.text) or return
|
50
52
|
foll = elem.at(".//following::text()[1]")
|
51
53
|
m = /\A(["'][[:punct:]]*)(\s|\Z)/
|
52
|
-
.match(
|
54
|
+
.match(@c.decode(foll&.text)) or return
|
53
55
|
foll.content = foll.text.sub(/\A(["'][[:punct:]]*)/, "")
|
54
56
|
prev.content = "#{prev.text}#{m[1]}"
|
55
57
|
end
|
@@ -74,10 +76,10 @@ module Metanorma
|
|
74
76
|
empty_tag_with_text_content?(x) and prev = "dummy"
|
75
77
|
next unless x.text?
|
76
78
|
|
77
|
-
x.
|
78
|
-
|
79
|
+
ancestors = x.path.split(%r{/})[1..-2]
|
80
|
+
ancestors.intersection(IGNORE_QUOTES_ELEMENTS).empty? and
|
79
81
|
dumb2smart_quotes1(x, prev)
|
80
|
-
prev = x.text
|
82
|
+
prev = x.text unless ancestors.include?("index")
|
81
83
|
end
|
82
84
|
end
|
83
85
|
|
@@ -68,10 +68,9 @@ module Metanorma
|
|
68
68
|
end
|
69
69
|
|
70
70
|
def xref_to_eref(elem)
|
71
|
-
c = HTMLEntities.new
|
72
71
|
elem["bibitemid"] = elem["target"]
|
73
72
|
if ref = @anchors&.dig(elem["target"], :xref)
|
74
|
-
elem["citeas"] = c.decode(ref)
|
73
|
+
elem["citeas"] = @c.decode(ref)
|
75
74
|
else
|
76
75
|
elem["citeas"] = ""
|
77
76
|
xref_to_eref1(elem)
|
@@ -29,6 +29,7 @@ module Metanorma
|
|
29
29
|
preprocessor Metanorma::Plugin::Lutaml::LutamlUmlAttributesTablePreprocessor
|
30
30
|
preprocessor Metanorma::Plugin::Lutaml::LutamlUmlDatamodelDescriptionPreprocessor
|
31
31
|
inline_macro Metanorma::Standoc::PreferredTermInlineMacro
|
32
|
+
inline_macro Metanorma::Standoc::SpanInlineMacro
|
32
33
|
inline_macro Metanorma::Standoc::AltTermInlineMacro
|
33
34
|
inline_macro Metanorma::Standoc::AdmittedTermInlineMacro
|
34
35
|
inline_macro Metanorma::Standoc::DeprecatedTermInlineMacro
|
@@ -93,6 +94,7 @@ module Metanorma
|
|
93
94
|
basebackend "html"
|
94
95
|
outfilesuffix ".xml"
|
95
96
|
@libdir = File.dirname(self.class::_file || __FILE__)
|
97
|
+
@c = HTMLEntities.new
|
96
98
|
end
|
97
99
|
|
98
100
|
class << self
|