metanorma-standoc 2.1.4 → 2.2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/metanorma/standoc/base.rb +1 -0
- data/lib/metanorma/standoc/blocks.rb +3 -7
- data/lib/metanorma/standoc/cleanup.rb +4 -2
- data/lib/metanorma/standoc/cleanup_biblio.rb +204 -0
- data/lib/metanorma/standoc/cleanup_block.rb +46 -4
- data/lib/metanorma/standoc/cleanup_maths.rb +2 -15
- data/lib/metanorma/standoc/cleanup_ref.rb +22 -13
- data/lib/metanorma/standoc/cleanup_reqt.rb +37 -4
- data/lib/metanorma/standoc/cleanup_symbols.rb +1 -1
- data/lib/metanorma/standoc/cleanup_terms.rb +6 -2
- data/lib/metanorma/standoc/cleanup_text.rb +10 -8
- data/lib/metanorma/standoc/cleanup_xref.rb +1 -2
- data/lib/metanorma/standoc/converter.rb +2 -0
- data/lib/metanorma/standoc/front.rb +1 -1
- data/lib/metanorma/standoc/inline.rb +8 -4
- data/lib/metanorma/standoc/isodoc.rng +16 -1
- data/lib/metanorma/standoc/macros.rb +1 -180
- data/lib/metanorma/standoc/macros_inline.rb +194 -0
- data/lib/metanorma/standoc/ref_sect.rb +2 -2
- data/lib/metanorma/standoc/ref_utility.rb +5 -6
- data/lib/metanorma/standoc/reqt.rb +5 -5
- data/lib/metanorma/standoc/reqt.rng +1 -1
- data/lib/metanorma/standoc/section.rb +2 -0
- data/lib/metanorma/standoc/term_lookup_cleanup.rb +1 -1
- data/lib/metanorma/standoc/utils.rb +1 -1
- data/lib/metanorma/standoc/validate.rb +1 -69
- data/lib/metanorma/standoc/validate_table.rb +91 -0
- data/lib/metanorma/standoc/version.rb +1 -1
- data/metanorma-standoc.gemspec +2 -3
- data/spec/metanorma/{refs_dl_spec.rb → biblio_spec.rb} +84 -1
- data/spec/metanorma/blocks_spec.rb +68 -8
- data/spec/metanorma/cleanup_blocks_spec.rb +107 -27
- data/spec/metanorma/inline_spec.rb +4 -0
- data/spec/metanorma/isobib_cache_spec.rb +6 -4
- data/spec/metanorma/macros_spec.rb +6 -2
- data/spec/metanorma/refs_spec.rb +261 -232
- data/spec/metanorma/validate_spec.rb +106 -7
- data/spec/vcr_cassettes/bsi16341.yml +63 -51
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +62 -62
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
- data/spec/vcr_cassettes/hide_refs.yml +58 -58
- data/spec/vcr_cassettes/isobib_get_123.yml +12 -12
- data/spec/vcr_cassettes/isobib_get_123_1.yml +22 -22
- data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +33 -33
- data/spec/vcr_cassettes/isobib_get_123_2.yml +295 -0
- data/spec/vcr_cassettes/isobib_get_123_2001.yml +11 -11
- data/spec/vcr_cassettes/isobib_get_124.yml +12 -12
- data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +24 -30
- data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +46 -46
- data/spec/vcr_cassettes/std-link.yml +14 -72
- metadata +9 -6
- data/lib/metanorma/standoc/cleanup_ref_dl.rb +0 -113
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e6a6d6a98133ba0cf81631287aff2157906da7d0978899abe5c00c78f0da6b2d
|
4
|
+
data.tar.gz: 81c33e16f952e56dcff81bd0485ead5ba998839b307e30eb8de500bf2448a600
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b36c1f1766233c69f9b5a63298d8f7cd8745cac7f7186f210500bc23a27983b3202a724ebd799f6d305b928377ed5d2ef28dcb43aac4e9c91f65b9300a98ac19
|
7
|
+
data.tar.gz: e4b1be10928b81269486083791e7dd23fab26b2697d1b1a82fa607dfa4c6f46a00bdc4373403605c2807a993ab2f4015f34f643607610d10344cfbd95a6a83b3
|
@@ -15,8 +15,7 @@ module Metanorma
|
|
15
15
|
attr_code(id_attr(node).merge(
|
16
16
|
unnumbered: node.option?("unnumbered") ? "true" : nil,
|
17
17
|
number: node.attr("number"),
|
18
|
-
subsequence: node.attr("subsequence")
|
19
|
-
))
|
18
|
+
subsequence: node.attr("subsequence")))
|
20
19
|
end
|
21
20
|
|
22
21
|
def formula_attrs(node)
|
@@ -105,9 +104,8 @@ module Metanorma
|
|
105
104
|
|
106
105
|
def svgmap_attrs(node)
|
107
106
|
attr_code(id_attr(node)
|
108
|
-
.merge(id: node.id,
|
107
|
+
.merge(id: node.id, number: node.attr("number"),
|
109
108
|
unnumbered: node.option?("unnumbered") ? "true" : nil,
|
110
|
-
number: node.attr("number"),
|
111
109
|
subsequence: node.attr("subsequence"))
|
112
110
|
.merge(keep_attrs(node)))
|
113
111
|
end
|
@@ -225,7 +223,6 @@ module Metanorma
|
|
225
223
|
filename: node.attr("filename"))))
|
226
224
|
end
|
227
225
|
|
228
|
-
# NOTE: html escaping is performed by Nokogiri
|
229
226
|
def listing(node)
|
230
227
|
fragment = ::Nokogiri::XML::Builder.new do |xml|
|
231
228
|
xml.sourcecode **listing_attrs(node) do |s|
|
@@ -238,11 +235,10 @@ module Metanorma
|
|
238
235
|
end
|
239
236
|
|
240
237
|
def pass(node)
|
241
|
-
c = HTMLEntities.new
|
242
238
|
noko do |xml|
|
243
239
|
xml.passthrough **attr_code(formats:
|
244
240
|
node.attr("format") || "metanorma") do |p|
|
245
|
-
p << c.encode(c.decode(node.content), :basic, :hexadecimal)
|
241
|
+
p << @c.encode(@c.decode(node.content), :basic, :hexadecimal)
|
246
242
|
end
|
247
243
|
end
|
248
244
|
end
|
@@ -5,7 +5,7 @@ require_relative "./cleanup_block"
|
|
5
5
|
require_relative "./cleanup_table"
|
6
6
|
require_relative "./cleanup_footnotes"
|
7
7
|
require_relative "./cleanup_ref"
|
8
|
-
require_relative "./
|
8
|
+
require_relative "./cleanup_biblio"
|
9
9
|
require_relative "./cleanup_boilerplate"
|
10
10
|
require_relative "./cleanup_section"
|
11
11
|
require_relative "./cleanup_terms"
|
@@ -28,6 +28,8 @@ module Metanorma
|
|
28
28
|
passthrough_cleanup(xmldoc)
|
29
29
|
sections_cleanup(xmldoc)
|
30
30
|
obligations_cleanup(xmldoc)
|
31
|
+
para_index_cleanup(xmldoc)
|
32
|
+
block_index_cleanup(xmldoc)
|
31
33
|
table_cleanup(xmldoc)
|
32
34
|
formula_cleanup(xmldoc)
|
33
35
|
form_cleanup(xmldoc)
|
@@ -51,7 +53,7 @@ module Metanorma
|
|
51
53
|
termdef_cleanup(xmldoc)
|
52
54
|
RelatonIev::iev_cleanup(xmldoc, @bibdb)
|
53
55
|
element_name_cleanup(xmldoc)
|
54
|
-
|
56
|
+
term_index_cleanup(xmldoc)
|
55
57
|
bpart_cleanup(xmldoc)
|
56
58
|
quotesource_cleanup(xmldoc)
|
57
59
|
callout_cleanup(xmldoc)
|
@@ -0,0 +1,204 @@
|
|
1
|
+
require "set"
|
2
|
+
require "relaton_bib"
|
3
|
+
|
4
|
+
module Metanorma
|
5
|
+
module Standoc
|
6
|
+
module Cleanup
|
7
|
+
# Replaces every clause flagged bibitem="true" with a bibitem element built
# from the clause's definition list.
# Clauses from which dl_bib_extract returns nothing are left untouched.
def ref_dl_cleanup(xmldoc)
  xmldoc.xpath("//clause[@bibitem = 'true']").each do |clause|
    hash = dl_bib_extract(clause)
    next unless hash

    validate_ref_dl(hash, clause)
    serialised = RelatonBib::BibliographicItem.from_hash(hash).to_xml
    next unless serialised

    doc = Nokogiri::XML(serialised)
    anchor = clause["id"]
    # only carry over ids that do not start with an underscore
    doc.root["id"] = anchor if anchor && !/^_/.match(anchor)
    clause.replace(doc.root)
  end
end
|
17
|
+
|
18
|
+
# do not accept implicit id
|
19
|
+
# Determines the anchor of a definition-list reference and records it.
# The anchor is bib["id"], or else the clause id unless that id starts with
# an underscore. With no anchor, an "Anchors" log entry is added and nil is
# returned; otherwise the anchor is appended to @refids and the remaining
# checks are delegated to validate_ref_dl1.
def validate_ref_dl(bib, clause)
  implicit = /^_/.match?(clause["id"])
  anchor = bib["id"]
  anchor = clause["id"] if !anchor && !implicit
  if anchor
    @refids << anchor
    return validate_ref_dl1(bib, anchor, clause)
  end
  @log.add("Anchors", clause,
           "The following reference is missing an anchor:\n#{clause.to_xml}")
  nil
end
|
31
|
+
|
32
|
+
# Logs a "Bibliography" entry for each of the "title" and "docid" keys that
# is absent from the extracted reference hash.
def validate_ref_dl1(bib, id, clause)
  unless bib["title"]
    @log.add("Bibliography", clause, "Reference #{id} is missing a title")
  end
  bib["docid"] ||
    @log.add("Bibliography", clause,
             "Reference #{id} is missing a document identifier (docid)")
end
|
39
|
+
|
40
|
+
# Wraps the children of the first "p" found under bib[tag] in a <key> element.
# Returns nil when bib has no value for tag.
def extract_from_p(tag, bib, key)
  holder = bib[tag]
  return unless holder

  inner = holder.at('p').children
  "<#{key}>#{inner}</#{key}>"
end
|
45
|
+
|
46
|
+
# if the content is a single paragraph, replace it with its children
|
47
|
+
# single links replaced with uri
|
48
|
+
# Serialises a node to XML text. When its only element child is a "p", the
# paragraph's children (after link_unwrap) are serialised instead of the
# node itself.
def p_unwrap(para)
  kids = para.elements
  single_para = kids.size == 1 && kids[0].name == "p"
  source = single_para ? link_unwrap(kids[0]).children : para
  source.to_xml.strip
end
|
56
|
+
|
57
|
+
# When the node's only element child is a "link", replaces that link with
# its stripped target attribute value. Always returns the node passed in.
def link_unwrap(para)
  only = para.elements
  return para unless only.size == 1 && only[0].name == "link"

  para.at("./link").replace(only[0]["target"].strip)
  para
end
|
64
|
+
|
65
|
+
# Extracts the value held by a definition-list entry.
# Returns nil for an entry without children, a nested hash when the entry
# contains a dl, an array of unwrapped items when its single element child
# is an ol/ul, and otherwise the unwrapped entry itself.
def dd_bib_extract(dtd)
  return nil if dtd.children.empty?
  return dl_bib_extract(dtd) if dtd.at("./dl")

  # the entry is detached from the document before its content is read
  kids = dtd.remove.elements
  single_list = kids.size == 1 && %w(ol ul).include?(kids[0].name)
  return p_unwrap(dtd) unless single_list

  kids[0].xpath("./li").map { |item| p_unwrap(item) }
end
|
77
|
+
|
78
|
+
# Stores val in bib under the dot-separated key, passing the key's segments
# to Metanorma::Utils.set_nested_value.
def add_to_hash(bib, key, val)
  segments = key.split(".")
  Metanorma::Utils.set_nested_value(bib, segments, val)
end
|
81
|
+
|
82
|
+
# definition list, with at most one level of unordered lists
|
83
|
+
# Builds a hash from the clause's direct dl child: each dt (trailing colons
# stripped) names the key under which the value of the following dd entries
# is stored. Subclauses titled contributor, relation or series are extracted
# recursively under that title. Returns nil when the clause has no dl.
def dl_bib_extract(clause, nested = false)
  dl = clause.at("./dl")
  return unless dl

  key = ""
  bib = {}
  dl.xpath("./dt | ./dd").each do |entry|
    if entry.name == "dt"
      key = entry.text.sub(/:+$/, "")
    else
      add_to_hash(bib, key, dd_bib_extract(entry))
    end
  end
  clause.xpath("./clause").each do |sub|
    heading = sub&.at("./title")&.text&.downcase&.strip
    next unless %w(contributor relation series).include?(heading)

    add_to_hash(bib, heading, dl_bib_extract(sub, true))
  end
  dl_bib_extract_title(bib, clause, nested)
end
|
98
|
+
|
99
|
+
# For a non-nested clause with a title child, removes that title and appends
# its serialised children to bib["title"], first turning an existing Hash or
# String value into a one-element array. Returns bib in every case.
def dl_bib_extract_title(bib, clause, nested)
  return bib if nested

  heading = clause.at("./title")
  return bib unless heading

  title = heading.remove.children.to_xml
  existing = bib["title"]
  bib["title"] = [existing] if existing.is_a?(Hash) || existing.is_a?(String)
  bib["title"] ||= []
  bib["title"] << title unless title.empty?
  bib
end
|
108
|
+
|
109
|
+
# ---
|
110
|
+
|
111
|
+
# For each bibitem whose formattedref contains a span, extracts its content,
# preprocesses it, and hands the result to spans_to_bibitem.
def formattedref_spans(xmldoc)
  xmldoc.xpath("//bibitem[formattedref//span]").each do |bibitem|
    collected = extract_content(bibitem)
    spans_to_bibitem(bibitem, spans_preprocess(collected))
  end
end
|
116
|
+
|
117
|
+
# Returns the docidentifier entries of the bibitem followed by its span
# entries, as one array.
def extract_content(bib)
  docids = extract_docid(bib)
  docids + extract_spans(bib)
end
|
120
|
+
|
121
|
+
# Collects the spans inside the bibitem's formattedref as
# { key:, type:, val: } hashes. The class attribute is split at its first
# "." into key and type; val is the serialised span content.
# A span whose class is exactly "type" is removed from the formattedref;
# any other classed span is replaced by its own children.
#
# Spans without a class attribute are left in place and not collected:
# calling split on a missing attribute would otherwise raise NoMethodError.
def extract_spans(bib)
  bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
    cls = s["class"] or next
    key, type = cls.split(".", 2)
    m << { key: key, type: type, val: s.children.to_xml }
    (cls == "type" and s.remove) or s.replace(s.children)
  end
end
|
129
|
+
|
130
|
+
# Removes each direct docidentifier child of the bibitem and returns them as
# { key: "docid", type:, val: } hashes, where val is the element's text.
def extract_docid(bib)
  bib.xpath("./docidentifier").map do |docid|
    entry = { key: "docid", type: docid["type"], val: docid.text }
    docid.remove
    entry
  end
end
|
136
|
+
|
137
|
+
# Groups the flat list of extracted entries into a hash with the list-valued
# keys :contributor, :docid, :uri and :date, plus the scalar keys :pubplace,
# :title and :type when such entries are present. Entries with any other
# key are ignored.
def spans_preprocess(spans)
  grouped = { contributor: [], docid: [], uri: [], date: [] }
  spans.each_with_object(grouped) do |span, acc|
    key = span[:key]
    value = span[:val]
    case key
    when "uri", "docid"
      acc[key.to_sym] << { type: span[:type], val: value }
    when "pubyear"
      acc[:date] << { type: "published", val: value }
    when "pubplace", "title", "type"
      acc[key.to_sym] = value
    when "publisher"
      acc[:contributor] << { role: "publisher", entity: "organization",
                             name: value }
    when "surname", "initials", "givenname"
      acc[:contributor] = spans_preprocess_contrib(span, acc[:contributor])
    end
  end
end
|
154
|
+
|
155
|
+
# Adds a name part (surname, initials or givenname) to the contributor list.
# A new person entry is opened first when spans_preprocess_new_contrib? says
# so; its role is the span type, defaulting to "author". The part is then
# stored on the last entry, and the list is returned.
def spans_preprocess_contrib(span, contrib)
  if spans_preprocess_new_contrib?(span, contrib)
    contrib << { role: span[:type] || "author", entity: "person" }
  end
  contrib.last[span[:key].to_sym] = span[:val]
  contrib
end
|
161
|
+
|
162
|
+
# Decides whether a name part starts a new contributor entry. It does when
# the list is empty, when the last entry already holds the corresponding
# part (a surname for a surname span, initials or a given name otherwise),
# or when the last entry's role differs from the span type (default "author").
def spans_preprocess_new_contrib?(span, contrib)
  return true if contrib.empty?

  current = contrib.last
  already_filled =
    if span[:key] == "surname" then current[:surname]
    else current[:initials] || current[:givenname]
    end
  already_filled || current[:role] != (span[:type] || "author")
end
|
169
|
+
|
170
|
+
# Serialises the hash produced by spans_preprocess into bibitem child
# elements and appends them to bib: title, uri, docidentifier, date,
# contributor and place, in that order. A :type entry is written to the
# bibitem's "type" attribute rather than emitted as an element.
#
# bib   - the bibitem node; must accept `["type"] =` and `<<`
# spans - hash with the keys built by spans_preprocess
#
# Returns the result of appending the generated XML to bib.
def spans_to_bibitem(bib, spans)
  ret = +""
  spans[:title] and ret << "<title>#{spans[:title]}</title>"
  spans[:uri].each { |s| ret << span_to_docid(s, "uri") }
  spans[:docid].each { |s| ret << span_to_docid(s, "docidentifier") }
  spans[:date].each { |s| ret << span_to_docid(s, "date") }
  spans[:contributor].each { |s| ret << span_to_contrib(s) }
  # spans_preprocess stores the place of publication under :pubplace;
  # reading :place here always produced an empty <place> element
  spans[:pubplace] and ret << "<place>#{spans[:pubplace]}</place>"
  spans[:type] and bib["type"] = spans[:type]
  bib << ret
end
|
181
|
+
|
182
|
+
# Renders one entry as a <key> element holding its value, with a type
# attribute when the entry has a type.
def span_to_docid(span, key)
  type_attr = span[:type] ? " type='#{span[:type]}'" : ""
  "<#{key}#{type_attr}>#{span[:val]}</#{key}>"
end
|
189
|
+
|
190
|
+
# Renders one contributor entry as a <contributor> element with its role.
# Organization entries carry a name; all other entries are rendered as a
# person whose name holds the initials when present, otherwise the given
# name as forename, followed by the surname.
def span_to_contrib(span)
  entity =
    if span[:entity] == "organization"
      "<organization><name>#{span[:name]}</name></organization>"
    else
      given =
        if span[:initials] then "<initial>#{span[:initials]}</initial>"
        else "<forename>#{span[:givenname]}</forename>"
        end
      "<person><name>#{given}<surname>#{span[:surname]}</surname></name>" \
        "</person>"
    end
  "<contributor><role type='#{span[:role]}'/>#{entity}</contributor>"
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
@@ -45,7 +45,8 @@ module Metanorma
|
|
45
45
|
|
46
46
|
def figure_dl_cleanup1(xmldoc)
|
47
47
|
q = "//figure/following-sibling::*[self::dl]"
|
48
|
-
|
48
|
+
q1 = "//figure/figure/following-sibling::*[self::dl]"
|
49
|
+
(xmldoc.xpath(q) - xmldoc.xpath(q1)).each do |s|
|
49
50
|
s["key"] == "true" and s.previous_element << s.remove
|
50
51
|
end
|
51
52
|
end
|
@@ -65,8 +66,9 @@ module Metanorma
|
|
65
66
|
# examples containing only figures become subfigures of figures
|
66
67
|
def subfigure_cleanup(xmldoc)
|
67
68
|
xmldoc.xpath("//example[figure]").each do |e|
|
68
|
-
next unless e.elements.
|
69
|
-
%w(name figure).include?
|
69
|
+
next unless e.elements.reject do |m|
|
70
|
+
%w(name figure index note).include?(m.name) ||
|
71
|
+
(m.name == "dl" && m["key"] == "true")
|
70
72
|
end.empty?
|
71
73
|
|
72
74
|
e.name = "figure"
|
@@ -84,9 +86,9 @@ module Metanorma
|
|
84
86
|
|
85
87
|
def figure_cleanup(xmldoc)
|
86
88
|
figure_footnote_cleanup(xmldoc)
|
89
|
+
subfigure_cleanup(xmldoc)
|
87
90
|
figure_dl_cleanup1(xmldoc)
|
88
91
|
figure_dl_cleanup2(xmldoc)
|
89
|
-
subfigure_cleanup(xmldoc)
|
90
92
|
single_subfigure_cleanup(xmldoc)
|
91
93
|
end
|
92
94
|
|
@@ -178,6 +180,46 @@ module Metanorma
|
|
178
180
|
end
|
179
181
|
end
|
180
182
|
end
|
183
|
+
|
184
|
+
# Within each listed block-level container, replaces a direct child
# paragraph by its children when indexterm_para? accepts it.
# NOTE(review): the paragraph filter looks for an `indexterm` child, while
# indexterm_para? strips `index` children; confirm which element name is
# present at this stage.
def block_index_cleanup(xmldoc)
  containers = %w(quote td th formula li dt dd example note figure
                  sourcecode admonition termnote termexample form
                  requirement recommendation permission imagemap svgmap)
  query = containers.map { |name| "//#{name}" }.join(" | ")
  xmldoc.xpath(query).each do |block|
    block.xpath("./p[indexterm]").each do |para|
      next unless indexterm_para?(para)

      para.replace(para.children)
    end
  end
end
|
196
|
+
|
197
|
+
# True when the paragraph has no non-blank text once its direct `index`
# children are discarded. The check runs on a copy, so the paragraph itself
# is not modified.
def indexterm_para?(para)
  copy = para.dup
  copy.xpath("./index").each { |entry| entry.remove }
  copy.text.strip.empty?
end
|
202
|
+
|
203
|
+
# False for nil and for image, literal and sourcecode elements; true for
# any other element.
def include_indexterm?(elem)
  !elem.nil? && !%w(image literal sourcecode).include?(elem.name)
end
|
208
|
+
|
209
|
+
# Finds paragraphs with an `index` child that indexterm_para? accepts, and
# passes each, with its previous and next sibling elements, to
# para_index_cleanup1. All candidates are selected before any is processed.
def para_index_cleanup(xmldoc)
  index_only = xmldoc.xpath("//p[index]").select do |para|
    indexterm_para?(para)
  end
  index_only.each do |para|
    para_index_cleanup1(para, para.previous_element, para.next_element)
  end
end
|
215
|
+
|
216
|
+
# Moves the children of the paragraph into a neighbouring element.
# They are appended to the previous element when include_indexterm? accepts
# it. Otherwise they are inserted before the first child of the following
# element, provided that element is accepted and has children. With no
# suitable neighbour, nothing happens.
def para_index_cleanup1(para, prev, foll)
  return prev << para.remove.children if include_indexterm?(prev)
  return unless include_indexterm?(foll)
  return if foll.children.empty?

  foll.children.first.previous = para.remove.children
end
|
181
223
|
end
|
182
224
|
end
|
183
225
|
end
|
@@ -1,24 +1,11 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
require "pathname"
|
3
|
-
require "html2doc"
|
4
1
|
require "asciimath2unitsml"
|
5
|
-
require_relative "./cleanup_block"
|
6
|
-
require_relative "./cleanup_footnotes"
|
7
|
-
require_relative "./cleanup_ref"
|
8
|
-
require_relative "./cleanup_ref_dl"
|
9
|
-
require_relative "./cleanup_boilerplate"
|
10
|
-
require_relative "./cleanup_section"
|
11
|
-
require_relative "./cleanup_terms"
|
12
|
-
require_relative "./cleanup_inline"
|
13
|
-
require_relative "./cleanup_amend"
|
14
|
-
require "relaton_iev"
|
15
2
|
|
16
3
|
module Metanorma
|
17
4
|
module Standoc
|
18
5
|
module Cleanup
|
19
6
|
def asciimath2mathml(text)
|
20
7
|
text = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
|
21
|
-
"<amathstem>#{
|
8
|
+
"<amathstem>#{@c.decode($1)}</amathstem>"
|
22
9
|
end
|
23
10
|
text = Html2Doc.new({})
|
24
11
|
.asciimath_to_mathml(text, ["<amathstem>", "</amathstem>"])
|
@@ -60,7 +47,7 @@ module Metanorma
|
|
60
47
|
def mathml_italicise(xml)
|
61
48
|
xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
|
62
49
|
"m" => MATHML_NS).each do |i|
|
63
|
-
char =
|
50
|
+
char = @c.decode(i.text)
|
64
51
|
i["mathvariant"] = "normal" if mi_italicise?(char)
|
65
52
|
end
|
66
53
|
end
|
@@ -153,29 +153,28 @@ module Metanorma
|
|
153
153
|
|
154
154
|
def reference_names(xmldoc)
|
155
155
|
xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |ref|
|
156
|
-
docid = ref
|
157
|
-
ref.at("./docidentifier[@primary = 'true'][@language = '#{@lang}']") ||
|
158
|
-
ref.at("./docidentifier[@primary = 'true'][not(@language)]") ||
|
159
|
-
ref.at("./docidentifier[@primary = 'true']") ||
|
160
|
-
ref.at("./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']") ||
|
161
|
-
ref.at("./docidentifier[not(@type = 'DOI')][not(@language)]") ||
|
162
|
-
ref.at("./docidentifier[not(@type = 'DOI')]") or next
|
156
|
+
docid = select_docid(ref) or next
|
163
157
|
reference = format_ref(docid.children.to_xml, docid["type"])
|
164
158
|
@anchors[ref["id"]] = { xref: reference }
|
165
159
|
end
|
166
160
|
end
|
167
161
|
|
162
|
+
# Returns the docidentifier used to label a reference, trying in order:
# type "metanorma"; primary identifiers (document language, then no
# language, then any); non-DOI identifiers (same language order).
# Returns nil when none matches.
def select_docid(ref)
  in_lang = "[@language = '#{@lang}']"
  no_lang = "[not(@language)]"
  primary = "[@primary = 'true']"
  non_doi = "[not(@type = 'DOI')]"
  predicates = ["[@type = 'metanorma']",
                "#{primary}#{in_lang}", "#{primary}#{no_lang}", primary,
                "#{non_doi}#{in_lang}", "#{non_doi}#{no_lang}", non_doi]
  predicates.each do |predicate|
    found = ref.at("./docidentifier#{predicate}")
    return found if found
  end
  nil
end
|
171
|
+
|
168
172
|
def fetch_termbase(_termbase, _id)
|
169
173
|
""
|
170
174
|
end
|
171
175
|
|
172
176
|
def read_local_bibitem(uri)
|
173
|
-
|
174
|
-
|
175
|
-
file = "#{@localdir}#{uri}.rxl"
|
176
|
-
File.file?(file) or file = "#{@localdir}#{uri}.xml"
|
177
|
-
File.file?(file) or return nil
|
178
|
-
xml = Nokogiri::XML(File.read(file, encoding: "utf-8"))
|
177
|
+
xml = read_local_bibitem_file(uri) or return nil
|
179
178
|
ret = xml.at("//*[local-name() = 'bibdata']") or return nil
|
180
179
|
ret = Nokogiri::XML(ret.to_xml
|
181
180
|
.sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")).root
|
@@ -186,6 +185,15 @@ module Metanorma
|
|
186
185
|
ret
|
187
186
|
end
|
188
187
|
|
188
|
+
# Parses the local file a citation URI points to. URIs beginning with
# http:// or https:// are not looked up. The path is @localdir plus the URI
# with an .rxl extension, falling back to .xml. Returns nil when neither
# file exists.
def read_local_bibitem_file(uri)
  return nil if %r{^https?://}.match?(uri)

  candidates = %w(rxl xml).map { |ext| "#{@localdir}#{uri}.#{ext}" }
  path = candidates.find { |candidate| File.file?(candidate) }
  path and Nokogiri::XML(File.read(path, encoding: "utf-8"))
end
|
196
|
+
|
189
197
|
# if citation uri points to local file, get bibitem from it
|
190
198
|
def fetch_local_bibitem(xmldoc)
|
191
199
|
xmldoc.xpath("//bibitem[formattedref][uri[@type = 'citation']]")
|
@@ -206,6 +214,7 @@ module Metanorma
|
|
206
214
|
def bibitem_cleanup(xmldoc)
|
207
215
|
bibitem_nested_id(xmldoc)
|
208
216
|
ref_dl_cleanup(xmldoc)
|
217
|
+
formattedref_spans(xmldoc)
|
209
218
|
fetch_local_bibitem(xmldoc)
|
210
219
|
end
|
211
220
|
end
|
@@ -5,10 +5,19 @@ module Metanorma
|
|
5
5
|
requirement_metadata(xmldoc)
|
6
6
|
requirement_inherit(xmldoc)
|
7
7
|
requirement_descriptions(xmldoc)
|
8
|
+
requirement_identifier(xmldoc)
|
8
9
|
end
|
9
10
|
|
10
11
|
REQRECPER = "//requirement | //recommendation | //permission".freeze
|
11
12
|
|
13
|
+
# In every requirement, recommendation and permission, replaces the content
# of each direct identifier or inherit child that contains a link with the
# text of that link's target attribute.
def requirement_identifier(xmldoc)
  linked = xmldoc.xpath(REQRECPER).flat_map do |reqt|
    reqt.xpath("./identifier[link] | ./inherit[link]").to_a
  end
  linked.each do |node|
    node.children = node.at("./link/@target").text
  end
end
|
20
|
+
|
12
21
|
def requirement_inherit(xmldoc)
|
13
22
|
xmldoc.xpath(REQRECPER).each do |r|
|
14
23
|
ins = requirement_inherit_insert(r)
|
@@ -76,8 +85,16 @@ module Metanorma
|
|
76
85
|
end
|
77
86
|
end
|
78
87
|
|
88
|
+
# Names of the definition-list keys that requirement_metadata1 passes to
# dl_to_attrs.
def requirement_metadata1_attrs
  ["obligation", "model", "type"]
end
|
91
|
+
|
79
92
|
def requirement_metadata1_tags
|
80
|
-
%w(
|
93
|
+
%w(identifier subject inherit)
|
94
|
+
end
|
95
|
+
|
96
|
+
def requirement_metadata_component_tags
|
97
|
+
[]
|
81
98
|
end
|
82
99
|
|
83
100
|
def requirement_metadata1(reqt, dlist, ins)
|
@@ -85,20 +102,21 @@ module Metanorma
|
|
85
102
|
reqt.children.first.previous = " "
|
86
103
|
ins = reqt.children.first
|
87
104
|
end
|
88
|
-
|
105
|
+
requirement_metadata1_attrs.each do |a|
|
89
106
|
dl_to_attrs(reqt, dlist, a)
|
90
107
|
end
|
91
108
|
requirement_metadata1_tags.each do |a|
|
92
109
|
ins = dl_to_elems(ins, reqt, dlist, a)
|
93
110
|
end
|
94
|
-
reqt_dl_to_classif(ins, reqt, dlist)
|
111
|
+
ins = reqt_dl_to_classif(ins, reqt, dlist)
|
112
|
+
reqt_dl_to_classif1(ins, reqt, dlist)
|
95
113
|
end
|
96
114
|
|
97
115
|
def reqt_dl_to_classif(ins, reqt, dlist)
|
98
116
|
if a = reqt.at("./classification[last()]") then ins = a end
|
99
117
|
dlist.xpath("./dt[text()='classification']").each do |e|
|
100
118
|
val = e.at("./following::dd/p") || e.at("./following::dd")
|
101
|
-
req_classif_parse(val.
|
119
|
+
req_classif_parse(val.children.to_xml).each do |r|
|
102
120
|
ins.next = "<classification><tag>#{r[0]}</tag>"\
|
103
121
|
"<value>#{r[1]}</value></classification>"
|
104
122
|
ins = ins.next
|
@@ -106,6 +124,21 @@ module Metanorma
|
|
106
124
|
end
|
107
125
|
ins
|
108
126
|
end
|
127
|
+
|
128
|
+
# Turns every definition-list key that is not a known attribute, tag,
# component tag or "classification" into a <classification> element whose
# tag is the key and whose value is the serialised content of the dd (or its
# paragraph) that follows it.
# New elements are inserted after the requirement's last existing
# classification when there is one, otherwise after ins.
#
# ins   - node after which insertion starts
# reqt  - the requirement element
# dlist - the metadata definition list
#
# Returns the last node inserted, or the starting insertion point when
# nothing was added.
def reqt_dl_to_classif1(ins, reqt, dlist)
  if a = reqt.at("./classification[last()]") then ins = a end
  # the reserved keys do not change while iterating, so build the list once
  reserved = requirement_metadata1_attrs + requirement_metadata1_tags +
    requirement_metadata_component_tags + %w(classification)
  dlist.xpath("./dt").each do |e|
    next if reserved.include?(e.text)

    # a key with no following dd has no value to record; skip it
    val = e.at("./following::dd/p") || e.at("./following::dd") or next
    ins.next = "<classification><tag>#{e.text}</tag>"\
               "<value>#{val.children.to_xml}</value></classification>"
    ins = ins.next
  end
  ins
end
|
109
142
|
end
|
110
143
|
end
|
111
144
|
end
|
@@ -11,7 +11,7 @@ module Metanorma
|
|
11
11
|
n.replace(grkletters(MathML2AsciiMath.m2a(n.to_xml)))
|
12
12
|
end
|
13
13
|
ret = Nokogiri::XML(key.to_xml)
|
14
|
-
|
14
|
+
@c.decode(ret.text.downcase)
|
15
15
|
.gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
|
16
16
|
.gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
|
17
17
|
.gsub(/[0-9]+/, "þ\\0")
|
@@ -100,9 +100,13 @@ module Metanorma
|
|
100
100
|
end
|
101
101
|
end
|
102
102
|
|
103
|
+
# Runs a TermLookupCleanup over the document, passing it the converter log.
def termlookup_cleanup(xmldoc)
  lookup = Metanorma::Standoc::TermLookupCleanup.new(xmldoc, @log)
  lookup.call
end
|
106
|
+
|
103
107
|
def termdef_cleanup(xmldoc)
|
104
108
|
termdef_unnest_cleanup(xmldoc)
|
105
|
-
|
109
|
+
termlookup_cleanup(xmldoc)
|
106
110
|
term_nonverbal_designations(xmldoc)
|
107
111
|
term_dl_to_metadata(xmldoc)
|
108
112
|
term_termsource_to_designation(xmldoc)
|
@@ -117,7 +121,7 @@ module Metanorma
|
|
117
121
|
termdocsource_cleanup(xmldoc)
|
118
122
|
end
|
119
123
|
|
120
|
-
def
|
124
|
+
def term_index_cleanup(xmldoc)
|
121
125
|
return unless @index_terms
|
122
126
|
|
123
127
|
xmldoc.xpath("//preferred").each do |p|
|
@@ -7,7 +7,7 @@ module Metanorma
|
|
7
7
|
text = text.gsub(/\s+<fn /, "<fn ")
|
8
8
|
%w(passthrough passthrough-inline).each do |v|
|
9
9
|
text.gsub!(%r{<#{v}\s+formats="metanorma">([^<]*)
|
10
|
-
</#{v}>}mx) {
|
10
|
+
</#{v}>}mx) { @c.decode($1) }
|
11
11
|
end
|
12
12
|
text
|
13
13
|
end
|
@@ -34,11 +34,13 @@ module Metanorma
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
+
IGNORE_QUOTES_ELEMENTS =
|
38
|
+
%w(pre tt sourcecode stem figure bibdata passthrough identifier).freeze
|
39
|
+
|
37
40
|
def uninterrupt_quotes_around_xml_skip(elem)
|
38
41
|
!(/\A['"]/.match?(elem.text) &&
|
39
|
-
elem.previous.
|
40
|
-
|
41
|
-
.empty? &&
|
42
|
+
elem.previous.path.split(%r{/})[1..-2]
|
43
|
+
.intersection(IGNORE_QUOTES_ELEMENTS).empty? &&
|
42
44
|
((elem.previous.text.strip.empty? &&
|
43
45
|
!empty_tag_with_text_content?(elem.previous)) ||
|
44
46
|
elem.previous.name == "index"))
|
@@ -49,7 +51,7 @@ module Metanorma
|
|
49
51
|
/\S\Z/.match?(prev.text) or return
|
50
52
|
foll = elem.at(".//following::text()[1]")
|
51
53
|
m = /\A(["'][[:punct:]]*)(\s|\Z)/
|
52
|
-
.match(
|
54
|
+
.match(@c.decode(foll&.text)) or return
|
53
55
|
foll.content = foll.text.sub(/\A(["'][[:punct:]]*)/, "")
|
54
56
|
prev.content = "#{prev.text}#{m[1]}"
|
55
57
|
end
|
@@ -74,10 +76,10 @@ module Metanorma
|
|
74
76
|
empty_tag_with_text_content?(x) and prev = "dummy"
|
75
77
|
next unless x.text?
|
76
78
|
|
77
|
-
x.
|
78
|
-
|
79
|
+
ancestors = x.path.split(%r{/})[1..-2]
|
80
|
+
ancestors.intersection(IGNORE_QUOTES_ELEMENTS).empty? and
|
79
81
|
dumb2smart_quotes1(x, prev)
|
80
|
-
prev = x.text
|
82
|
+
prev = x.text unless ancestors.include?("index")
|
81
83
|
end
|
82
84
|
end
|
83
85
|
|
@@ -68,10 +68,9 @@ module Metanorma
|
|
68
68
|
end
|
69
69
|
|
70
70
|
def xref_to_eref(elem)
|
71
|
-
c = HTMLEntities.new
|
72
71
|
elem["bibitemid"] = elem["target"]
|
73
72
|
if ref = @anchors&.dig(elem["target"], :xref)
|
74
|
-
elem["citeas"] = c.decode(ref)
|
73
|
+
elem["citeas"] = @c.decode(ref)
|
75
74
|
else
|
76
75
|
elem["citeas"] = ""
|
77
76
|
xref_to_eref1(elem)
|
@@ -29,6 +29,7 @@ module Metanorma
|
|
29
29
|
preprocessor Metanorma::Plugin::Lutaml::LutamlUmlAttributesTablePreprocessor
|
30
30
|
preprocessor Metanorma::Plugin::Lutaml::LutamlUmlDatamodelDescriptionPreprocessor
|
31
31
|
inline_macro Metanorma::Standoc::PreferredTermInlineMacro
|
32
|
+
inline_macro Metanorma::Standoc::SpanInlineMacro
|
32
33
|
inline_macro Metanorma::Standoc::AltTermInlineMacro
|
33
34
|
inline_macro Metanorma::Standoc::AdmittedTermInlineMacro
|
34
35
|
inline_macro Metanorma::Standoc::DeprecatedTermInlineMacro
|
@@ -93,6 +94,7 @@ module Metanorma
|
|
93
94
|
basebackend "html"
|
94
95
|
outfilesuffix ".xml"
|
95
96
|
@libdir = File.dirname(self.class::_file || __FILE__)
|
97
|
+
@c = HTMLEntities.new
|
96
98
|
end
|
97
99
|
|
98
100
|
class << self
|