metanorma-standoc 2.1.5 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/metanorma/standoc/base.rb +13 -0
- data/lib/metanorma/standoc/blocks.rb +26 -17
- data/lib/metanorma/standoc/cleanup.rb +1 -1
- data/lib/metanorma/standoc/cleanup_biblio.rb +210 -0
- data/lib/metanorma/standoc/cleanup_block.rb +6 -4
- data/lib/metanorma/standoc/cleanup_maths.rb +2 -15
- data/lib/metanorma/standoc/cleanup_ref.rb +22 -13
- data/lib/metanorma/standoc/cleanup_reqt.rb +3 -103
- data/lib/metanorma/standoc/cleanup_symbols.rb +1 -1
- data/lib/metanorma/standoc/cleanup_text.rb +10 -8
- data/lib/metanorma/standoc/cleanup_xref.rb +1 -2
- data/lib/metanorma/standoc/converter.rb +2 -0
- data/lib/metanorma/standoc/front.rb +1 -1
- data/lib/metanorma/standoc/front_contributor.rb +0 -10
- data/lib/metanorma/standoc/inline.rb +8 -4
- data/lib/metanorma/standoc/isodoc.rng +6 -1
- data/lib/metanorma/standoc/macros.rb +1 -180
- data/lib/metanorma/standoc/macros_inline.rb +194 -0
- data/lib/metanorma/standoc/ref_sect.rb +2 -2
- data/lib/metanorma/standoc/ref_utility.rb +1 -1
- data/lib/metanorma/standoc/reqt.rb +19 -75
- data/lib/metanorma/standoc/reqt.rng +1 -1
- data/lib/metanorma/standoc/section.rb +35 -3
- data/lib/metanorma/standoc/utils.rb +9 -43
- data/lib/metanorma/standoc/validate.rb +1 -69
- data/lib/metanorma/standoc/validate_table.rb +91 -0
- data/lib/metanorma/standoc/version.rb +1 -1
- data/metanorma-standoc.gemspec +4 -5
- data/spec/metanorma/{refs_dl_spec.rb → biblio_spec.rb} +90 -7
- data/spec/metanorma/blocks_spec.rb +31 -267
- data/spec/metanorma/cleanup_blocks_spec.rb +0 -171
- data/spec/metanorma/inline_spec.rb +4 -0
- data/spec/metanorma/macros_concept_spec.rb +1033 -0
- data/spec/metanorma/macros_spec.rb +2 -1030
- data/spec/metanorma/refs_spec.rb +0 -2
- data/spec/metanorma/reqt_spec.rb +130 -0
- data/spec/metanorma/section_spec.rb +5 -0
- data/spec/metanorma/validate_spec.rb +46 -6
- data/spec/vcr_cassettes/bsi16341.yml +80 -52
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +94 -94
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
- data/spec/vcr_cassettes/hide_refs.yml +70 -70
- data/spec/vcr_cassettes/isobib_get_123.yml +11 -11
- data/spec/vcr_cassettes/isobib_get_123_1.yml +23 -23
- data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +35 -35
- data/spec/vcr_cassettes/isobib_get_123_2.yml +22 -22
- data/spec/vcr_cassettes/isobib_get_123_2001.yml +13 -13
- data/spec/vcr_cassettes/isobib_get_124.yml +11 -11
- data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +34 -64
- data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +45 -45
- data/spec/vcr_cassettes/std-link.yml +12 -12
- metadata +15 -11
- data/lib/metanorma/standoc/cleanup_ref_dl.rb +0 -113
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 837bde4e0b1dcb3e1eeb5ffa7c56d063456f4a4cb3666eb00c18aaf6cae7d164
|
4
|
+
data.tar.gz: 69dcdc35a59b471bb2c6ac3cfd52a84b15625a45402c66f83c9e1ec70296b4f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 88ff22adce3bd32d0c2875acf926520a4950beb6e36e3b7e70cd7ffce68c75869fac1e6449ea0b975d2c87a2cce1b2d0e2898f7d7c29d30065abf6708df863f8
|
7
|
+
data.tar.gz: 86493fb18c13786a09a019a9a184684d90c19957c4154f01f13acd4d4b5cb1e7999cc8f0d0413f0b10ea870e676ab2bbe3e78cacc848a9799e0f9b5189d1efe3
|
@@ -9,6 +9,7 @@ require "fileutils"
|
|
9
9
|
require "metanorma-utils"
|
10
10
|
require "isodoc/xslfo_convert"
|
11
11
|
require_relative "render"
|
12
|
+
require "mn-requirements"
|
12
13
|
|
13
14
|
module Metanorma
|
14
15
|
module Standoc
|
@@ -29,6 +30,7 @@ module Metanorma
|
|
29
30
|
init_vars
|
30
31
|
init_misc(node)
|
31
32
|
init_processing(node)
|
33
|
+
init_reqt(node)
|
32
34
|
init_toc(node)
|
33
35
|
init_output(node)
|
34
36
|
init_i18n(node)
|
@@ -42,7 +44,10 @@ module Metanorma
|
|
42
44
|
@anchors = {}
|
43
45
|
@internal_eref_namespaces = []
|
44
46
|
@seen_headers = []
|
47
|
+
@seen_headers_canonical = []
|
45
48
|
@embed_hdr = []
|
49
|
+
@reqt_model = nil
|
50
|
+
@preface = true
|
46
51
|
end
|
47
52
|
|
48
53
|
def init_misc(node)
|
@@ -50,6 +55,7 @@ module Metanorma
|
|
50
55
|
@index_terms = node.attr("index-terms")
|
51
56
|
@boilerplateauthority = node.attr("boilerplate-authority")
|
52
57
|
@embed_hdr = node.attr("embed_hdr")
|
58
|
+
@document_scheme = node.attr("document-scheme")
|
53
59
|
end
|
54
60
|
|
55
61
|
def init_processing(node)
|
@@ -62,6 +68,13 @@ module Metanorma
|
|
62
68
|
@datauriimage = node.attr("data-uri-image") != "false"
|
63
69
|
end
|
64
70
|
|
71
|
+
# Set up requirements handling for the document being converted.
#
# The default requirements model is read from the "requirements-model"
# document attribute; when that attribute is absent, the value of
# default_requirement_model is used instead. A Metanorma::Requirements
# instance configured with that default is stored in @reqt_models.
#
# @param node the document node whose attributes are consulted
def init_reqt(node)
  model = node.attr("requirements-model")
  model ||= default_requirement_model
  @default_requirement_model = model
  @reqt_models =
    Metanorma::Requirements.new({ default: @default_requirement_model })
end
|
77
|
+
|
65
78
|
def init_toc(node)
|
66
79
|
@htmltoclevels = node.attr("htmltoclevels")
|
67
80
|
@doctoclevels = node.attr("doctoclevels")
|
@@ -34,7 +34,7 @@ module Metanorma
|
|
34
34
|
# We append each contained block to its parent
|
35
35
|
def open(node)
|
36
36
|
role = node.role || node.attr("style")
|
37
|
-
reqt_subpart(role) and return requirement_subpart(node)
|
37
|
+
reqt_subpart?(role) and return requirement_subpart(node)
|
38
38
|
role == "form" and return form(node)
|
39
39
|
role == "definition" and return termdefinition(node)
|
40
40
|
result = []
|
@@ -88,26 +88,37 @@ module Metanorma
|
|
88
88
|
end
|
89
89
|
|
90
90
|
def example(node)
|
91
|
-
|
92
|
-
|
91
|
+
(in_terms? || node.option?("termexample")) and return term_example(node)
|
93
92
|
role = node.role || node.attr("style")
|
94
|
-
|
95
|
-
|
96
|
-
return
|
97
|
-
return svgmap_example(node) if role == "svgmap"
|
98
|
-
return form(node) if role == "form"
|
99
|
-
return termdefinition(node) if role == "definition"
|
100
|
-
return figure_example(node) if role == "figure"
|
101
|
-
|
102
|
-
reqt_subpart(role) and return requirement_subpart(node)
|
93
|
+
ret = example_to_requirement(node, role) ||
|
94
|
+
example_by_role(node, role) and return ret
|
95
|
+
reqt_subpart?(role) and return requirement_subpart(node)
|
103
96
|
example_proper(node)
|
104
97
|
end
|
105
98
|
|
99
|
+
# Dispatch an example block to a role-specific handler.
#
# @param node the example block node
# @param role [String, nil] the role (or style) given on the block
# @return the handler's result, or nil when the role has no dedicated
#   handler (the caller then falls through to its remaining cases)
def example_by_role(node, role)
  handlers = {
    "pseudocode" => :pseudocode_example,
    "svgmap" => :svgmap_example,
    "form" => :form,
    "definition" => :termdefinition,
    "figure" => :figure_example,
  }
  handler = handlers[role] or return nil
  send(handler, node)
end
|
108
|
+
|
109
|
+
# Treat an example block as a requirement when its role is one of the
# roles registered in @reqt_models.requirement_roles.
#
# @param node the example block node
# @param role [String, nil] the role (or style) given on the block
# @return the result of requirement(...), or nil when the role is not a
#   registered requirement role
def example_to_requirement(node, role)
  registered = @reqt_models.requirement_roles.key?(role&.to_sym)
  return nil unless registered

  # the model must be selected before the requirement itself is processed:
  # need to call here for proper recursion ordering
  select_requirement_model(node)
  obligation = @reqt_models.requirement_roles[role.to_sym]
  requirement(node, obligation, role)
end
|
117
|
+
|
106
118
|
def svgmap_attrs(node)
|
107
119
|
attr_code(id_attr(node)
|
108
|
-
.merge(id: node.id,
|
120
|
+
.merge(id: node.id, number: node.attr("number"),
|
109
121
|
unnumbered: node.option?("unnumbered") ? "true" : nil,
|
110
|
-
number: node.attr("number"),
|
111
122
|
subsequence: node.attr("subsequence"))
|
112
123
|
.merge(keep_attrs(node)))
|
113
124
|
end
|
@@ -225,7 +236,6 @@ module Metanorma
|
|
225
236
|
filename: node.attr("filename"))))
|
226
237
|
end
|
227
238
|
|
228
|
-
# NOTE: html escaping is performed by Nokogiri
|
229
239
|
def listing(node)
|
230
240
|
fragment = ::Nokogiri::XML::Builder.new do |xml|
|
231
241
|
xml.sourcecode **listing_attrs(node) do |s|
|
@@ -238,11 +248,10 @@ module Metanorma
|
|
238
248
|
end
|
239
249
|
|
240
250
|
def pass(node)
|
241
|
-
c = HTMLEntities.new
|
242
251
|
noko do |xml|
|
243
252
|
xml.passthrough **attr_code(formats:
|
244
253
|
node.attr("format") || "metanorma") do |p|
|
245
|
-
p << c.encode(c.decode(node.content), :basic, :hexadecimal)
|
254
|
+
p << @c.encode(@c.decode(node.content), :basic, :hexadecimal)
|
246
255
|
end
|
247
256
|
end
|
248
257
|
end
|
@@ -5,7 +5,7 @@ require_relative "./cleanup_block"
|
|
5
5
|
require_relative "./cleanup_table"
|
6
6
|
require_relative "./cleanup_footnotes"
|
7
7
|
require_relative "./cleanup_ref"
|
8
|
-
require_relative "./
|
8
|
+
require_relative "./cleanup_biblio"
|
9
9
|
require_relative "./cleanup_boilerplate"
|
10
10
|
require_relative "./cleanup_section"
|
11
11
|
require_relative "./cleanup_terms"
|
@@ -0,0 +1,210 @@
|
|
1
|
+
require "set"
|
2
|
+
require "relaton_bib"
|
3
|
+
|
4
|
+
module Metanorma
  module Standoc
    # Bibliography clean-up: converts references authored as definition
    # lists, and references marked up with classed <span> elements inside
    # <formattedref>, into structured bibitem content.
    module Cleanup
      # Replace every clause flagged bibitem="true" with the bibitem XML that
      # RelatonBib generates from the clause's definition list.
      #
      # Clauses without a definition list, or for which no XML is generated,
      # are left untouched. An explicit clause id (one not starting with "_")
      # is carried over to the generated bibitem.
      def ref_dl_cleanup(xmldoc)
        xmldoc.xpath("//clause[@bibitem = 'true']").each do |c|
          bib = dl_bib_extract(c) or next
          validate_ref_dl(bib, c)
          bibitemxml = RelatonBib::BibliographicItem.from_hash(bib).to_xml or next
          bibitem = Nokogiri::XML(bibitemxml)
          bibitem.root["id"] = c["id"] if c["id"] && !/^_/.match?(c["id"])
          c.replace(bibitem.root)
        end
      end

      # Check that a definition-list reference has an explicit anchor, then
      # run the content checks. Implicit ids (starting with "_") are not
      # accepted as anchors; a missing anchor is logged and stops validation.
      def validate_ref_dl(bib, clause)
        id = bib["id"]
        id ||= clause["id"] unless /^_/.match?(clause["id"])
        unless id
          @log.add("Anchors", clause,
                   "The following reference is missing an anchor:\n"\
                   "#{clause.to_xml}")
          return
        end
        @refids << id
        validate_ref_dl1(bib, id, clause)
      end

      # Log a warning for each of title and docid that the reference lacks.
      def validate_ref_dl1(bib, id, clause)
        bib["title"] or
          @log.add("Bibliography", clause, "Reference #{id} is missing a title")
        bib["docid"] or
          @log.add("Bibliography", clause,
                   "Reference #{id} is missing a document identifier (docid)")
      end

      # Wrap the children of the first <p> under bib[tag] in a <key> element.
      # Returns nil when bib has no entry for tag.
      def extract_from_p(tag, bib, key)
        return unless bib[tag]

        "<#{key}>#{bib[tag].at('p').children}</#{key}>"
      end

      # if the content is a single paragraph, replace it with its children
      # single links replaced with uri
      def p_unwrap(para)
        elems = para.elements
        if elems.size == 1 && elems[0].name == "p"
          link_unwrap(elems[0]).children.to_xml.strip
        else
          para.to_xml.strip
        end
      end

      # If the paragraph's only element is a <link>, replace that link in
      # place with its (stripped) target. Returns the paragraph.
      def link_unwrap(para)
        elems = para.elements
        if elems.size == 1 && elems[0].name == "link"
          para.at("./link").replace(elems[0]["target"].strip)
        end
        para
      end

      # Extract the value of one <dd>:
      # * nil for an empty <dd>;
      # * a nested hash when it contains a <dl>;
      # * an array of strings when its only element is an <ol> or <ul>;
      # * otherwise the unwrapped content as a string.
      # The <dd> is detached from the document unless it holds a nested <dl>.
      def dd_bib_extract(dtd)
        return nil if dtd.children.empty?

        dtd.at("./dl") and return dl_bib_extract(dtd)
        elems = dtd.remove.elements
        return p_unwrap(dtd) unless elems.size == 1 &&
          %w(ol ul).include?(elems[0].name)

        elems[0].xpath("./li").each_with_object([]) do |li, ret|
          ret << p_unwrap(li)
        end
      end

      # Store val in bib under a dotted key path ("a.b.c").
      def add_to_hash(bib, key, val)
        Metanorma::Utils::set_nested_value(bib, key.split("."), val)
      end

      # definition list, with at most one level of unordered lists
      #
      # Builds a hash from the <dt>/<dd> pairs of the clause's <dl> (trailing
      # colons are dropped from keys), then merges in subclauses titled
      # contributor, relation or series. Returns nil when there is no <dl>.
      def dl_bib_extract(clause, nested = false)
        dl = clause.at("./dl") or return
        key = ""
        bib = dl.xpath("./dt | ./dd").each_with_object({}) do |dtd, m|
          # a <dt> sets the current key; a <dd> stores a value under it
          (dtd.name == "dt" and key = dtd.text.sub(/:+$/, "")) or
            add_to_hash(m, key, dd_bib_extract(dtd))
        end
        clause.xpath("./clause").each do |c1|
          key = c1&.at("./title")&.text&.downcase&.strip
          next unless %w(contributor relation series).include? key

          add_to_hash(bib, key, dl_bib_extract(c1, true))
        end
        dl_bib_extract_title(bib, clause, nested)
      end

      # For a top-level (non-nested) clause with a <title>, detach the title
      # and append its markup to bib["title"], which becomes an array.
      # Returns bib.
      def dl_bib_extract_title(bib, clause, nested)
        (!nested && clause.at("./title")) or return bib
        title = clause.at("./title").remove.children.to_xml
        bib["title"] = [bib["title"]] if bib["title"].is_a?(Hash) ||
          bib["title"].is_a?(String)
        bib["title"] ||= []
        bib["title"] << title if !title.empty?
        bib
      end

      # ---

      # Turn classed <span> elements inside <formattedref> into structured
      # bibitem children, for every bibitem that has such spans.
      def formattedref_spans(xmldoc)
        xmldoc.xpath("//bibitem[formattedref//span]").each do |b|
          spans_to_bibitem(b, spans_preprocess(extract_content(b)))
        end
      end

      # Collect docidentifier entries followed by span entries of a bibitem.
      def extract_content(bib)
        extract_docid(bib) + extract_spans(bib)
      end

      # Collect { key:, type:, val: } for each span under formattedref, where
      # the span class is "key" or "key.type". A span with class "type" is
      # removed; any other classed span is replaced by its children.
      # Spans with no class attribute are left in place and ignored (they
      # previously raised NoMethodError on nil).
      def extract_spans(bib)
        bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
          s["class"] or next
          keys = s["class"].split(".", 2)
          m << { key: keys[0], type: keys[1],
                 val: s.children.to_xml }
          (s["class"] == "type" and s.remove) or s.replace(s.children)
        end
      end

      # Collect { key: "docid", type:, val: } for each docidentifier child,
      # removing the element from the bibitem.
      # NOTE(review): val is the decoded text, which is later interpolated
      # into markup by span_to_docid; identifiers containing "&" or "<" may
      # need escaping -- confirm.
      def extract_docid(bib)
        bib.xpath("./docidentifier").each_with_object([]) do |d, m|
          m << { key: "docid", type: d["type"], val: d.text }
          d.remove
        end
      end

      # Group the flat span list into a hash with list-valued :contributor,
      # :docid, :uri and :date entries and scalar :pubplace, :title and :type
      # entries. Unrecognised keys are ignored.
      def spans_preprocess(spans)
        ret = { contributor: [], docid: [], uri: [], date: [] }
        spans.each do |s|
          case s[:key]
          when "uri", "docid"
            ret[s[:key].to_sym] << { type: s[:type], val: s[:val] }
          when "pubyear" then ret[:date] << { type: "published", val: s[:val] }
          when "pubplace", "title", "type" then ret[s[:key].to_sym] = s[:val]
          when "publisher"
            ret[:contributor] << { role: "publisher", entity: "organization",
                                   name: s[:val] }
          when "surname", "initials", "givenname", "formatted-initials"
            ret[:contributor] = spans_preprocess_contrib(s, ret[:contributor])
          end
        end
        ret
      end

      # Add one name component to the contributor list, starting a new person
      # when spans_preprocess_new_contrib? says so. "initials" is stored
      # under :"formatted-initials". The span hash passed in is not modified.
      # Returns contrib.
      def spans_preprocess_contrib(span, contrib)
        key = span[:key] == "initials" ? "formatted-initials" : span[:key]

        spans_preprocess_new_contrib?(span, contrib) and
          contrib << { role: span[:type] || "author", entity: "person" }
        contrib[-1][key.to_sym] = span[:val]
        contrib
      end

      # A new contributor is needed when the list is empty, when the last
      # contributor already has the kind of name component being added
      # (a surname, or initials/given name), or when the role differs.
      def spans_preprocess_new_contrib?(span, contrib)
        contrib.empty? ||
          (if span[:key] == "surname" then contrib[-1][:surname]
           else (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])
           end) ||
          contrib[-1][:role] != (span[:type] || "author")
      end

      # Serialise the grouped spans as XML and append it to the bibitem; a
      # :type entry is set as the bibitem's "type" attribute.
      # Returns the result of bib << xml.
      def spans_to_bibitem(bib, spans)
        ret = +""
        spans[:title] and ret << "<title>#{spans[:title]}</title>"
        spans[:uri].each { |s| ret << span_to_docid(s, "uri") }
        spans[:docid].each { |s| ret << span_to_docid(s, "docidentifier") }
        spans[:date].each { |s| ret << span_to_docid(s, "date") }
        spans[:contributor].each { |s| ret << span_to_contrib(s) }
        # the place is stored under :pubplace by spans_preprocess
        spans[:pubplace] and ret << "<place>#{spans[:pubplace]}</place>"
        spans[:type] and bib["type"] = spans[:type]
        bib << ret
      end

      # Render <key>val</key>, with a type attribute when the span has one.
      def span_to_docid(span, key)
        if span[:type]
          "<#{key} type='#{span[:type]}'>#{span[:val]}</#{key}>"
        else
          "<#{key}>#{span[:val]}</#{key}>"
        end
      end

      # Render a <contributor> for an organization or a person entry.
      def span_to_contrib(span)
        e = if span[:entity] == "organization"
              "<organization><name>#{span[:name]}</name></organization>"
            else span_to_person(span)
            end
        "<contributor><role type='#{span[:role]}'/>#{e}</contributor>"
      end

      # Render a <person>: formatted initials when present, otherwise the
      # given name as <forename>, followed by the surname.
      def span_to_person(span)
        pre = (span[:"formatted-initials"] and
               "<formatted-initials>"\
               "#{span[:"formatted-initials"]}</formatted-initials>") ||
          "<forename>#{span[:givenname]}</forename>"
        "<person><name>#{pre}<surname>#{span[:surname]}</surname></name>"\
          "</person>"
      end
    end
  end
end
|
@@ -45,7 +45,8 @@ module Metanorma
|
|
45
45
|
|
46
46
|
def figure_dl_cleanup1(xmldoc)
|
47
47
|
q = "//figure/following-sibling::*[self::dl]"
|
48
|
-
|
48
|
+
q1 = "//figure/figure/following-sibling::*[self::dl]"
|
49
|
+
(xmldoc.xpath(q) - xmldoc.xpath(q1)).each do |s|
|
49
50
|
s["key"] == "true" and s.previous_element << s.remove
|
50
51
|
end
|
51
52
|
end
|
@@ -65,8 +66,9 @@ module Metanorma
|
|
65
66
|
# examples containing only figures become subfigures of figures
|
66
67
|
def subfigure_cleanup(xmldoc)
|
67
68
|
xmldoc.xpath("//example[figure]").each do |e|
|
68
|
-
next unless e.elements.
|
69
|
-
%w(name figure index).include?
|
69
|
+
next unless e.elements.reject do |m|
|
70
|
+
%w(name figure index note).include?(m.name) ||
|
71
|
+
(m.name == "dl" && m["key"] == "true")
|
70
72
|
end.empty?
|
71
73
|
|
72
74
|
e.name = "figure"
|
@@ -84,9 +86,9 @@ module Metanorma
|
|
84
86
|
|
85
87
|
def figure_cleanup(xmldoc)
|
86
88
|
figure_footnote_cleanup(xmldoc)
|
89
|
+
subfigure_cleanup(xmldoc)
|
87
90
|
figure_dl_cleanup1(xmldoc)
|
88
91
|
figure_dl_cleanup2(xmldoc)
|
89
|
-
subfigure_cleanup(xmldoc)
|
90
92
|
single_subfigure_cleanup(xmldoc)
|
91
93
|
end
|
92
94
|
|
@@ -1,24 +1,11 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
require "pathname"
|
3
|
-
require "html2doc"
|
4
1
|
require "asciimath2unitsml"
|
5
|
-
require_relative "./cleanup_block"
|
6
|
-
require_relative "./cleanup_footnotes"
|
7
|
-
require_relative "./cleanup_ref"
|
8
|
-
require_relative "./cleanup_ref_dl"
|
9
|
-
require_relative "./cleanup_boilerplate"
|
10
|
-
require_relative "./cleanup_section"
|
11
|
-
require_relative "./cleanup_terms"
|
12
|
-
require_relative "./cleanup_inline"
|
13
|
-
require_relative "./cleanup_amend"
|
14
|
-
require "relaton_iev"
|
15
2
|
|
16
3
|
module Metanorma
|
17
4
|
module Standoc
|
18
5
|
module Cleanup
|
19
6
|
def asciimath2mathml(text)
|
20
7
|
text = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
|
21
|
-
"<amathstem>#{
|
8
|
+
"<amathstem>#{@c.decode($1)}</amathstem>"
|
22
9
|
end
|
23
10
|
text = Html2Doc.new({})
|
24
11
|
.asciimath_to_mathml(text, ["<amathstem>", "</amathstem>"])
|
@@ -60,7 +47,7 @@ module Metanorma
|
|
60
47
|
def mathml_italicise(xml)
|
61
48
|
xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
|
62
49
|
"m" => MATHML_NS).each do |i|
|
63
|
-
char =
|
50
|
+
char = @c.decode(i.text)
|
64
51
|
i["mathvariant"] = "normal" if mi_italicise?(char)
|
65
52
|
end
|
66
53
|
end
|
@@ -153,29 +153,28 @@ module Metanorma
|
|
153
153
|
|
154
154
|
def reference_names(xmldoc)
|
155
155
|
xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |ref|
|
156
|
-
docid = ref
|
157
|
-
ref.at("./docidentifier[@primary = 'true'][@language = '#{@lang}']") ||
|
158
|
-
ref.at("./docidentifier[@primary = 'true'][not(@language)]") ||
|
159
|
-
ref.at("./docidentifier[@primary = 'true']") ||
|
160
|
-
ref.at("./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']") ||
|
161
|
-
ref.at("./docidentifier[not(@type = 'DOI')][not(@language)]") ||
|
162
|
-
ref.at("./docidentifier[not(@type = 'DOI')]") or next
|
156
|
+
docid = select_docid(ref) or next
|
163
157
|
reference = format_ref(docid.children.to_xml, docid["type"])
|
164
158
|
@anchors[ref["id"]] = { xref: reference }
|
165
159
|
end
|
166
160
|
end
|
167
161
|
|
162
|
+
# Pick the docidentifier of a bibitem to be used as its reference label.
#
# Candidates are tried in this order, first match wins:
# a "metanorma"-typed identifier; a primary identifier in the document
# language, then with no language, then any primary identifier; and finally
# a non-DOI identifier in the document language, then with no language,
# then any non-DOI identifier.
#
# @param ref the bibitem node
# @return the selected docidentifier node, or nil when none matches
def select_docid(ref)
  candidates = [
    "./docidentifier[@type = 'metanorma']",
    "./docidentifier[@primary = 'true'][@language = '#{@lang}']",
    "./docidentifier[@primary = 'true'][not(@language)]",
    "./docidentifier[@primary = 'true']",
    "./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']",
    "./docidentifier[not(@type = 'DOI')][not(@language)]",
    "./docidentifier[not(@type = 'DOI')]",
  ]
  candidates.each do |xpath|
    found = ref.at(xpath)
    return found if found
  end
  nil
end
|
171
|
+
|
168
172
|
def fetch_termbase(_termbase, _id)
|
169
173
|
""
|
170
174
|
end
|
171
175
|
|
172
176
|
def read_local_bibitem(uri)
|
173
|
-
|
174
|
-
|
175
|
-
file = "#{@localdir}#{uri}.rxl"
|
176
|
-
File.file?(file) or file = "#{@localdir}#{uri}.xml"
|
177
|
-
File.file?(file) or return nil
|
178
|
-
xml = Nokogiri::XML(File.read(file, encoding: "utf-8"))
|
177
|
+
xml = read_local_bibitem_file(uri) or return nil
|
179
178
|
ret = xml.at("//*[local-name() = 'bibdata']") or return nil
|
180
179
|
ret = Nokogiri::XML(ret.to_xml
|
181
180
|
.sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")).root
|
@@ -186,6 +185,15 @@ module Metanorma
|
|
186
185
|
ret
|
187
186
|
end
|
188
187
|
|
188
|
+
# Load the local bibliographic file that a citation URI points to.
#
# http(s) URIs are never looked up locally. Otherwise the URI is resolved
# against @localdir, trying the ".rxl" extension first and ".xml" second.
#
# @param uri [String] the citation URI, without file extension
# @return the parsed XML document, or nil when the URI is remote or no such
#   file exists
def read_local_bibitem_file(uri)
  return nil if %r{^https?://}.match?(uri)

  path = %w(rxl xml).map { |ext| "#{@localdir}#{uri}.#{ext}" }
    .find { |candidate| File.file?(candidate) }
  return nil unless path

  Nokogiri::XML(File.read(path, encoding: "utf-8"))
end
|
196
|
+
|
189
197
|
# if citation uri points to local file, get bibitem from it
|
190
198
|
def fetch_local_bibitem(xmldoc)
|
191
199
|
xmldoc.xpath("//bibitem[formattedref][uri[@type = 'citation']]")
|
@@ -206,6 +214,7 @@ module Metanorma
|
|
206
214
|
def bibitem_cleanup(xmldoc)
|
207
215
|
bibitem_nested_id(xmldoc)
|
208
216
|
ref_dl_cleanup(xmldoc)
|
217
|
+
formattedref_spans(xmldoc)
|
209
218
|
fetch_local_bibitem(xmldoc)
|
210
219
|
end
|
211
220
|
end
|
@@ -2,109 +2,9 @@ module Metanorma
|
|
2
2
|
module Standoc
|
3
3
|
module Cleanup
|
4
4
|
def requirement_cleanup(xmldoc)
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
end
|
9
|
-
|
10
|
-
REQRECPER = "//requirement | //recommendation | //permission".freeze
|
11
|
-
|
12
|
-
def requirement_inherit(xmldoc)
|
13
|
-
xmldoc.xpath(REQRECPER).each do |r|
|
14
|
-
ins = requirement_inherit_insert(r)
|
15
|
-
r.xpath("./*//inherit").each { |i| ins.previous = i }
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
def requirement_inherit_insert(reqt)
|
20
|
-
ins = reqt.at("./classification") || reqt.at(
|
21
|
-
"./description | ./measurementtarget | ./specification | "\
|
22
|
-
"./verification | ./import | ./description | ./component | "\
|
23
|
-
"./requirement | ./recommendation | ./permission",
|
24
|
-
) and return ins
|
25
|
-
requirement_inherit_insert1(reqt)
|
26
|
-
end
|
27
|
-
|
28
|
-
def requirement_inherit_insert1(reqt)
|
29
|
-
if t = reqt.at("./title")
|
30
|
-
t.next = " "
|
31
|
-
t.next
|
32
|
-
else
|
33
|
-
if reqt.children.empty? then reqt.add_child(" ")
|
34
|
-
else reqt.children.first.previous = " "
|
35
|
-
end
|
36
|
-
reqt.children.first
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def requirement_descriptions(xmldoc)
|
41
|
-
xmldoc.xpath(REQRECPER).each do |r|
|
42
|
-
r.xpath(".//p[not(./*)][normalize-space(.)='']").each(&:remove)
|
43
|
-
r.children.each do |e|
|
44
|
-
requirement_description_wrap(r, e)
|
45
|
-
end
|
46
|
-
requirement_description_cleanup1(r)
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def requirement_description_wrap(reqt, text)
|
51
|
-
return if (text.element? && (reqt_subpart(text.name) ||
|
52
|
-
%w(requirement recommendation
|
53
|
-
permission).include?(text.name))) ||
|
54
|
-
(text.text.strip.empty? && !text.at(".//xref | .//eref | .//link"))
|
55
|
-
|
56
|
-
t = Nokogiri::XML::Element.new("description", reqt.document)
|
57
|
-
text.before(t)
|
58
|
-
t.children = text.remove
|
59
|
-
end
|
60
|
-
|
61
|
-
def requirement_description_cleanup1(reqt)
|
62
|
-
while d = reqt.at("./description[following-sibling::*[1]"\
|
63
|
-
"[self::description]]")
|
64
|
-
n = d.next.remove
|
65
|
-
d << n.children
|
66
|
-
end
|
67
|
-
reqt.xpath("./description[normalize-space(.)='']").each do |r|
|
68
|
-
r.replace("\n")
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
def requirement_metadata(xmldoc)
|
73
|
-
xmldoc.xpath(REQRECPER).each do |r|
|
74
|
-
dl = r&.at("./dl[@metadata = 'true']")&.remove or next
|
75
|
-
requirement_metadata1(r, dl, r.at("./title"))
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
def requirement_metadata1_tags
|
80
|
-
%w(label subject inherit)
|
81
|
-
end
|
82
|
-
|
83
|
-
def requirement_metadata1(reqt, dlist, ins)
|
84
|
-
unless ins
|
85
|
-
reqt.children.first.previous = " "
|
86
|
-
ins = reqt.children.first
|
87
|
-
end
|
88
|
-
%w(obligation model type).each do |a|
|
89
|
-
dl_to_attrs(reqt, dlist, a)
|
90
|
-
end
|
91
|
-
requirement_metadata1_tags.each do |a|
|
92
|
-
ins = dl_to_elems(ins, reqt, dlist, a)
|
93
|
-
end
|
94
|
-
reqt_dl_to_classif(ins, reqt, dlist)
|
95
|
-
end
|
96
|
-
|
97
|
-
def reqt_dl_to_classif(ins, reqt, dlist)
|
98
|
-
if a = reqt.at("./classification[last()]") then ins = a end
|
99
|
-
dlist.xpath("./dt[text()='classification']").each do |e|
|
100
|
-
val = e.at("./following::dd/p") || e.at("./following::dd")
|
101
|
-
req_classif_parse(val.text).each do |r|
|
102
|
-
ins.next = "<classification><tag>#{r[0]}</tag>"\
|
103
|
-
"<value>#{r[1]}</value></classification>"
|
104
|
-
ins = ins.next
|
105
|
-
end
|
106
|
-
end
|
107
|
-
ins
|
5
|
+
@reqt_models ||=
|
6
|
+
Metanorma::Requirements.new({ default: @default_requirement_model })
|
7
|
+
@reqt_models.requirement_cleanup(xmldoc)
|
108
8
|
end
|
109
9
|
end
|
110
10
|
end
|
@@ -11,7 +11,7 @@ module Metanorma
|
|
11
11
|
n.replace(grkletters(MathML2AsciiMath.m2a(n.to_xml)))
|
12
12
|
end
|
13
13
|
ret = Nokogiri::XML(key.to_xml)
|
14
|
-
|
14
|
+
@c.decode(ret.text.downcase)
|
15
15
|
.gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
|
16
16
|
.gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
|
17
17
|
.gsub(/[0-9]+/, "þ\\0")
|
@@ -7,7 +7,7 @@ module Metanorma
|
|
7
7
|
text = text.gsub(/\s+<fn /, "<fn ")
|
8
8
|
%w(passthrough passthrough-inline).each do |v|
|
9
9
|
text.gsub!(%r{<#{v}\s+formats="metanorma">([^<]*)
|
10
|
-
</#{v}>}mx) {
|
10
|
+
</#{v}>}mx) { @c.decode($1) }
|
11
11
|
end
|
12
12
|
text
|
13
13
|
end
|
@@ -34,11 +34,13 @@ module Metanorma
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
+
IGNORE_QUOTES_ELEMENTS =
|
38
|
+
%w(pre tt sourcecode stem figure bibdata passthrough identifier).freeze
|
39
|
+
|
37
40
|
def uninterrupt_quotes_around_xml_skip(elem)
|
38
41
|
!(/\A['"]/.match?(elem.text) &&
|
39
|
-
elem.previous.
|
40
|
-
|
41
|
-
.empty? &&
|
42
|
+
elem.previous.path.split(%r{/})[1..-2]
|
43
|
+
.intersection(IGNORE_QUOTES_ELEMENTS).empty? &&
|
42
44
|
((elem.previous.text.strip.empty? &&
|
43
45
|
!empty_tag_with_text_content?(elem.previous)) ||
|
44
46
|
elem.previous.name == "index"))
|
@@ -49,7 +51,7 @@ module Metanorma
|
|
49
51
|
/\S\Z/.match?(prev.text) or return
|
50
52
|
foll = elem.at(".//following::text()[1]")
|
51
53
|
m = /\A(["'][[:punct:]]*)(\s|\Z)/
|
52
|
-
.match(
|
54
|
+
.match(@c.decode(foll&.text)) or return
|
53
55
|
foll.content = foll.text.sub(/\A(["'][[:punct:]]*)/, "")
|
54
56
|
prev.content = "#{prev.text}#{m[1]}"
|
55
57
|
end
|
@@ -74,10 +76,10 @@ module Metanorma
|
|
74
76
|
empty_tag_with_text_content?(x) and prev = "dummy"
|
75
77
|
next unless x.text?
|
76
78
|
|
77
|
-
x.
|
78
|
-
|
79
|
+
ancestors = x.path.split(%r{/})[1..-2]
|
80
|
+
ancestors.intersection(IGNORE_QUOTES_ELEMENTS).empty? and
|
79
81
|
dumb2smart_quotes1(x, prev)
|
80
|
-
prev = x.text
|
82
|
+
prev = x.text unless ancestors.include?("index")
|
81
83
|
end
|
82
84
|
end
|
83
85
|
|
@@ -68,10 +68,9 @@ module Metanorma
|
|
68
68
|
end
|
69
69
|
|
70
70
|
def xref_to_eref(elem)
|
71
|
-
c = HTMLEntities.new
|
72
71
|
elem["bibitemid"] = elem["target"]
|
73
72
|
if ref = @anchors&.dig(elem["target"], :xref)
|
74
|
-
elem["citeas"] = c.decode(ref)
|
73
|
+
elem["citeas"] = @c.decode(ref)
|
75
74
|
else
|
76
75
|
elem["citeas"] = ""
|
77
76
|
xref_to_eref1(elem)
|