metanorma-standoc 1.11.3 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +3 -31
- data/.gitignore +23 -0
- data/Gemfile +0 -1
- data/lib/asciidoctor/standoc/base.rb +2 -145
- data/lib/asciidoctor/standoc/blocks.rb +2 -238
- data/lib/asciidoctor/standoc/blocks_notes.rb +2 -100
- data/lib/asciidoctor/standoc/cleanup.rb +2 -208
- data/lib/asciidoctor/standoc/cleanup_amend.rb +2 -53
- data/lib/asciidoctor/standoc/cleanup_block.rb +2 -172
- data/lib/asciidoctor/standoc/cleanup_boilerplate.rb +2 -212
- data/lib/asciidoctor/standoc/cleanup_footnotes.rb +2 -108
- data/lib/asciidoctor/standoc/cleanup_image.rb +2 -69
- data/lib/asciidoctor/standoc/cleanup_inline.rb +2 -189
- data/lib/asciidoctor/standoc/cleanup_maths.rb +2 -221
- data/lib/asciidoctor/standoc/cleanup_ref.rb +2 -169
- data/lib/asciidoctor/standoc/cleanup_ref_dl.rb +2 -103
- data/lib/asciidoctor/standoc/cleanup_reqt.rb +2 -110
- data/lib/asciidoctor/standoc/cleanup_section.rb +2 -184
- data/lib/asciidoctor/standoc/cleanup_section_names.rb +2 -91
- data/lib/asciidoctor/standoc/cleanup_symbols.rb +2 -47
- data/lib/asciidoctor/standoc/cleanup_table.rb +2 -67
- data/lib/asciidoctor/standoc/cleanup_terms.rb +2 -139
- data/lib/asciidoctor/standoc/cleanup_terms_designations.rb +2 -192
- data/lib/asciidoctor/standoc/cleanup_text.rb +2 -95
- data/lib/asciidoctor/standoc/cleanup_toc.rb +3 -0
- data/lib/asciidoctor/standoc/cleanup_xref.rb +2 -106
- data/lib/asciidoctor/standoc/converter.rb +2 -123
- data/lib/asciidoctor/standoc/datamodel/attributes_table_preprocessor.rb +2 -56
- data/lib/asciidoctor/standoc/datamodel/diagram_preprocessor.rb +2 -102
- data/lib/asciidoctor/standoc/datamodel/plantuml_renderer.rb +3 -404
- data/lib/asciidoctor/standoc/deprecated.rb +5 -0
- data/lib/asciidoctor/standoc/front.rb +2 -219
- data/lib/asciidoctor/standoc/front_contributor.rb +2 -191
- data/lib/asciidoctor/standoc/inline.rb +2 -231
- data/lib/asciidoctor/standoc/lists.rb +2 -119
- data/lib/asciidoctor/standoc/macros.rb +2 -203
- data/lib/asciidoctor/standoc/macros_form.rb +2 -62
- data/lib/asciidoctor/standoc/macros_note.rb +2 -44
- data/lib/asciidoctor/standoc/macros_plantuml.rb +2 -112
- data/lib/asciidoctor/standoc/macros_terms.rb +2 -180
- data/lib/asciidoctor/standoc/ref.rb +2 -251
- data/lib/asciidoctor/standoc/ref_sect.rb +2 -153
- data/lib/asciidoctor/standoc/ref_utility.rb +2 -0
- data/lib/asciidoctor/standoc/render.rb +2 -116
- data/lib/asciidoctor/standoc/reqt.rb +2 -89
- data/lib/asciidoctor/standoc/section.rb +2 -194
- data/lib/asciidoctor/standoc/table.rb +2 -84
- data/lib/asciidoctor/standoc/term_lookup_cleanup.rb +2 -178
- data/lib/asciidoctor/standoc/terms.rb +2 -153
- data/lib/asciidoctor/standoc/utils.rb +2 -100
- data/lib/asciidoctor/standoc/validate.rb +2 -157
- data/lib/asciidoctor/standoc/validate_section.rb +2 -54
- data/lib/isodoc/html/htmlstyle.css +44 -29
- data/lib/isodoc/html/htmlstyle.scss +17 -12
- data/lib/metanorma/standoc/base.rb +163 -0
- data/lib/{asciidoctor → metanorma}/standoc/basicdoc.rng +0 -0
- data/lib/{asciidoctor → metanorma}/standoc/biblio.rng +2 -2
- data/lib/metanorma/standoc/blocks.rb +239 -0
- data/lib/metanorma/standoc/blocks_notes.rb +101 -0
- data/lib/metanorma/standoc/cleanup.rb +157 -0
- data/lib/metanorma/standoc/cleanup_amend.rb +54 -0
- data/lib/metanorma/standoc/cleanup_block.rb +173 -0
- data/lib/metanorma/standoc/cleanup_boilerplate.rb +213 -0
- data/lib/metanorma/standoc/cleanup_footnotes.rb +109 -0
- data/lib/metanorma/standoc/cleanup_image.rb +70 -0
- data/lib/metanorma/standoc/cleanup_inline.rb +190 -0
- data/lib/metanorma/standoc/cleanup_maths.rb +222 -0
- data/lib/metanorma/standoc/cleanup_ref.rb +170 -0
- data/lib/metanorma/standoc/cleanup_ref_dl.rb +104 -0
- data/lib/metanorma/standoc/cleanup_reqt.rb +111 -0
- data/lib/metanorma/standoc/cleanup_section.rb +212 -0
- data/lib/metanorma/standoc/cleanup_section_names.rb +92 -0
- data/lib/metanorma/standoc/cleanup_symbols.rb +48 -0
- data/lib/metanorma/standoc/cleanup_table.rb +68 -0
- data/lib/metanorma/standoc/cleanup_terms.rb +140 -0
- data/lib/metanorma/standoc/cleanup_terms_designations.rb +199 -0
- data/lib/metanorma/standoc/cleanup_text.rb +74 -0
- data/lib/metanorma/standoc/cleanup_toc.rb +98 -0
- data/lib/metanorma/standoc/cleanup_xref.rb +107 -0
- data/lib/metanorma/standoc/converter.rb +126 -0
- data/lib/metanorma/standoc/datamodel/attributes_table_preprocessor.rb +57 -0
- data/lib/metanorma/standoc/datamodel/diagram_preprocessor.rb +103 -0
- data/lib/metanorma/standoc/datamodel/plantuml_renderer.rb +409 -0
- data/lib/metanorma/standoc/front.rb +224 -0
- data/lib/metanorma/standoc/front_contributor.rb +192 -0
- data/lib/metanorma/standoc/inline.rb +232 -0
- data/lib/{asciidoctor → metanorma}/standoc/isodoc.rng +104 -3
- data/lib/metanorma/standoc/lists.rb +120 -0
- data/lib/metanorma/standoc/macros.rb +205 -0
- data/lib/metanorma/standoc/macros_embed.rb +72 -0
- data/lib/metanorma/standoc/macros_form.rb +63 -0
- data/lib/metanorma/standoc/macros_note.rb +45 -0
- data/lib/metanorma/standoc/macros_plantuml.rb +113 -0
- data/lib/metanorma/standoc/macros_terms.rb +194 -0
- data/lib/metanorma/standoc/ref.rb +243 -0
- data/lib/metanorma/standoc/ref_sect.rb +153 -0
- data/lib/{asciidoctor/standoc/ref_date_id.rb → metanorma/standoc/ref_utility.rb} +43 -5
- data/lib/metanorma/standoc/render.rb +115 -0
- data/lib/metanorma/standoc/reqt.rb +90 -0
- data/lib/{asciidoctor → metanorma}/standoc/reqt.rng +0 -0
- data/lib/metanorma/standoc/section.rb +209 -0
- data/lib/metanorma/standoc/table.rb +85 -0
- data/lib/metanorma/standoc/term_lookup_cleanup.rb +179 -0
- data/lib/metanorma/standoc/terms.rb +160 -0
- data/lib/metanorma/standoc/utils.rb +101 -0
- data/lib/metanorma/standoc/validate.rb +158 -0
- data/lib/metanorma/standoc/validate_section.rb +55 -0
- data/lib/metanorma/standoc/version.rb +1 -1
- data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/model_representation.adoc.erb +0 -0
- data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/plantuml_representation.adoc.erb +0 -0
- data/lib/metanorma-standoc.rb +1 -1
- data/metanorma-standoc.gemspec +1 -1
- data/spec/assets/a1.adoc +8 -0
- data/spec/assets/a2.adoc +8 -0
- data/spec/assets/a3.adoc +9 -0
- data/spec/assets/a4.adoc +4 -0
- data/spec/{asciidoctor → metanorma}/base_spec.rb +499 -407
- data/spec/{asciidoctor → metanorma}/blank_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/blocks_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/cleanup_blocks_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/cleanup_sections_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/cleanup_spec.rb +5 -5
- data/spec/{asciidoctor → metanorma}/cleanup_terms_spec.rb +227 -119
- data/spec/{asciidoctor → metanorma}/datamodel/attributes_table_preprocessor_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/datamodel/diagram_preprocessor_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/inline_spec.rb +170 -1
- data/spec/{asciidoctor → metanorma}/isobib_cache_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/lists_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/macros_json2text_spec.rb +0 -0
- data/spec/{asciidoctor → metanorma}/macros_plantuml_spec.rb +3 -3
- data/spec/{asciidoctor → metanorma}/macros_spec.rb +97 -6
- data/spec/{asciidoctor → metanorma}/macros_yaml2text_spec.rb +0 -0
- data/spec/metanorma/refs_dl_spec.rb +863 -0
- data/spec/{asciidoctor → metanorma}/refs_spec.rb +522 -15
- data/spec/{asciidoctor → metanorma}/section_spec.rb +59 -1
- data/spec/{asciidoctor → metanorma}/table_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/validate_spec.rb +2 -2
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +46 -46
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
- data/spec/vcr_cassettes/hide_refs.yml +599 -0
- data/spec/vcr_cassettes/isobib_get_123.yml +12 -12
- data/spec/vcr_cassettes/isobib_get_123_1.yml +24 -24
- data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +35 -35
- data/spec/vcr_cassettes/isobib_get_123_2001.yml +13 -13
- data/spec/vcr_cassettes/isobib_get_124.yml +10 -10
- data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +18 -18
- data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +46 -46
- metadata +88 -32
- data/spec/asciidoctor/refs_dl_spec.rb +0 -864
@@ -0,0 +1,170 @@
|
|
1
|
+
require "set"
|
2
|
+
require "relaton_bib"
|
3
|
+
|
4
|
+
module Metanorma
|
5
|
+
module Standoc
|
6
|
+
module Cleanup
|
7
|
+
# Reorder the bibitems of every references container flagged
# normative = 'false'; the per-container work is done by biblio_reorder1.
def biblio_reorder(xmldoc)
  informative = xmldoc.xpath("//references[@normative = 'false']")
  informative.each { |refs| biblio_reorder1(refs) }
end
|
12
|
+
|
13
|
+
# Reorder the bibitems directly under +refs+ into the order given by
# sort_biblio, then recurse into nested references containers.
#
# Notes that trail a bibitem are folded into it before sorting and
# restored afterwards, so that they travel with their bibitem.
#
# refs - the references element whose ./bibitem children are reordered.
def biblio_reorder1(refs)
  fold_notes_into_biblio(refs)
  bib = sort_biblio(refs.xpath("./bibitem"))
  # Element preceding the first bibitem: sorted items go back right after it.
  insert = refs.at("./bibitem")&.previous_element
  refs.xpath("./bibitem").each(&:remove)
  # Inserting at a fixed position in reverse leaves the items in sorted order.
  bib.reverse_each do |b|
    if insert
      insert.next = b.to_xml
    else
      # prepend_child also works when refs has no children left, where
      # refs.children.first would be nil.
      refs.prepend_child(b.to_xml)
    end
  end
  extract_notes_from_biblio(refs)
  refs.xpath("./references").each { |r| biblio_reorder1(r) }
end
|
25
|
+
|
26
|
+
# Move each run of note elements that immediately follows a bibitem inside
# that bibitem, marking every moved note with an "appended" attribute so
# extract_notes_from_biblio can find it again.
def fold_notes_into_biblio(refs)
  refs.xpath("./bibitem").each do |bibitem|
    loop do
      note = bibitem.next_element
      break unless note&.name == "note"

      note["appended"] = true
      bibitem << note.remove
    end
  end
end
|
34
|
+
|
35
|
+
# Undo fold_notes_into_biblio: move every note carrying the "appended"
# marker back out to directly follow its bibitem, dropping the marker.
def extract_notes_from_biblio(refs)
  refs.xpath("./bibitem").each do |bibitem|
    appended = bibitem.xpath("./note[@appended]")
    # Reverse order, since each note is inserted directly after the bibitem.
    appended.reverse.each do |note|
      note.delete("appended")
      bibitem.next = note
    end
  end
end
|
43
|
+
|
44
|
+
# Return the bibitems in the order they should appear. This implementation
# returns +bib+ unchanged; biblio_reorder1 calls it to obtain the order.
# NOTE(review): presumably an override point for flavours — confirm.
def sort_biblio(bib)
  bib
end
|
47
|
+
|
48
|
+
# By default, presuppose that all citations in non-normative references
# are numbered consecutively, but that standards codes are preserved as is:
# only metanorma docidentifiers of the form "[n]" are rewritten.
# Every non-hidden bibitem advances the counter, whether or not it is
# renumbered.
def biblio_renumber(xmldoc)
  counter = 0
  containers = xmldoc.xpath("//bibliography//references | "\
                            "//clause//references | "\
                            "//annex//references")
  containers.each do |refs|
    next if refs["normative"] == "true"

    refs.xpath("./bibitem[not(@hidden = 'true')]").each do |bibitem|
      counter += 1
      docid = bibitem.at("./docidentifier[@type = 'metanorma']")
      numeric = docid && /^\[\d+\]$/.match?(docid.text)
      docid.children = "[#{counter}]" if numeric
    end
  end
end
|
66
|
+
|
67
|
+
# Move every ref that is a direct child of a paragraph out of it, so that
# it becomes the sibling immediately preceding that paragraph.
def ref_cleanup(xmldoc)
  xmldoc.xpath("//p/ref").each do |ref|
    paragraph = ref.parent
    detached = ref.remove
    paragraph.previous = detached
  end
end
|
74
|
+
|
75
|
+
# In the element located by the class's NORM_REF XPath, remove everything
# that lies between the title and the first bibitem, except boilerplate
# notes and their descendants. Does nothing if NORM_REF matches nothing.
def normref_cleanup(xmldoc)
  normref = xmldoc.at(self.class::NORM_REF)
  return unless normref

  after_title = normref.xpath("./title/following-sibling::*")
  before_bibitem = normref.xpath("./bibitem[1]/preceding-sibling::*")
  boilerplate =
    normref.xpath("./note[@type = 'boilerplate']/descendant-or-self::*")
  # Set intersection, then set difference.
  ((after_title & before_bibitem) - boilerplate).each(&:remove)
end
|
82
|
+
|
83
|
+
# Run the bibliography cleanup passes in their fixed order:
# reorder, nest, renumber, then strip ext elements.
def biblio_cleanup(xmldoc)
  %i[biblio_reorder biblio_nested biblio_renumber].each do |step|
    send(step, xmldoc)
  end
  biblio_no_ext(xmldoc)
end
|
89
|
+
|
90
|
+
# Remove the ext child from every bibitem in the document.
def biblio_no_ext(xmldoc)
  xmldoc.xpath("//bibitem/ext").each { |ext| ext.remove }
end
|
93
|
+
|
94
|
+
# Turn every references element that itself contains references children
# into a clause: its normative attribute is copied down to those children
# and then removed from the container.
def biblio_nested(xmldoc)
  xmldoc.xpath("//references[references]").each do |container|
    container.name = "clause"
    container.xpath("./references").each do |child|
      child["normative"] = container["normative"]
    end
    container.delete("normative")
  end
end
|
101
|
+
|
102
|
+
# Render a document identifier for use as a cross-reference label.
#
# Identifiers of any type other than "metanorma" are delegated to
# @isodoc.docid_prefix. A metanorma identifier consisting only of digits is
# wrapped in square brackets; anything else is returned unchanged.
def format_ref(ref, type)
  return @isodoc.docid_prefix(type, ref) unless type == "metanorma"

  bare_number = /^\d+$/.match?(ref) && !/^\[.*\]$/.match?(ref)
  bare_number ? "[#{ref}]" : ref
end
|
108
|
+
|
109
|
+
# XPath, relative to a bibitem, selecting a publisher organization that is
# ISO or IEC, matched by abbreviation or by full name.
ISO_PUBLISHER_XPATH = [
  "./contributor[role/@type = 'publisher']/organization[",
  [
    "abbreviation = 'ISO'",
    "abbreviation = 'IEC'",
    "name = 'International Organization for Standardization'",
    "name = 'International Electrotechnical Commission'",
  ].join(" or "),
  "]",
].join.freeze
|
114
|
+
|
115
|
+
# Record a cross-reference label in @anchors for every top-level bibitem
# (bibitems nested inside another bibitem are skipped).
#
# The label comes from the metanorma docidentifier if present, otherwise
# from the first docidentifier whose type is not DOI; bibitems with neither
# are left out.
def reference_names(xmldoc)
  xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |bibitem|
    # isopub = bibitem.at(ISO_PUBLISHER_XPATH)
    docid = bibitem.at("./docidentifier[@type = 'metanorma']")
    docid ||= bibitem.at("./docidentifier[not(@type = 'DOI')]")
    next unless docid

    label = format_ref(docid.text, docid["type"])
    @anchors[bibitem["id"]] = { xref: label }
  end
end
|
124
|
+
|
125
|
+
# Stub: ignores both arguments and always returns an empty string.
# NOTE(review): looks like a placeholder for a termbase lookup — confirm.
def fetch_termbase(_termbase, _id)
  ""
end
|
128
|
+
|
129
|
+
# Load a bibitem from a local Relaton file named after +uri+.
#
# Looks for "#{@localdir}#{uri}.rxl", then "#{@localdir}#{uri}.xml". The
# file's bibdata element is re-parsed with its default namespace stripped,
# renamed to bibitem, given a <uri type="citation"> child before its first
# docidentifier, and has any ext child removed.
#
# Returns the bibitem element, or nil if +uri+ is an http(s) URL, no file
# exists, or the file has no bibdata or no docidentifier.
def read_local_bibitem(uri)
  return nil if %r{^https?://}.match?(uri)

  file = %w[rxl xml].map { |ext| "#{@localdir}#{uri}.#{ext}" }
    .find { |path| File.file?(path) }
  return nil unless file

  source = Nokogiri::XML(File.read(file, encoding: "utf-8"))
  bibdata = source.at("//*[local-name() = 'bibdata']")
  return nil unless bibdata

  stripped = bibdata.to_xml
    .sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")
  bibitem = Nokogiri::XML(stripped).root
  bibitem.name = "bibitem"
  first_docid = bibitem.at("./*[local-name() = 'docidentifier']")
  return nil unless first_docid

  first_docid.previous = %{<uri type="citation">#{uri}</uri>}
  bibitem.at("./*[local-name() = 'ext']")&.remove
  bibitem
end
|
145
|
+
|
146
|
+
# If a bibitem's citation uri points to a local file, replace the bibitem
# with the one read from that file, keeping the original id. Only bibitems
# that have both a formattedref and a citation uri are considered.
def fetch_local_bibitem(xmldoc)
  candidates =
    xmldoc.xpath("//bibitem[formattedref][uri[@type = 'citation']]")
  candidates.each do |original|
    uri = original&.at("./uri[@type = 'citation']")&.text
    replacement = read_local_bibitem(uri)
    next unless replacement

    replacement["id"] = original["id"]
    original.replace(replacement)
  end
end
|
156
|
+
|
157
|
+
# Remove the id attribute from every bibitem nested inside another bibitem.
def bibitem_nested_id(xmldoc)
  xmldoc.xpath("//bibitem//bibitem").each { |nested| nested.delete("id") }
end
|
162
|
+
|
163
|
+
# Run the bibitem-level passes in order: drop nested ids, convert
# definition-list references, then pull in locally stored bibitems.
def bibitem_cleanup(xmldoc)
  %i[bibitem_nested_id ref_dl_cleanup].each { |step| send(step, xmldoc) }
  fetch_local_bibitem(xmldoc)
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require "set"
|
2
|
+
require "relaton_bib"
|
3
|
+
|
4
|
+
module Metanorma
|
5
|
+
module Standoc
|
6
|
+
module Cleanup
|
7
|
+
# Replace every clause flagged bibitem = 'true' with a bibitem built from
# its definition list via RelatonBib. Clauses that yield no hash or no XML
# are left alone. The clause id is carried over unless it is absent or
# starts with "_".
def ref_dl_cleanup(xmldoc)
  xmldoc.xpath("//clause[@bibitem = 'true']").each do |clause|
    bib = dl_bib_extract(clause)
    next unless bib

    validate_ref_dl(bib, clause)
    xml = RelatonBib::BibliographicItem.from_hash(bib).to_xml
    next unless xml

    parsed = Nokogiri::XML(xml)
    explicit_id = clause["id"] && !/^_/.match(clause["id"])
    parsed.root["id"] = clause["id"] if explicit_id
    clause.replace(parsed.root)
  end
end
|
17
|
+
|
18
|
+
# Log problems with a definition-list reference and register its anchor.
#
# The anchor is bib["id"], falling back to the clause id unless that id
# starts with "_" (implicit ids are not accepted). With no anchor an
# "Anchors" error is logged and nothing else is checked; otherwise the
# anchor is appended to @refids and a missing title or docid is logged
# under "Bibliography".
def validate_ref_dl(bib, c)
  implicit = /^_/.match?(c["id"])
  anchor = bib["id"]
  anchor ||= c["id"] unless implicit
  if !anchor
    @log.add("Anchors", c,
             "The following reference is missing an anchor:\n" + c.to_xml)
    return
  end

  @refids << anchor
  unless bib["title"]
    @log.add("Bibliography", c, "Reference #{anchor} is missing a title")
  end
  bib["docid"] ||
    @log.add("Bibliography", c,
             "Reference #{anchor} is missing a document identifier (docid)")
end
|
33
|
+
|
34
|
+
# Wrap the children of the first p found under bib[tag] in a <key> element
# and return it as a string; returns nil when bib has no entry for +tag+.
def extract_from_p(tag, bib, key)
  return unless bib[tag]

  content = bib[tag].at("p").children
  "<#{key}>#{content}</#{key}>"
end
|
39
|
+
|
40
|
+
# Serialise +para+ to a stripped XML string. If its only child element is
# a single paragraph, that paragraph's children are serialised instead,
# after link_unwrap has replaced a sole link with its target.
def p_unwrap(para)
  elems = para.elements
  sole_paragraph = elems.size == 1 && elems[0].name == "p"
  return para.to_xml.strip unless sole_paragraph

  link_unwrap(elems[0]).children.to_xml.strip
end
|
50
|
+
|
51
|
+
# If the only child element of +para+ is a link, replace that link with
# the stripped value of its target attribute. Always returns +para+.
def link_unwrap(para)
  elems = para.elements
  sole_link = elems.size == 1 && elems[0].name == "link"
  para.at("./link").replace(elems[0]["target"].strip) if sole_link
  para
end
|
58
|
+
|
59
|
+
# Convert the content of a definition-list value into a bibliographic
# hash value:
# * nil if the node has no children;
# * a nested hash (via dl_bib_extract) if it contains a dl;
# * an array of unwrapped items if its only element is an ol or ul;
# * otherwise the unwrapped content as a string.
# The node is detached from the document along the way.
def dd_bib_extract(dtd)
  return nil if dtd.children.empty?
  return dl_bib_extract(dtd) if dtd.at("./dl")

  elems = dtd.remove.elements
  single_list = elems.size == 1 && %w(ol ul).include?(elems[0].name)
  return p_unwrap(dtd) unless single_list

  elems[0].xpath("./li").map { |li| p_unwrap(li) }
end
|
73
|
+
|
74
|
+
# Store +val+ in +bib+ under the dot-separated path +key+, delegating to
# Metanorma::Utils.set_nested_value.
def add_to_hash(bib, key, val)
  path = key.split(".")
  Metanorma::Utils.set_nested_value(bib, path, val)
end
|
77
|
+
|
78
|
+
# Build a bibliographic hash from the definition list directly under +c+
# (with at most one level of unordered lists in its values).
#
# Each dt supplies a key (trailing colons stripped) and each dd the value
# stored under the most recent key. Subclauses titled contributor, relation
# or series are extracted recursively and stored under that title. Unless
# +nested+ is true, the title of +c+ is removed from it and appended to
# bib["title"], which is first normalised to an array.
#
# Returns the hash, or nil if +c+ has no dl child.
def dl_bib_extract(c, nested = false)
  dl = c.at("./dl")
  return unless dl

  bib = {}
  key = ""
  dl.xpath("./dt | ./dd").each do |dtd|
    if dtd.name == "dt"
      key = dtd.text.sub(/:+$/, "")
    else
      add_to_hash(bib, key, dd_bib_extract(dtd))
    end
  end
  c.xpath("./clause").each do |c1|
    key = c1&.at("./title")&.text&.downcase&.strip
    next unless %w(contributor relation series).include? key

    add_to_hash(bib, key, dl_bib_extract(c1, true))
  end
  if !nested && c.at("./title")
    title = c.at("./title").remove.children.to_xml
    existing = bib["title"]
    if existing.is_a?(Hash) || existing.is_a?(String)
      bib["title"] = [existing]
    elsif !existing
      bib["title"] = []
    end
    bib["title"] << title unless title.empty?
  end
  bib
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Standoc
|
3
|
+
module Cleanup
|
4
|
+
# Run the requirement passes in order: metadata, inherit, descriptions.
def requirement_cleanup(xmldoc)
  %i[requirement_metadata requirement_inherit].each do |step|
    send(step, xmldoc)
  end
  requirement_descriptions(xmldoc)
end
|
9
|
+
|
10
|
+
# XPath selecting every requirement, recommendation and permission element.
REQRECPER = %w[requirement recommendation permission]
  .map { |name| "//#{name}" }.join(" | ").freeze
|
11
|
+
|
12
|
+
# For each requirement, recommendation or permission, move the inherit
# elements found below its children up to the insertion point chosen by
# requirement_inherit_insert.
def requirement_inherit(xmldoc)
  xmldoc.xpath(REQRECPER).each do |reqt|
    anchor = requirement_inherit_insert(reqt)
    reqt.xpath("./*//inherit").each do |inherit|
      anchor.previous = inherit
    end
  end
end
|
18
|
+
|
19
|
+
# Find the node before which inherit elements of +reqt+ are inserted.
#
# Preference order: the first classification child; otherwise the first
# child that is one of the listed requirement subparts or a nested
# requirement/recommendation/permission; otherwise whatever
# requirement_inherit_insert1 provides.
def requirement_inherit_insert(reqt)
  # "./description" was listed twice in the original union; XPath unions
  # are sets, so listing it once selects the same node.
  ins = reqt.at("./classification") || reqt.at(
    "./description | ./measurementtarget | ./specification | "\
    "./verification | ./import | ./component | "\
    "./requirement | ./recommendation | ./permission",
  )
  return ins if ins

  requirement_inherit_insert1(reqt)
end
|
27
|
+
|
28
|
+
# Create and return a single-space text node to serve as insertion point:
# directly after the title if +reqt+ has one, otherwise as its first child.
def requirement_inherit_insert1(reqt)
  title = reqt.at("./title")
  if title
    title.next = " "
    return title.next
  end

  if reqt.children.empty?
    reqt.add_child(" ")
  else
    reqt.children.first.previous = " "
  end
  reqt.children.first
end
|
39
|
+
|
40
|
+
# For each requirement, recommendation or permission: drop paragraphs that
# have no child elements and only whitespace text, wrap loose content in
# description elements, then merge and tidy those descriptions.
def requirement_descriptions(xmldoc)
  xmldoc.xpath(REQRECPER).each do |reqt|
    blank_paras = reqt.xpath(".//p[not(./*)][normalize-space(.)='']")
    blank_paras.each(&:remove)
    reqt.children.each { |child| requirement_description_wrap(reqt, child) }
    requirement_description_cleanup1(reqt)
  end
end
|
49
|
+
|
50
|
+
# Wrap the node +text+ in a new description element placed where the node
# was. Nothing is done for elements that are requirement subparts (per
# reqt_subpart) or nested requirements/recommendations/permissions, nor for
# nodes with blank text that contain no xref, eref or link.
def requirement_description_wrap(reqt, text)
  if text.element?
    return if reqt_subpart(text.name) ||
      %w(requirement recommendation
         permission).include?(text.name)
  end
  return if text.text.strip.empty? &&
    !text.at(".//xref | .//eref | .//link")

  wrapper = Nokogiri::XML::Element.new("description", reqt)
  text.before(wrapper)
  wrapper.children = text.remove
end
|
60
|
+
|
61
|
+
# Merge runs of adjacent description elements in +reqt+ into the first of
# each run, then replace descriptions left with only whitespace by a
# newline.
def requirement_description_cleanup1(reqt)
  adjacent = "./description[following-sibling::*[1]"\
             "[self::description]]"
  loop do
    desc = reqt.at(adjacent)
    break unless desc

    following = desc.next.remove
    desc << following.children
  end
  reqt.xpath("./description[normalize-space(.)='']").each do |blank|
    blank.replace("\n")
  end
end
|
71
|
+
|
72
|
+
# For each requirement, recommendation or permission that has a definition
# list flagged metadata = 'true', detach that list and hand it to
# requirement_metadata1 together with the element's title (if any).
def requirement_metadata(xmldoc)
  xmldoc.xpath(REQRECPER).each do |reqt|
    dlist = reqt&.at("./dl[@metadata = 'true']")&.remove
    next unless dlist

    requirement_metadata1(reqt, dlist, reqt.at("./title"))
  end
end
|
78
|
+
|
79
|
+
# Names of the metadata entries that requirement_metadata1 passes, one at
# a time, to dl_to_elems.
def requirement_metadata1_tags
  %w(label subject inherit)
end
|
82
|
+
|
83
|
+
# Apply a metadata definition list to a requirement.
#
# reqt  - the requirement/recommendation/permission element.
# dlist - the detached metadata definition list.
# ins   - node after which new elements are inserted (the title, when the
#         caller found one); if nil, a single-space text node is created
#         as the first child of +reqt+ and used instead.
#
# obligation, model and type are passed to dl_to_attrs; the names from
# requirement_metadata1_tags are passed to dl_to_elems, each call returning
# the next insertion point; classifications are handled last.
def requirement_metadata1(reqt, dlist, ins)
  unless ins
    # The requirement may have no children left once its metadata list was
    # detached; children.first would then be nil.
    if reqt.children.empty? then reqt.add_child(" ")
    else reqt.children.first.previous = " "
    end
    ins = reqt.children.first
  end
  %w(obligation model type).each do |a|
    dl_to_attrs(reqt, dlist, a)
  end
  requirement_metadata1_tags.each do |a|
    ins = dl_to_elems(ins, reqt, dlist, a)
  end
  reqt_dl_to_classif(ins, reqt, dlist)
end
|
96
|
+
|
97
|
+
# Append a classification element (tag + value) after the insertion point
# for every pair that req_classif_parse yields from each "classification"
# entry of +dlist+. If +reqt+ already has a classification, insertion
# starts after the one matched by classification[last()]. Returns the final
# insertion point.
def reqt_dl_to_classif(ins, reqt, dlist)
  existing = reqt.at("./classification[last()]")
  ins = existing if existing
  dlist.xpath("./dt[text()='classification']").each do |term|
    value = term.at("./following::dd/p") || term.at("./following::dd")
    req_classif_parse(value.text).each do |pair|
      ins.next = "<classification><tag>#{pair[0]}</tag>"\
                 "<value>#{pair[1]}</value></classification>"
      ins = ins.next
    end
  end
  ins
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
require "date"
|
2
|
+
require "htmlentities"
|
3
|
+
require "json"
|
4
|
+
require "mathml2asciimath"
|
5
|
+
require_relative "cleanup_section_names"
|
6
|
+
|
7
|
+
module Metanorma
|
8
|
+
module Standoc
|
9
|
+
module Cleanup
|
10
|
+
# If the document has a foreword, introduction, acknowledgements or any
# element with a preface attribute, create a preface element before +sect+
# and move into it, in this order: foreword, introduction, preface-flagged
# elements, acknowledgements. The abstract is then handled by
# make_abstract.
def make_preface(xml, sect)
  front_matter = xml.at("//foreword | //introduction | "\
                        "//acknowledgements | //*[@preface]")
  if front_matter
    preface = sect.add_previous_sibling("<preface/>").first
    %w[//foreword //introduction].each do |path|
      node = xml.at(path)
      preface.add_child(node.remove) if node
    end
    move_clauses_into_preface(xml, preface)
    ack = xml.at("//acknowledgements")
    preface.add_child(ack.remove) if ack
  end
  make_abstract(xml, sect)
end
|
21
|
+
|
22
|
+
# Move every element carrying a preface attribute to the end of +preface+,
# removing the attribute.
def move_clauses_into_preface(xml, preface)
  xml.xpath("//*[@preface]").each do |clause|
    clause.delete("preface")
    preface.add_child(clause.remove)
  end
end
|
28
|
+
|
29
|
+
# Move the document abstract (one not inside a bibitem) to the start of
# the preface, creating the preface before +sect+ if needed, and place a
# copy of it in the bibliographic data after the node chosen by
# bibabstract_location. The copy loses all id attributes, its language and
# script attributes, and its title.
#
# Does nothing if there is no such abstract. If bibabstract_location finds
# no anchor, the abstract is still moved but no copy is made.
def make_abstract(xml, sect)
  abstract = xml.at("//abstract[not(ancestor::bibitem)]")
  return unless abstract

  preface = sect.at("//preface") ||
    sect.add_previous_sibling("<preface/>").first
  preface.prepend_child abstract.remove
  bibabstract = bibabstract_location(xml)
  return unless bibabstract

  dupabstract = abstract.dup
  dupabstract.traverse { |n| n.remove_attribute("id") }
  dupabstract.remove_attribute("language")
  dupabstract.remove_attribute("script")
  dupabstract.at("./title")&.remove
  bibabstract.next = dupabstract
end
|
44
|
+
|
45
|
+
# Return the node after which the abstract copy is inserted: the first
# match, in order of preference, among script, language, last contributor,
# last date, docnumber, last docidentifier, last uri and last title.
# Returns nil if none is found.
def bibabstract_location(xml)
  candidates = [
    "//bibdata/script",
    "//bibdata/language",
    "//bibdata/contributor[not(following-sibling::contributor)]",
    "//bibdata/date[not(following-sibling::date)]",
    "//docnumber",
    "//bibdata/docidentifier"\
    "[not(following-sibling::docidentifier)]",
    "//bibdata/uri[not(following-sibling::uri)]",
    "//bibdata/title[not(following-sibling::title)]",
  ]
  candidates.each do |path|
    node = xml.at(path)
    return node if node
  end
  nil
end
|
55
|
+
|
56
|
+
# If sections directly contains references elements, create a bibliography
# element right after +sect+ and move them all into it.
def make_bibliography(xml, sect)
  return unless xml.at("//sections/references")

  biblio = sect.add_next_sibling("<bibliography/>").first
  xml.xpath("//sections/references").each do |refs|
    biblio.add_child(refs.remove)
  end
end
|
64
|
+
|
65
|
+
# Move every indexsect that is a direct child of sections to directly
# after +sect+. They are processed in reverse because each one is inserted
# immediately after +sect+.
def make_indexsect(xml, sect)
  index_sections = xml.xpath("//sections/indexsect")
  index_sections.reverse_each do |index|
    sect.next = index.remove
  end
end
|
70
|
+
|
71
|
+
# Arrange the top-level containers around sections: build the preface,
# wrap annex-flagged clauses, then move index sections, the bibliography
# and finally annexes out of sections. Each is inserted directly after
# sections, so the ones moved later end up ahead of the ones moved earlier.
def sections_order_cleanup(xml)
  sections = xml.at("//sections")
  make_preface(xml, sections)
  make_annexes(xml)
  make_indexsect(xml, sections)
  make_bibliography(xml, sections)
  xml.xpath("//sections/annex").reverse_each do |annex|
    sections.next = annex.remove
  end
end
|
79
|
+
|
80
|
+
# Remove the annex attribute from every element that has one and, unless
# the element is itself an annex or lies inside one, wrap it in a new annex
# element. The wrapper gets a fresh random id and copies the element's
# obligation, language and script attributes.
def make_annexes(xml)
  xml.xpath("//*[@annex]").each do |node|
    node.delete("annex")
    next if node.name == "annex" || !node.ancestors("annex").empty?

    node.wrap("<annex/>")
    node.parent["id"] = "_#{UUIDTools::UUID.random_create}"
    %w[obligation language script].each do |attr|
      node.parent[attr] = node[attr]
    end
  end
end
|
92
|
+
|
93
|
+
# Return the highest level attribute found on any clause, as an integer,
# but never less than 5.
def maxlevel(xml)
  levels = xml.xpath("//clause[@level]").map { |clause| clause["level"].to_i }
  [5, *levels].max
end
|
100
|
+
|
101
|
+
# Nest clauses marked with a level of 6 or more: working from the deepest
# level down to 6, each such clause loses its level attribute and is moved
# to the end of its preceding sibling element. Nothing is done when
# maxlevel is below 6.
def sections_level_cleanup(xml)
  deepest = maxlevel(xml)
  return if deepest < 6

  deepest.downto(6) do |level|
    xml.xpath("//clause[@level = '#{level}']").each do |clause|
      clause.delete("level")
      clause.previous_element << clause.remove
    end
  end
end
|
112
|
+
|
113
|
+
# Run the section passes in order: ordering, level nesting, standard
# titles, variant titles, then change_clauses.
def sections_cleanup(xml)
  %i[sections_order_cleanup sections_level_cleanup sections_names_cleanup
     sections_variant_title_cleanup].each { |step| send(step, xml) }
  change_clauses(xml)
end
|
120
|
+
|
121
|
+
# Assign obligation attributes in three passes: informative defaults,
# normative defaults, then inheritance.
def obligations_cleanup(xml)
  %i[obligations_cleanup_info obligations_cleanup_norm].each do |step|
    send(step, xml)
  end
  obligations_cleanup_inherit(xml)
end
|
126
|
+
|
127
|
+
# Mark as informative: the first foreword, introduction and
# acknowledgements, every references element, and every clause inside the
# preface.
def obligations_cleanup_info(xml)
  %w[//foreword //introduction //acknowledgements].each do |path|
    section = xml.at(path)
    section["obligation"] = "informative" if section
  end
  xml.xpath("//references").each do |refs|
    refs["obligation"] = "informative"
  end
  xml.xpath("//preface//clause").each do |clause|
    clause["obligation"] = "informative"
  end
end
|
136
|
+
|
137
|
+
# Mark as normative: the first clause of type scope, every terms element
# and every definitions element.
def obligations_cleanup_norm(xml)
  scope = xml.at("//clause[@type = 'scope']")
  scope["obligation"] = "normative" if scope
  xml.xpath("//terms").each do |terms|
    terms["obligation"] = "normative"
  end
  xml.xpath("//definitions").each do |defs|
    defs["obligation"] = "normative"
  end
end
|
143
|
+
|
144
|
+
# Default every annex and every clause outside boilerplate to normative
# when it has no obligation yet, then give each element matched by
# Utils::SUBCLAUSE_XPATH the obligation of the first ancestor obligation
# attribute that the XPath lookup returns.
def obligations_cleanup_inherit(xml)
  xml.xpath("//annex | //clause[not(ancestor::boilerplate)]").each do |sect|
    sect["obligation"] ||= "normative"
  end
  xml.xpath(Utils::SUBCLAUSE_XPATH).each do |sub|
    inherited = sub&.at("./ancestor::*/@obligation")&.text
    sub["obligation"] = inherited if inherited
  end
end
|
152
|
+
|
153
|
+
# Hoist elements flagged beforeclauses = 'true' to the front of their
# container: first within the preface, then within sections.
def clausebefore_cleanup(xmldoc)
  preface_clausebefore_cleanup(xmldoc)
  sections_clausebefore_cleanup(xmldoc)
end
|
157
|
+
|
158
|
+
# Move every element in the preface flagged beforeclauses = 'true' to
# before the preface's original first child, removing the flag. An empty
# preface first receives a single-space text node to insert before.
# Does nothing if there is no preface.
def preface_clausebefore_cleanup(xmldoc)
  preface = xmldoc.at("//preface")
  return unless preface

  anchor = preface.children.first
  unless anchor
    preface << " "
    anchor = preface.children.first
  end
  xmldoc.xpath("//preface//*[@beforeclauses = 'true']").each do |node|
    node.delete("beforeclauses")
    anchor.previous = node.remove
  end
end
|
170
|
+
|
171
|
+
# Move every element in sections flagged beforeclauses = 'true' to before
# the original first child of sections, removing the flag. An empty
# sections element first receives a single-space text node to insert
# before. Does nothing if there is no sections element.
def sections_clausebefore_cleanup(xmldoc)
  sections = xmldoc.at("//sections")
  return unless sections

  anchor = sections.children.first
  unless anchor
    sections << " "
    anchor = sections.children.first
  end
  xmldoc.xpath("//sections//*[@beforeclauses = 'true']").each do |node|
    node.delete("beforeclauses")
    anchor.previous = node.remove
  end
end
|
183
|
+
|
184
|
+
# Reposition floating titles: first lift trailing ones out of their
# clauses, then move a trailing preface title into sections.
def floatingtitle_cleanup(xmldoc)
  pop_floating_title(xmldoc)
  floating_title_preface2sections(xmldoc)
end
|
188
|
+
|
189
|
+
# Lift every floating-title that is the last element of its parent to the
# end of its grandparent, repeating until a pass moves nothing. Titles
# whose parent is sections, annex or preface stay where they are.
def pop_floating_title(xmldoc)
  moved = true
  while moved
    moved = false
    xmldoc.xpath("//floating-title").each do |title|
      next if title.next_element
      next if %w(sections annex preface).include? title.parent.name

      title.parent.parent << title
      moved = true
    end
  end
end
|
202
|
+
|
203
|
+
# If the first floating-title directly under the preface has no element
# after it, move it to the very start of sections.
# NOTE(review): only the first preface floating-title is examined; a
# trailing one preceded by another floating title is not moved — confirm
# this is intended.
def floating_title_preface2sections(xmldoc)
  title = xmldoc.at("//preface/floating-title")
  return unless title

  sections = xmldoc.at("//sections")
  return if title.next_element

  sections.children.first.previous = title.remove
end
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Standoc
|
3
|
+
module Cleanup
|
4
|
+
# Return the title child of +node+, first creating an empty one as the
# node's first child if it has none.
def get_or_make_title(node)
  existing = node.at("./title")
  return existing if existing

  if node.children.empty?
    node << "<title/>"
  else
    node.children.first.previous = "<title/>"
  end
  node.at("./title")
end
|
14
|
+
|
15
|
+
# Set the title of the nodes matching +xpath+ to +text+, creating the
# title where missing. Footnotes directly under the old title are kept and
# re-appended after the new text.
#
# text  - replacement title content; nothing is done when it is nil/false.
# first - when true, only the first matching node is retitled.
def replace_title(doc, xpath, text, first = false)
  return unless text

  doc.xpath(xpath).each_with_index do |node, index|
    next unless index.zero? || !first

    title = get_or_make_title(node)
    footnotes = title.xpath("./fn")
    footnotes.each(&:remove)
    title.children = text
    footnotes.each { |fn| title << fn }
  end
end
|
28
|
+
|
29
|
+
# Give the fixed sections their localised titles from @i18n (scope,
# abstract, foreword, introduction, acknowledgements), then retitle the
# references and terms sections.
def sections_names_cleanup(xml)
  [
    ["//clause[@type = 'scope']", :scope],
    ["//preface//abstract", :abstract],
    ["//foreword", :foreword],
    ["//introduction", :introduction],
    ["//acknowledgements", :acknowledgements],
  ].each do |path, label|
    replace_title(xml, path, @i18n&.public_send(label))
  end
  section_names_refs_cleanup(xml)
  section_names_terms_cleanup(xml)
end
|
38
|
+
|
39
|
+
# Retitle the first normative references section and the first
# non-normative one directly under bibliography with the localised
# "normative references" and "bibliography" labels from @i18n.
def section_names_refs_cleanup(xml)
  normative = "//bibliography/references[@normative = 'true']"
  informative = "//bibliography/references[@normative = 'false']"
  replace_title(xml, normative, @i18n&.normref, true)
  replace_title(xml, informative, @i18n&.bibliography, true)
end
|
45
|
+
|
46
|
+
# XPath predicates classifying a terms section by the definitions sections
# it contains. section_names_terms_cleanup appends them to "//terms" and
# "//clause[.//terms]".

# Contains a definitions section with no type attribute.
NO_SYMABBR = "[.//definitions[not(@type)]]".freeze
# Contains both a symbols and an abbreviated_terms definitions section.
SYMABBR = "[.//definitions[@type = 'symbols']]"\
          "[.//definitions[@type = 'abbreviated_terms']]".freeze
# Contains a symbols definitions section but no abbreviated_terms one.
SYMnoABBR = "[.//definitions[@type = 'symbols']]"\
            "[not(.//definitions[@type = 'abbreviated_terms'])]".freeze
# Contains an abbreviated_terms definitions section but no symbols one.
ABBRnoSYM = "[.//definitions[@type = 'abbreviated_terms']]"\
            "[not(.//definitions[@type = 'symbols'])]".freeze
|
53
|
+
|
54
|
+
# Retitle definitions sections and terms sections with localised labels
# from @i18n. Every definitions section is retitled by its type; for terms
# sections (and clauses containing terms) only the first match of each
# pattern is retitled, the label depending on which kinds of definitions
# the section contains.
def section_names_terms_cleanup(xml)
  [
    ["//definitions[@type = 'symbols']", :symbols],
    ["//definitions[@type = 'abbreviated_terms']", :abbrev],
    ["//definitions[not(@type)]", :symbolsabbrev],
  ].each do |path, label|
    replace_title(xml, path, @i18n&.public_send(label))
  end

  [
    [SYMnoABBR, :termsdefsymbols],
    [ABBRnoSYM, :termsdefabbrev],
    [SYMABBR, :termsdefsymbolsabbrev],
    [NO_SYMABBR, :termsdefsymbolsabbrev],
  ].each do |filter, label|
    replace_title(xml, "//terms#{filter} | //clause[.//terms]#{filter}",
                  @i18n&.public_send(label), true)
  end

  no_definitions =
    "//terms[not(.//definitions)] | //clause[.//terms][not(.//definitions)]"
  replace_title(xml, no_definitions, @i18n&.termsdef, true)
end
|
73
|
+
|
74
|
+
# Names of the elements that can own a variant title; used by
# sections_variant_title_cleanup to find the enclosing section of a
# paragraph. ("clause" was listed twice in the original; the duplicate
# only repeated an identical XPath alternative.)
SECTION_CONTAINERS = %w(foreword introduction acknowledgements abstract
                        clause references terms definitions annex
                        appendix).freeze
|
77
|
+
|
78
|
+
# Turn every paragraph flagged with a variant_title attribute into a
# variant-title element (dropping its id and the flag) and move it to its
# nearest enclosing section container: directly after that section's title
# if it has one, otherwise to the front of the section.
def sections_variant_title_cleanup(xml)
  ancestors = SECTION_CONTAINERS.map { |name| "./ancestor::#{name}" }
  path = ancestors.join(" | ")
  xml.xpath("//p[@variant_title]").each do |para|
    para.name = "variant-title"
    para.delete("id")
    para.delete("variant_title")
    para.xpath("(#{path})[last()]").each do |sect|
      title = sect.at("./title")
      if title
        title.next = para
      else
        sect.children.first.previous = para
      end
    end
  end
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|