metanorma-standoc 1.10.6 → 1.11.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +19 -23
- data/Rakefile +1 -1
- data/lib/asciidoctor/standoc/base.rb +10 -17
- data/lib/asciidoctor/standoc/basicdoc.rng +21 -4
- data/lib/asciidoctor/standoc/blocks.rb +23 -23
- data/lib/asciidoctor/standoc/blocks_notes.rb +17 -22
- data/lib/asciidoctor/standoc/cleanup.rb +46 -12
- data/lib/asciidoctor/standoc/cleanup_block.rb +3 -71
- data/lib/asciidoctor/standoc/cleanup_image.rb +6 -7
- data/lib/asciidoctor/standoc/cleanup_inline.rb +42 -106
- data/lib/asciidoctor/standoc/cleanup_maths.rb +5 -6
- data/lib/asciidoctor/standoc/cleanup_ref.rb +5 -0
- data/lib/asciidoctor/standoc/cleanup_reqt.rb +5 -24
- data/lib/asciidoctor/standoc/cleanup_section_names.rb +5 -5
- data/lib/asciidoctor/standoc/cleanup_symbols.rb +48 -0
- data/lib/asciidoctor/standoc/cleanup_table.rb +68 -0
- data/lib/asciidoctor/standoc/cleanup_terms.rb +37 -77
- data/lib/asciidoctor/standoc/cleanup_terms_designations.rb +162 -0
- data/lib/asciidoctor/standoc/cleanup_text.rb +5 -2
- data/lib/asciidoctor/standoc/cleanup_xref.rb +107 -0
- data/lib/asciidoctor/standoc/converter.rb +14 -0
- data/lib/asciidoctor/standoc/inline.rb +7 -5
- data/lib/asciidoctor/standoc/isodoc.rng +419 -77
- data/lib/asciidoctor/standoc/lists.rb +15 -15
- data/lib/asciidoctor/standoc/macros.rb +14 -43
- data/lib/asciidoctor/standoc/macros_note.rb +45 -0
- data/lib/asciidoctor/standoc/macros_plantuml.rb +29 -14
- data/lib/asciidoctor/standoc/macros_terms.rb +55 -8
- data/lib/asciidoctor/standoc/ref_sect.rb +26 -18
- data/lib/asciidoctor/standoc/reqt.rng +23 -2
- data/lib/asciidoctor/standoc/term_lookup_cleanup.rb +50 -11
- data/lib/asciidoctor/standoc/terms.rb +12 -2
- data/lib/asciidoctor/standoc/utils.rb +36 -23
- data/lib/asciidoctor/standoc/validate.rb +45 -27
- data/lib/asciidoctor/standoc/validate_section.rb +5 -2
- data/lib/metanorma/standoc/version.rb +1 -1
- data/metanorma-standoc.gemspec +1 -1
- data/spec/asciidoctor/base_spec.rb +4 -36
- data/spec/asciidoctor/blank_spec.rb +37 -0
- data/spec/asciidoctor/blocks_spec.rb +208 -49
- data/spec/asciidoctor/cleanup_sections_spec.rb +153 -12
- data/spec/asciidoctor/cleanup_spec.rb +104 -285
- data/spec/asciidoctor/cleanup_terms_spec.rb +990 -0
- data/spec/asciidoctor/inline_spec.rb +38 -2
- data/spec/asciidoctor/lists_spec.rb +6 -6
- data/spec/asciidoctor/macros_plantuml_spec.rb +37 -2
- data/spec/asciidoctor/macros_spec.rb +191 -114
- data/spec/asciidoctor/refs_spec.rb +12 -30
- data/spec/asciidoctor/section_spec.rb +18 -18
- data/spec/asciidoctor/validate_spec.rb +87 -2
- data/spec/fixtures/datamodel_description_sections_tree.xml +3 -2
- data/spec/spec_helper.rb +6 -7
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +51 -51
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +13 -13
- data/spec/vcr_cassettes/isobib_get_123.yml +13 -13
- data/spec/vcr_cassettes/isobib_get_123_1.yml +26 -26
- data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +34 -34
- data/spec/vcr_cassettes/isobib_get_123_2001.yml +12 -12
- data/spec/vcr_cassettes/isobib_get_124.yml +13 -13
- data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +16 -16
- data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +51 -49
- metadata +12 -5
@@ -1,4 +1,5 @@
|
|
1
1
|
require "metanorma-utils"
|
2
|
+
require "digest"
|
2
3
|
|
3
4
|
module Asciidoctor
|
4
5
|
module Standoc
|
@@ -55,119 +56,30 @@ module Asciidoctor
|
|
55
56
|
end
|
56
57
|
end
|
57
58
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
(?<ref>[^"][^ \\t\\n,:-]*|"[^"]+")
|
64
|
-
(-(?<to>[^"][^ \\t\\n,:-]*|"[^"]"))?|
|
65
|
-
(?<locality2>whole|locality:[^ \\t\\n\\r:,;=]+))(?<punct>[,:;]?)\\s*
|
66
|
-
(?<text>.*)$
|
67
|
-
REGEXP
|
68
|
-
LOCALITY_RE = Regexp.new(LOCALITY_REGEX_STR.gsub(/\s/, ""),
|
69
|
-
Regexp::IGNORECASE | Regexp::MULTILINE)
|
70
|
-
|
71
|
-
def tq(text)
|
72
|
-
text.sub(/^"/, "").sub(/"$/, "")
|
73
|
-
end
|
74
|
-
|
75
|
-
def extract_localities(elem)
|
76
|
-
f = elem&.children&.first or return
|
77
|
-
f.text? or return
|
78
|
-
head = f.remove.text
|
79
|
-
tail = elem&.children&.remove
|
80
|
-
extract_localities1(elem, head)
|
81
|
-
tail and elem << tail
|
82
|
-
end
|
83
|
-
|
84
|
-
def extract_localities1(elem, text)
|
85
|
-
b = elem.add_child("<localityStack/>").first if LOCALITY_RE.match text
|
86
|
-
while (m = LOCALITY_RE.match text)
|
87
|
-
ref = m[:ref] ? "<referenceFrom>#{tq m[:ref]}</referenceFrom>" : ""
|
88
|
-
refto = m[:to] ? "<referenceTo>#{tq m[:to]}</referenceTo>" : ""
|
89
|
-
b.add_child("<locality type='#{locality_label(m)}'>#{ref}#{refto}"\
|
90
|
-
"</locality>")
|
91
|
-
text = m[:text]
|
92
|
-
b = elem.add_child("<localityStack/>").first if m[:punct] == ";"
|
93
|
-
end
|
94
|
-
elem.add_child(text) if text
|
95
|
-
end
|
96
|
-
|
97
|
-
def locality_label(match)
|
98
|
-
loc = match[:locality] || match[:locality2]
|
99
|
-
/^locality:/.match?(loc) ? loc : loc&.downcase
|
100
|
-
end
|
101
|
-
|
102
|
-
def xref_to_eref(elem)
|
103
|
-
elem["bibitemid"] = elem["target"]
|
104
|
-
unless elem["citeas"] = @anchors&.dig(elem["target"], :xref)
|
105
|
-
@internal_eref_namespaces.include?(elem["type"]) or
|
106
|
-
@log.add("Crossreferences", elem,
|
107
|
-
"#{elem['target']} does not have a corresponding "\
|
108
|
-
"anchor ID in the bibliography!")
|
109
|
-
end
|
110
|
-
elem.delete("target")
|
111
|
-
extract_localities(elem) unless elem.children.empty?
|
112
|
-
end
|
113
|
-
|
114
|
-
def xref_cleanup(xmldoc)
|
115
|
-
xmldoc.xpath("//xref").each do |x|
|
116
|
-
/:/.match(x["target"]) and xref_to_internal_eref(x)
|
117
|
-
next unless x.name == "xref"
|
118
|
-
|
119
|
-
if refid? x["target"]
|
120
|
-
x.name = "eref"
|
121
|
-
xref_to_eref(x)
|
122
|
-
else x.delete("type")
|
123
|
-
end
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
def xref_to_internal_eref(elem)
|
128
|
-
a = elem["target"].split(":", 3)
|
129
|
-
unless a.size < 2 || a[0].empty? || a[1].empty?
|
130
|
-
elem["target"] = "#{a[0]}_#{a[1]}"
|
131
|
-
a.size > 2 and
|
132
|
-
elem.children = %{anchor="#{a[2..-1].join}",#{elem&.children&.text}}
|
133
|
-
elem["type"] = a[0]
|
134
|
-
@internal_eref_namespaces << a[0]
|
135
|
-
elem.name = "eref"
|
136
|
-
xref_to_eref(elem)
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
def quotesource_cleanup(xmldoc)
|
141
|
-
xmldoc.xpath("//quote/source | //terms/source").each do |x|
|
142
|
-
xref_to_eref(x)
|
59
|
+
# Normalise every <concept> element that has no <termxref> child:
# drop a <refterm> child whose text is empty, then hand the element to
# concept_cleanup1 for resolution.
def concept_cleanup(xmldoc)
  xmldoc.xpath("//concept[not(termxref)]").each do |concept|
    refterm = concept.at("./refterm")
    # only remove the refterm when it exists and carries no text
    refterm.remove if refterm && refterm.text&.empty?
    concept_cleanup1(concept)
  end
end
|
145
66
|
|
146
|
-
def
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
x["citeas"] = @anchors&.dig(x["bibitemid"], :xref) or
|
153
|
-
@log.add("Crossreferences", x,
|
154
|
-
"#{x['bibitemid']} does not have a corresponding anchor "\
|
155
|
-
"ID in the bibliography!")
|
156
|
-
extract_localities(x) unless x.children.empty?
|
67
|
+
# Resolve a single concept-like element from its "key" attribute.
# Whitespace-only content is removed first; after the locality has been
# split off the key, the key selects one of three handlers, and the
# "key" attribute is deleted at the end.
def concept_cleanup1(elem)
  blank = elem&.children&.text&.strip&.empty?
  elem.children.remove if blank
  key_extract_locality(elem)
  # read the key only after key_extract_locality has run
  key = elem["key"]
  if /:/.match?(key)
    concept_termbase_cleanup(elem)
  elsif refid?(key)
    concept_eref_cleanup(elem)
  else
    concept_xref_cleanup(elem)
  end
  elem.delete("key")
end
|
159
76
|
|
160
|
-
def
|
161
|
-
xmldoc.xpath("//
|
77
|
+
def related_cleanup(xmldoc)
|
78
|
+
xmldoc.xpath("//related[not(termxref)]").each do |x|
|
162
79
|
term = x.at("./refterm")
|
163
|
-
term
|
164
|
-
|
165
|
-
|
166
|
-
if /:/.match?(x["key"]) then concept_termbase_cleanup(x)
|
167
|
-
elsif refid? x["key"] then concept_eref_cleanup(x)
|
168
|
-
else concept_xref_cleanup(x)
|
169
|
-
end
|
170
|
-
x.delete("key")
|
80
|
+
term.replace("<preferred>#{term_expr(term.children.to_xml)}"\
|
81
|
+
"</preferred>")
|
82
|
+
concept_cleanup1(x)
|
171
83
|
end
|
172
84
|
end
|
173
85
|
|
@@ -214,6 +126,7 @@ module Asciidoctor
|
|
214
126
|
def anchor_cleanup(elem)
|
215
127
|
anchor_cleanup1(elem)
|
216
128
|
xreftarget_cleanup(elem)
|
129
|
+
contenthash_id_cleanup(elem)
|
217
130
|
end
|
218
131
|
|
219
132
|
def anchor_cleanup1(elem)
|
@@ -239,6 +152,29 @@ module Asciidoctor
|
|
239
152
|
end
|
240
153
|
end
|
241
154
|
end
|
155
|
+
|
156
|
+
# True when +str+ is, in its entirety, an underscore followed by a
# lower-case hexadecimal GUID (8-4-4-4-12 digits).
#
# The pattern is anchored with \A and \z rather than ^ and $: the latter
# match at line boundaries, so a multi-line string containing a GUID on
# one line, or a GUID followed by a trailing newline, would be accepted.
#
# @param str [String, nil] candidate identifier
# @return [Boolean]
def guid?(str)
  /\A_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\z/
    .match?(str)
end
|
160
|
+
|
161
|
+
# Replace every GUID-style id in +doc+ with a content-derived id, and
# update the attributes that point at those ids.
#
# First pass: for each element whose id satisfies guid?, compute
# contenthash(element), record old id => new id, and assign the new id.
#
# Second pass: for each (element name, attribute name) pair listed below,
# rewrite the attribute when its current *value* is one of the replaced
# ids. Every pair must be a %w array: a bare %(...) literal is a String,
# whose [0] and [1] are single characters, and the lookup must be keyed
# by the attribute value, not by the attribute name.
#
# @param doc [#xpath] document (or node) to process
def contenthash_id_cleanup(doc)
  ids = doc.xpath("//*[@id]").each_with_object({}) do |x, m|
    next unless guid?(x["id"])

    m[x["id"]] = contenthash(x)
    x["id"] = m[x["id"]]
  end
  [%w(review from), %w(review to), %w(callout target), %w(eref bibitemid),
   %w(citation bibitemid), %w(xref target), %w(xref to)].each do |name, attr|
    doc.xpath("//#{name}").each do |x|
      ids[x[attr]] and x[attr] = ids[x[attr]]
    end
  end
end
|
173
|
+
|
174
|
+
# Derive an identifier for +elem+ from its content: the MD5 hex digest of
# the element's path and text (joined by "////"), regrouped as
# 8-4-4-4-12 hex digits with a leading underscore.
#
# @param elem [#path, #text] element to fingerprint
# @return [String]
def contenthash(elem)
  fingerprint = Digest::MD5.hexdigest("#{elem.path}////#{elem.text}")
  fingerprint.sub(/^(.{8})(.{4})(.{4})(.{4})(.{12})$/, "_\\1-\\2-\\3-\\4-\\5")
end
|
242
178
|
end
|
243
179
|
end
|
244
180
|
end
|
@@ -69,13 +69,12 @@ module Asciidoctor
|
|
69
69
|
return false if char.length > 1
|
70
70
|
|
71
71
|
if /\p{Greek}/.match?(char)
|
72
|
-
/\p{Lower}/.match(char) && !mathml_mi_italics[:lowergreek] ||
|
73
|
-
/\p{Upper}/.match(char) && !mathml_mi_italics[:uppergreek]
|
72
|
+
(/\p{Lower}/.match(char) && !mathml_mi_italics[:lowergreek]) ||
|
73
|
+
(/\p{Upper}/.match(char) && !mathml_mi_italics[:uppergreek])
|
74
74
|
elsif /\p{Latin}/.match?(char)
|
75
|
-
/\p{Lower}/.match(char) && !mathml_mi_italics[:lowerroman] ||
|
76
|
-
/\p{Upper}/.match(char) && !mathml_mi_italics[:upperroman]
|
77
|
-
else
|
78
|
-
false
|
75
|
+
(/\p{Lower}/.match(char) && !mathml_mi_italics[:lowerroman]) ||
|
76
|
+
(/\p{Upper}/.match(char) && !mathml_mi_italics[:upperroman])
|
77
|
+
else false
|
79
78
|
end
|
80
79
|
end
|
81
80
|
|
@@ -72,7 +72,7 @@ module Asciidoctor
|
|
72
72
|
def requirement_metadata(xmldoc)
|
73
73
|
xmldoc.xpath(REQRECPER).each do |r|
|
74
74
|
dl = r&.at("./dl[@metadata = 'true']")&.remove or next
|
75
|
-
requirement_metadata1(r, dl)
|
75
|
+
requirement_metadata1(r, dl, r.at("./title"))
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
@@ -80,39 +80,20 @@ module Asciidoctor
|
|
80
80
|
%w(label subject inherit)
|
81
81
|
end
|
82
82
|
|
83
|
-
def requirement_metadata1(reqt, dlist)
|
84
|
-
unless ins
|
83
|
+
def requirement_metadata1(reqt, dlist, ins)
|
84
|
+
unless ins
|
85
85
|
reqt.children.first.previous = " "
|
86
86
|
ins = reqt.children.first
|
87
87
|
end
|
88
88
|
%w(obligation model type).each do |a|
|
89
|
-
|
89
|
+
dl_to_attrs(reqt, dlist, a)
|
90
90
|
end
|
91
91
|
requirement_metadata1_tags.each do |a|
|
92
|
-
ins =
|
92
|
+
ins = dl_to_elems(ins, reqt, dlist, a)
|
93
93
|
end
|
94
94
|
reqt_dl_to_classif(ins, reqt, dlist)
|
95
95
|
end
|
96
96
|
|
97
|
-
def reqt_dl_to_attrs(reqt, dlist, name)
|
98
|
-
e = dlist.at("./dt[text()='#{name}']") or return
|
99
|
-
val = e.at("./following::dd/p") || e.at("./following::dd") or return
|
100
|
-
reqt[name] = val.text
|
101
|
-
end
|
102
|
-
|
103
|
-
def reqt_dl_to_elems(ins, reqt, dlist, name)
|
104
|
-
if a = reqt.at("./#{name}[last()]")
|
105
|
-
ins = a
|
106
|
-
end
|
107
|
-
dlist.xpath("./dt[text()='#{name}']").each do |e|
|
108
|
-
val = e.at("./following::dd/p") || e.at("./following::dd")
|
109
|
-
val.name = name
|
110
|
-
ins.next = val
|
111
|
-
ins = ins.next
|
112
|
-
end
|
113
|
-
ins
|
114
|
-
end
|
115
|
-
|
116
97
|
def reqt_dl_to_classif(ins, reqt, dlist)
|
117
98
|
if a = reqt.at("./classification[last()]") then ins = a end
|
118
99
|
dlist.xpath("./dt[text()='classification']").each do |e|
|
@@ -78,12 +78,12 @@ module Asciidoctor
|
|
78
78
|
def sections_variant_title_cleanup(xml)
|
79
79
|
path = SECTION_CONTAINERS.map { |x| "./ancestor::#{x}" }.join(" | ")
|
80
80
|
xml.xpath("//p[@variant_title]").each do |p|
|
81
|
+
p.name = "variant-title"
|
82
|
+
p.delete("id")
|
83
|
+
p.delete("variant_title")
|
81
84
|
p.xpath("(#{path})[last()]").each do |sect|
|
82
|
-
|
83
|
-
|
84
|
-
if ins = sect.at("./title") then ins.next = p
|
85
|
-
else sect.children.first.previous = p
|
86
|
-
end
|
85
|
+
ins = sect.at("./title") and ins.next = p or
|
86
|
+
sect.children.first.previous = p
|
87
87
|
end
|
88
88
|
end
|
89
89
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Asciidoctor
  module Standoc
    module Cleanup
      # Build the sort key for a symbol term.
      # Indices sort after letter but before any following
      # letter (x, x_m, x_1, xa); we use colon to force that sort order.
      # Numbers sort *after* letters; we use thorn to force that sort order.
      def symbol_key(sym)
        key = sym.dup
        # swap each MathML fragment for its AsciiMath text, with Greek
        # letter names turned into entity references
        key.traverse do |node|
          next unless node.name == "math"

          node.replace(grkletters(MathML2AsciiMath.m2a(node.to_xml)))
        end
        reparsed = Nokogiri::XML(key.to_xml)
        decoded = HTMLEntities.new.decode(reparsed.text.downcase)
        stripped = decoded.gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
        marked = stripped.gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
        marked.gsub(/[0-9]+/, "þ\\0")
      end

      # Wrap each stand-alone Greek letter name in +text+ as an entity
      # reference (e.g. "alpha" becomes "&alpha;"), case-insensitively.
      def grkletters(text)
        text.gsub(/\b(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|
                  lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|
                  psi|omega)\b/xi, "&\\1;")
      end

      # Detach the dt/dd children of +dlist+ and return them as an array of
      # hashes { dt:, key:, dd: }; each dd is attached to the entry of the
      # most recent dt.
      def extract_symbols_list(dlist)
        dlist.xpath("./dt | ./dd").each_with_object([]) do |item, entries|
          if item.name == "dt"
            entries << { dt: item.remove, key: symbol_key(item) }
          else
            entries.last[:dd] = item.remove
          end
        end
      end

      # Re-order the entries of every definition list directly under a
      # <definitions> element by their symbol key. Returns +docxml+.
      def symbols_cleanup(docxml)
        docxml.xpath("//definitions/dl").each do |dl|
          entries = extract_symbols_list(dl)
          # NOTE(review): <=> on two String keys yields an Integer, which is
          # always truthy, so the dt fallback looks unreachable -- confirm.
          entries.sort! { |a, b| a[:key] <=> b[:key] || a[:dt] <=> b[:dt] }
          dl.children = entries.map { |d| "#{d[:dt]}#{d[:dd]}" }.join("\n")
        end
        docxml
      end
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Asciidoctor
  module Standoc
    module Cleanup
      # Move a definition list marked key="true" that is the first sibling
      # after a table into that table.
      def dl1_table_cleanup(xmldoc)
        q = "//table/following-sibling::*[1][self::dl]"
        xmldoc.xpath(q).each do |s|
          s["key"] == "true" and s.previous_element << s.remove
        end
      end

      # move Key dl after table footer
      #
      # Handles the form where the table is followed by a paragraph whose
      # text is just "key" (any case, optionally followed by non-letters)
      # and then a definition list: the list is marked key="true" and moved
      # into the table, and the paragraph is removed.
      def dl2_table_cleanup(xmldoc)
        q = "//table/following-sibling::*[1][self::p]"
        xmldoc.xpath(q).each do |s|
          next unless /^\s*key[^a-z]*$/i.match?(s.text) &&
            s.next_element&.name == "dl"

          s.next_element["key"] = "true"
          s.previous_element << s.next_element.remove
          s.remove
        end
      end

      # Return the table's <thead>, creating an empty one if it is missing:
      # directly after <name> when the table has one, otherwise before the
      # table's first child.
      def insert_thead(table)
        thead = table.at("./thead")
        return thead unless thead.nil?

        if (tname = table.at("./name"))
          return tname.add_next_sibling("<thead/>").first
        end

        table.children.first.add_previous_sibling("<thead/>").first
      end

      # For every table carrying a headerrows attribute, move leading body
      # rows into the header until the header holds that many rows, turn
      # the header's <td> cells into <th>, and drop the attribute.
      def header_rows_cleanup(xmldoc)
        xmldoc.xpath("//table[@headerrows]").each do |s|
          thead = insert_thead(s)
          (thead.xpath("./tr").size...s["headerrows"].to_i).each do
            # headerrows may ask for more rows than the body holds; stop
            # instead of calling parent= on nil
            row = s.at("./tbody/tr") or break
            row.parent = thead
          end
          thead.xpath(".//td").each { |n| n.name = "th" }
          s.delete("headerrows")
        end
      end

      # Run the table cleanup passes in order: key lists, notes, header rows.
      def table_cleanup(xmldoc)
        dl1_table_cleanup(xmldoc)
        dl2_table_cleanup(xmldoc)
        notes_table_cleanup(xmldoc)
        header_rows_cleanup(xmldoc)
      end

      # move notes into table
      #
      # A note that is the first sibling after a table is moved into the
      # table unless it has keep-separate="true". The scan repeats until a
      # pass moves nothing, so a run of consecutive notes is absorbed one
      # per pass.
      def notes_table_cleanup(xmldoc)
        nomatches = false
        until nomatches
          nomatches = true
          xmldoc.xpath("//table/following-sibling::*[1]"\
                       "[self::note[not(@keep-separate = 'true')]]").each do |n|
            n.delete("keep-separate")
            n.previous_element << n.remove
            nomatches = false
          end
        end
      end
    end
  end
end
|
@@ -1,19 +1,9 @@
|
|
1
1
|
require_relative "term_lookup_cleanup"
|
2
|
+
require_relative "cleanup_terms_designations"
|
2
3
|
|
3
4
|
module Asciidoctor
|
4
5
|
module Standoc
|
5
6
|
module Cleanup
|
6
|
-
def termdef_stem_cleanup(xmldoc)
|
7
|
-
xmldoc.xpath("//term/p/stem").each do |a|
|
8
|
-
if a.parent.elements.size == 1 # para contains just a stem expression
|
9
|
-
t = Nokogiri::XML::Element.new("admitted", xmldoc)
|
10
|
-
parent = a.parent
|
11
|
-
t.children = a.remove
|
12
|
-
parent.replace(t)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
7
|
def termdomain_cleanup(xmldoc)
|
18
8
|
xmldoc.xpath("//p/domain").each do |a|
|
19
9
|
prev = a.parent.previous
|
@@ -22,43 +12,53 @@ module Asciidoctor
|
|
22
12
|
end
|
23
13
|
|
24
14
|
def termdomain1_cleanup(xmldoc)
|
25
|
-
xmldoc.xpath("//
|
26
|
-
|
27
|
-
|
15
|
+
xmldoc.xpath("//term").each do |t|
|
16
|
+
d = t.xpath("./domain | ./subject | ./usageinfo").last or next
|
17
|
+
defn = d.at("../definition") and defn.previous = d.remove
|
28
18
|
end
|
29
19
|
end
|
30
20
|
|
31
21
|
def termdefinition_cleanup(xmldoc)
|
22
|
+
generate_termdefinitions(xmldoc)
|
23
|
+
split_termdefinitions(xmldoc)
|
24
|
+
end
|
25
|
+
|
26
|
+
def generate_termdefinitions(xmldoc)
|
32
27
|
xmldoc.xpath("//term[not(definition)]").each do |d|
|
33
|
-
first_child = d.at("./p | ./figure | ./formula") || next
|
28
|
+
first_child = d.at("./p | ./figure | ./formula | ./table") || next
|
34
29
|
t = Nokogiri::XML::Element.new("definition", xmldoc)
|
35
30
|
first_child.replace(t)
|
36
31
|
t << first_child.remove
|
37
|
-
d.xpath("./p | ./figure | ./formula").each { |n| t << n.remove }
|
32
|
+
d.xpath("./p | ./figure | ./formula | ./table").each { |n| t << n.remove }
|
38
33
|
end
|
39
34
|
end
|
40
35
|
|
41
|
-
def
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
36
|
+
# Partition the children of every <definition> into two wrappers.
# Both wrappers are inserted ahead of the existing first child; the
# <verbaldefinition> is inserted second, so it ends up in front of the
# <nonverbalrepresentation>. Figures, tables and formulas go to the
# non-verbal wrapper; a termsource follows whichever kind preceded it
# (non-verbal once any figure/table/formula has been seen); every other
# element goes to the verbal wrapper.
def split_termdefinitions(xmldoc)
  xmldoc.xpath("//definition").each do |defn|
    nonverbal = defn.children.first
      .add_previous_sibling("<nonverbalrepresentation/>").first
    verbal = defn.children.first
      .add_previous_sibling("<verbaldefinition/>").first
    seen_nonverbal = false
    defn.elements.each do |child|
      name = child.name
      # the two freshly inserted wrappers are themselves children: skip
      next if %w(nonverbalrepresentation verbaldefinition).include?(name)

      if %w(figure table formula).include?(name)
        nonverbal << child.remove
        seen_nonverbal = true
      elsif name == "termsource"
        (seen_nonverbal ? nonverbal : verbal) << child.remove
      else
        verbal << child.remove
      end
    end
  end
end
|
49
54
|
|
50
|
-
def termdef_subclause_cleanup(xmldoc)
|
51
|
-
xmldoc.xpath("//terms[terms]").each { |t| t.name = "clause" }
|
52
|
-
end
|
53
|
-
|
54
55
|
def termdocsource_cleanup(xmldoc)
|
55
56
|
f = xmldoc.at("//preface | //sections")
|
56
|
-
xmldoc.xpath("//termdocsource").each
|
57
|
-
f.previous = s.remove
|
58
|
-
end
|
57
|
+
xmldoc.xpath("//termdocsource").each { |s| f.previous = s.remove }
|
59
58
|
end
|
60
59
|
|
61
60
|
def term_children_cleanup(xmldoc)
|
61
|
+
xmldoc.xpath("//terms[terms]").each { |t| t.name = "clause" }
|
62
62
|
xmldoc.xpath("//term").each do |t|
|
63
63
|
%w(termnote termexample termsource).each do |w|
|
64
64
|
t.xpath("./#{w}").each { |n| t << n.remove }
|
@@ -75,69 +75,29 @@ module Asciidoctor
|
|
75
75
|
end
|
76
76
|
|
77
77
|
def termnote_example_cleanup(xmldoc)
|
78
|
-
|
79
|
-
|
78
|
+
%w(note example).each do |w|
|
79
|
+
xmldoc.xpath("//term#{w}[not(ancestor::term)]").each do |x|
|
80
|
+
x.name = w
|
80
81
|
end
|
81
|
-
xmldoc.xpath("//termexample[not(ancestor::term)]").each do |x|
|
82
|
-
x.name = "example"
|
83
82
|
end
|
84
83
|
end
|
85
84
|
|
86
85
|
def termdef_cleanup(xmldoc)
|
86
|
+
termdef_unnest_cleanup(xmldoc)
|
87
87
|
Asciidoctor::Standoc::TermLookupCleanup.new(xmldoc, @log).call
|
88
|
+
term_nonverbal_designations(xmldoc)
|
89
|
+
term_dl_to_metadata(xmldoc)
|
90
|
+
term_termsource_to_designation(xmldoc)
|
91
|
+
term_designation_reorder(xmldoc)
|
88
92
|
termdef_from_termbase(xmldoc)
|
89
|
-
termdef_unnest_cleanup(xmldoc)
|
90
93
|
termdef_stem_cleanup(xmldoc)
|
91
94
|
termdomain_cleanup(xmldoc)
|
92
95
|
termdefinition_cleanup(xmldoc)
|
93
96
|
termdomain1_cleanup(xmldoc)
|
94
97
|
termnote_example_cleanup(xmldoc)
|
95
|
-
termdef_subclause_cleanup(xmldoc)
|
96
98
|
term_children_cleanup(xmldoc)
|
97
99
|
termdocsource_cleanup(xmldoc)
|
98
100
|
end
|
99
|
-
|
100
|
-
# Indices sort after letter but before any following
|
101
|
-
# letter (x, x_m, x_1, xa); we use colon to force that sort order.
|
102
|
-
# Numbers sort *after* letters; we use thorn to force that sort order.
|
103
|
-
def symbol_key(sym)
|
104
|
-
key = sym.dup
|
105
|
-
key.traverse do |n|
|
106
|
-
next unless n.name == "math"
|
107
|
-
|
108
|
-
n.replace(grkletters(MathML2AsciiMath.m2a(n.to_xml)))
|
109
|
-
end
|
110
|
-
ret = Nokogiri::XML(key.to_xml)
|
111
|
-
HTMLEntities.new.decode(ret.text.downcase)
|
112
|
-
.gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
|
113
|
-
.gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
|
114
|
-
.gsub(/[0-9]+/, "þ\\0")
|
115
|
-
end
|
116
|
-
|
117
|
-
def grkletters(x)
|
118
|
-
x.gsub(/\b(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)\b/i, "&\\1;")
|
119
|
-
end
|
120
|
-
|
121
|
-
def extract_symbols_list(dlist)
|
122
|
-
dl_out = []
|
123
|
-
dlist.xpath("./dt | ./dd").each do |dtd|
|
124
|
-
if dtd.name == "dt"
|
125
|
-
dl_out << { dt: dtd.remove, key: symbol_key(dtd) }
|
126
|
-
else
|
127
|
-
dl_out.last[:dd] = dtd.remove
|
128
|
-
end
|
129
|
-
end
|
130
|
-
dl_out
|
131
|
-
end
|
132
|
-
|
133
|
-
def symbols_cleanup(docxml)
|
134
|
-
docxml.xpath("//definitions/dl").each do |dl|
|
135
|
-
dl_out = extract_symbols_list(dl)
|
136
|
-
dl_out.sort! { |a, b| a[:key] <=> b[:key] || a[:dt] <=> b[:dt] }
|
137
|
-
dl.children = dl_out.map { |d| d[:dt].to_s + d[:dd].to_s }.join("\n")
|
138
|
-
end
|
139
|
-
docxml
|
140
|
-
end
|
141
101
|
end
|
142
102
|
end
|
143
103
|
end
|