metanorma-standoc 1.11.3 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +3 -31
- data/.gitignore +23 -0
- data/Gemfile +0 -1
- data/lib/asciidoctor/standoc/base.rb +2 -145
- data/lib/asciidoctor/standoc/blocks.rb +2 -238
- data/lib/asciidoctor/standoc/blocks_notes.rb +2 -100
- data/lib/asciidoctor/standoc/cleanup.rb +2 -208
- data/lib/asciidoctor/standoc/cleanup_amend.rb +2 -53
- data/lib/asciidoctor/standoc/cleanup_block.rb +2 -172
- data/lib/asciidoctor/standoc/cleanup_boilerplate.rb +2 -212
- data/lib/asciidoctor/standoc/cleanup_footnotes.rb +2 -108
- data/lib/asciidoctor/standoc/cleanup_image.rb +2 -69
- data/lib/asciidoctor/standoc/cleanup_inline.rb +2 -189
- data/lib/asciidoctor/standoc/cleanup_maths.rb +2 -221
- data/lib/asciidoctor/standoc/cleanup_ref.rb +2 -169
- data/lib/asciidoctor/standoc/cleanup_ref_dl.rb +2 -103
- data/lib/asciidoctor/standoc/cleanup_reqt.rb +2 -110
- data/lib/asciidoctor/standoc/cleanup_section.rb +2 -184
- data/lib/asciidoctor/standoc/cleanup_section_names.rb +2 -91
- data/lib/asciidoctor/standoc/cleanup_symbols.rb +2 -47
- data/lib/asciidoctor/standoc/cleanup_table.rb +2 -67
- data/lib/asciidoctor/standoc/cleanup_terms.rb +2 -139
- data/lib/asciidoctor/standoc/cleanup_terms_designations.rb +2 -192
- data/lib/asciidoctor/standoc/cleanup_text.rb +2 -95
- data/lib/asciidoctor/standoc/cleanup_toc.rb +3 -0
- data/lib/asciidoctor/standoc/cleanup_xref.rb +2 -106
- data/lib/asciidoctor/standoc/converter.rb +2 -123
- data/lib/asciidoctor/standoc/datamodel/attributes_table_preprocessor.rb +2 -56
- data/lib/asciidoctor/standoc/datamodel/diagram_preprocessor.rb +2 -102
- data/lib/asciidoctor/standoc/datamodel/plantuml_renderer.rb +3 -404
- data/lib/asciidoctor/standoc/deprecated.rb +5 -0
- data/lib/asciidoctor/standoc/front.rb +2 -219
- data/lib/asciidoctor/standoc/front_contributor.rb +2 -191
- data/lib/asciidoctor/standoc/inline.rb +2 -231
- data/lib/asciidoctor/standoc/lists.rb +2 -119
- data/lib/asciidoctor/standoc/macros.rb +2 -203
- data/lib/asciidoctor/standoc/macros_form.rb +2 -62
- data/lib/asciidoctor/standoc/macros_note.rb +2 -44
- data/lib/asciidoctor/standoc/macros_plantuml.rb +2 -112
- data/lib/asciidoctor/standoc/macros_terms.rb +2 -180
- data/lib/asciidoctor/standoc/ref.rb +2 -251
- data/lib/asciidoctor/standoc/ref_sect.rb +2 -153
- data/lib/asciidoctor/standoc/ref_utility.rb +2 -0
- data/lib/asciidoctor/standoc/render.rb +2 -116
- data/lib/asciidoctor/standoc/reqt.rb +2 -89
- data/lib/asciidoctor/standoc/section.rb +2 -194
- data/lib/asciidoctor/standoc/table.rb +2 -84
- data/lib/asciidoctor/standoc/term_lookup_cleanup.rb +2 -178
- data/lib/asciidoctor/standoc/terms.rb +2 -153
- data/lib/asciidoctor/standoc/utils.rb +2 -100
- data/lib/asciidoctor/standoc/validate.rb +2 -157
- data/lib/asciidoctor/standoc/validate_section.rb +2 -54
- data/lib/isodoc/html/htmlstyle.css +44 -29
- data/lib/isodoc/html/htmlstyle.scss +17 -12
- data/lib/metanorma/standoc/base.rb +163 -0
- data/lib/{asciidoctor → metanorma}/standoc/basicdoc.rng +0 -0
- data/lib/{asciidoctor → metanorma}/standoc/biblio.rng +2 -2
- data/lib/metanorma/standoc/blocks.rb +239 -0
- data/lib/metanorma/standoc/blocks_notes.rb +101 -0
- data/lib/metanorma/standoc/cleanup.rb +157 -0
- data/lib/metanorma/standoc/cleanup_amend.rb +54 -0
- data/lib/metanorma/standoc/cleanup_block.rb +173 -0
- data/lib/metanorma/standoc/cleanup_boilerplate.rb +213 -0
- data/lib/metanorma/standoc/cleanup_footnotes.rb +109 -0
- data/lib/metanorma/standoc/cleanup_image.rb +70 -0
- data/lib/metanorma/standoc/cleanup_inline.rb +190 -0
- data/lib/metanorma/standoc/cleanup_maths.rb +222 -0
- data/lib/metanorma/standoc/cleanup_ref.rb +170 -0
- data/lib/metanorma/standoc/cleanup_ref_dl.rb +104 -0
- data/lib/metanorma/standoc/cleanup_reqt.rb +111 -0
- data/lib/metanorma/standoc/cleanup_section.rb +212 -0
- data/lib/metanorma/standoc/cleanup_section_names.rb +92 -0
- data/lib/metanorma/standoc/cleanup_symbols.rb +48 -0
- data/lib/metanorma/standoc/cleanup_table.rb +68 -0
- data/lib/metanorma/standoc/cleanup_terms.rb +140 -0
- data/lib/metanorma/standoc/cleanup_terms_designations.rb +199 -0
- data/lib/metanorma/standoc/cleanup_text.rb +74 -0
- data/lib/metanorma/standoc/cleanup_toc.rb +98 -0
- data/lib/metanorma/standoc/cleanup_xref.rb +107 -0
- data/lib/metanorma/standoc/converter.rb +126 -0
- data/lib/metanorma/standoc/datamodel/attributes_table_preprocessor.rb +57 -0
- data/lib/metanorma/standoc/datamodel/diagram_preprocessor.rb +103 -0
- data/lib/metanorma/standoc/datamodel/plantuml_renderer.rb +409 -0
- data/lib/metanorma/standoc/front.rb +224 -0
- data/lib/metanorma/standoc/front_contributor.rb +192 -0
- data/lib/metanorma/standoc/inline.rb +232 -0
- data/lib/{asciidoctor → metanorma}/standoc/isodoc.rng +104 -3
- data/lib/metanorma/standoc/lists.rb +120 -0
- data/lib/metanorma/standoc/macros.rb +205 -0
- data/lib/metanorma/standoc/macros_embed.rb +72 -0
- data/lib/metanorma/standoc/macros_form.rb +63 -0
- data/lib/metanorma/standoc/macros_note.rb +45 -0
- data/lib/metanorma/standoc/macros_plantuml.rb +113 -0
- data/lib/metanorma/standoc/macros_terms.rb +194 -0
- data/lib/metanorma/standoc/ref.rb +243 -0
- data/lib/metanorma/standoc/ref_sect.rb +153 -0
- data/lib/{asciidoctor/standoc/ref_date_id.rb → metanorma/standoc/ref_utility.rb} +43 -5
- data/lib/metanorma/standoc/render.rb +115 -0
- data/lib/metanorma/standoc/reqt.rb +90 -0
- data/lib/{asciidoctor → metanorma}/standoc/reqt.rng +0 -0
- data/lib/metanorma/standoc/section.rb +209 -0
- data/lib/metanorma/standoc/table.rb +85 -0
- data/lib/metanorma/standoc/term_lookup_cleanup.rb +179 -0
- data/lib/metanorma/standoc/terms.rb +160 -0
- data/lib/metanorma/standoc/utils.rb +101 -0
- data/lib/metanorma/standoc/validate.rb +158 -0
- data/lib/metanorma/standoc/validate_section.rb +55 -0
- data/lib/metanorma/standoc/version.rb +1 -1
- data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/model_representation.adoc.erb +0 -0
- data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/plantuml_representation.adoc.erb +0 -0
- data/lib/metanorma-standoc.rb +1 -1
- data/metanorma-standoc.gemspec +1 -1
- data/spec/assets/a1.adoc +8 -0
- data/spec/assets/a2.adoc +8 -0
- data/spec/assets/a3.adoc +9 -0
- data/spec/assets/a4.adoc +4 -0
- data/spec/{asciidoctor → metanorma}/base_spec.rb +499 -407
- data/spec/{asciidoctor → metanorma}/blank_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/blocks_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/cleanup_blocks_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/cleanup_sections_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/cleanup_spec.rb +5 -5
- data/spec/{asciidoctor → metanorma}/cleanup_terms_spec.rb +227 -119
- data/spec/{asciidoctor → metanorma}/datamodel/attributes_table_preprocessor_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/datamodel/diagram_preprocessor_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/inline_spec.rb +170 -1
- data/spec/{asciidoctor → metanorma}/isobib_cache_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/lists_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/macros_json2text_spec.rb +0 -0
- data/spec/{asciidoctor → metanorma}/macros_plantuml_spec.rb +3 -3
- data/spec/{asciidoctor → metanorma}/macros_spec.rb +97 -6
- data/spec/{asciidoctor → metanorma}/macros_yaml2text_spec.rb +0 -0
- data/spec/metanorma/refs_dl_spec.rb +863 -0
- data/spec/{asciidoctor → metanorma}/refs_spec.rb +522 -15
- data/spec/{asciidoctor → metanorma}/section_spec.rb +59 -1
- data/spec/{asciidoctor → metanorma}/table_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/validate_spec.rb +2 -2
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +46 -46
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
- data/spec/vcr_cassettes/hide_refs.yml +599 -0
- data/spec/vcr_cassettes/isobib_get_123.yml +12 -12
- data/spec/vcr_cassettes/isobib_get_123_1.yml +24 -24
- data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +35 -35
- data/spec/vcr_cassettes/isobib_get_123_2001.yml +13 -13
- data/spec/vcr_cassettes/isobib_get_124.yml +10 -10
- data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +18 -18
- data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +46 -46
- metadata +88 -32
- data/spec/asciidoctor/refs_dl_spec.rb +0 -864
@@ -0,0 +1,48 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Standoc
|
3
|
+
module Cleanup
|
4
|
+
# Indices sort after letter but before any following
|
5
|
+
# letter (x, x_m, x_1, xa); we use colon to force that sort order.
|
6
|
+
# Numbers sort *after* letters; we use thorn to force that sort order.
|
7
|
+
def symbol_key(sym)
|
8
|
+
key = sym.dup
|
9
|
+
key.traverse do |n|
|
10
|
+
n.name == "math" and
|
11
|
+
n.replace(grkletters(MathML2AsciiMath.m2a(n.to_xml)))
|
12
|
+
end
|
13
|
+
ret = Nokogiri::XML(key.to_xml)
|
14
|
+
HTMLEntities.new.decode(ret.text.downcase)
|
15
|
+
.gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
|
16
|
+
.gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
|
17
|
+
.gsub(/[0-9]+/, "þ\\0")
|
18
|
+
end
|
19
|
+
|
20
|
+
# Rewrite spelled-out Greek letter names as entity references.
#
# Every whole-word, case-insensitive occurrence of a Greek letter name
# (alpha .. omega) in +text+ is replaced by "&name;", keeping the casing of
# the matched word. Names embedded in longer words (e.g. "alphabet") or
# adjacent to other word characters such as "_" are left alone because of
# the \b anchors.
#
# @param text [String] text to scan
# @return [String] a new string with the names wrapped as entities
def grkletters(text)
  # /x lets the alternation wrap across lines; /i matches any casing.
  greek_name = /\b(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|
                   lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|
                   psi|omega)\b/xi
  text.gsub(greek_name, "&\\1;")
end
|
25
|
+
|
26
|
+
def extract_symbols_list(dlist)
|
27
|
+
dl_out = []
|
28
|
+
dlist.xpath("./dt | ./dd").each do |dtd|
|
29
|
+
if dtd.name == "dt"
|
30
|
+
dl_out << { dt: dtd.remove, key: symbol_key(dtd) }
|
31
|
+
else
|
32
|
+
dl_out.last[:dd] = dtd.remove
|
33
|
+
end
|
34
|
+
end
|
35
|
+
dl_out
|
36
|
+
end
|
37
|
+
|
38
|
+
def symbols_cleanup(docxml)
|
39
|
+
docxml.xpath("//definitions/dl").each do |dl|
|
40
|
+
dl_out = extract_symbols_list(dl)
|
41
|
+
dl_out.sort! { |a, b| a[:key] <=> b[:key] || a[:dt] <=> b[:dt] }
|
42
|
+
dl.children = dl_out.map { |d| d[:dt].to_s + d[:dd].to_s }.join("\n")
|
43
|
+
end
|
44
|
+
docxml
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Standoc
|
3
|
+
module Cleanup
|
4
|
+
def dl1_table_cleanup(xmldoc)
|
5
|
+
q = "//table/following-sibling::*[1][self::dl]"
|
6
|
+
xmldoc.xpath(q).each do |s|
|
7
|
+
s["key"] == "true" and s.previous_element << s.remove
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
# move Key dl after table footer
|
12
|
+
def dl2_table_cleanup(xmldoc)
|
13
|
+
q = "//table/following-sibling::*[1][self::p]"
|
14
|
+
xmldoc.xpath(q).each do |s|
|
15
|
+
if s.text =~ /^\s*key[^a-z]*$/i && s&.next_element&.name == "dl"
|
16
|
+
s.next_element["key"] = "true"
|
17
|
+
s.previous_element << s.next_element.remove
|
18
|
+
s.remove
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def insert_thead(table)
|
24
|
+
thead = table.at("./thead")
|
25
|
+
return thead unless thead.nil?
|
26
|
+
|
27
|
+
if tname = table.at("./name")
|
28
|
+
thead = tname.add_next_sibling("<thead/>").first
|
29
|
+
return thead
|
30
|
+
end
|
31
|
+
table.children.first.add_previous_sibling("<thead/>").first
|
32
|
+
end
|
33
|
+
|
34
|
+
def header_rows_cleanup(xmldoc)
|
35
|
+
xmldoc.xpath("//table[@headerrows]").each do |s|
|
36
|
+
thead = insert_thead(s)
|
37
|
+
(thead.xpath("./tr").size...s["headerrows"].to_i).each do
|
38
|
+
row = s.at("./tbody/tr")
|
39
|
+
row.parent = thead
|
40
|
+
end
|
41
|
+
thead.xpath(".//td").each { |n| n.name = "th" }
|
42
|
+
s.delete("headerrows")
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def table_cleanup(xmldoc)
|
47
|
+
dl1_table_cleanup(xmldoc)
|
48
|
+
dl2_table_cleanup(xmldoc)
|
49
|
+
notes_table_cleanup(xmldoc)
|
50
|
+
header_rows_cleanup(xmldoc)
|
51
|
+
end
|
52
|
+
|
53
|
+
# move notes into table
|
54
|
+
def notes_table_cleanup(xmldoc)
|
55
|
+
nomatches = false
|
56
|
+
until nomatches
|
57
|
+
nomatches = true
|
58
|
+
xmldoc.xpath("//table/following-sibling::*[1]"\
|
59
|
+
"[self::note[not(@keep-separate = 'true')]]").each do |n|
|
60
|
+
n.delete("keep-separate")
|
61
|
+
n.previous_element << n.remove
|
62
|
+
nomatches = false
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
require_relative "term_lookup_cleanup"
|
2
|
+
require_relative "cleanup_terms_designations"
|
3
|
+
|
4
|
+
module Metanorma
|
5
|
+
module Standoc
|
6
|
+
module Cleanup
|
7
|
+
def termdomain_cleanup(xmldoc)
|
8
|
+
xmldoc.xpath("//p/domain").each do |a|
|
9
|
+
parent = a.parent
|
10
|
+
prev = parent.previous
|
11
|
+
prev.next = a.remove
|
12
|
+
parent.text.strip.empty? and parent.remove
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def termdomain1_cleanup(xmldoc)
|
17
|
+
xmldoc.xpath("//term").each do |t|
|
18
|
+
d = t.xpath("./domain | ./subject").last or next
|
19
|
+
defn = d.at("../definition") and defn.previous = d.remove
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def termdefinition_cleanup(xmldoc)
|
24
|
+
generate_termdefinitions(xmldoc)
|
25
|
+
split_termdefinitions(xmldoc)
|
26
|
+
alternate_termdefinitions(xmldoc)
|
27
|
+
end
|
28
|
+
|
29
|
+
TERMDEF_BLOCKS =
|
30
|
+
"./p | ./ol | ./dl[not(@metadata = 'true')] | ./ul | ./figure | "\
|
31
|
+
"./formula | ./table".freeze
|
32
|
+
|
33
|
+
def generate_termdefinitions(xmldoc)
|
34
|
+
xmldoc.xpath("//term[not(definition)]").each do |d|
|
35
|
+
first_child = d.at(TERMDEF_BLOCKS) || next
|
36
|
+
t = Nokogiri::XML::Element.new("definition", xmldoc)
|
37
|
+
first_child.replace(t)
|
38
|
+
t << first_child.remove
|
39
|
+
d.xpath(TERMDEF_BLOCKS).each do |n|
|
40
|
+
t << n.remove
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def split_termdefinitions(xmldoc)
|
46
|
+
xmldoc.xpath("//definition").each do |d|
|
47
|
+
if d.at("./p | ./ol | ./dl | ./ul")
|
48
|
+
d.children = "<verbal-definition>#{d.children}</verbal-definition>"
|
49
|
+
else
|
50
|
+
d.children = "<non-verbal-representation>"\
|
51
|
+
"#{d.children}</non-verbal-representation>"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def alternate_termdefinitions(xmldoc)
|
57
|
+
xmldoc.xpath("//term").each do |t|
|
58
|
+
t.xpath("./definition").each do |d|
|
59
|
+
d1 = d.next_element or next
|
60
|
+
if (v = d.at("./verbal-definition")) &&
|
61
|
+
!d.at("./non-verbal-representation") &&
|
62
|
+
!d1.at("./verbal-definition") &&
|
63
|
+
nv = d1.at("./non-verbal-representation")
|
64
|
+
v.next = nv.remove
|
65
|
+
d1.remove
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def termdocsource_cleanup(xmldoc)
|
72
|
+
f = xmldoc.at("//preface | //sections")
|
73
|
+
xmldoc.xpath("//termdocsource").each { |s| f.previous = s.remove }
|
74
|
+
end
|
75
|
+
|
76
|
+
def term_children_cleanup(xmldoc)
|
77
|
+
xmldoc.xpath("//terms[terms]").each { |t| t.name = "clause" }
|
78
|
+
xmldoc.xpath("//term").each do |t|
|
79
|
+
%w(termnote termexample termsource term).each do |w|
|
80
|
+
t.xpath("./#{w}").each { |n| t << n.remove }
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def termdef_from_termbase(xmldoc)
|
86
|
+
xmldoc.xpath("//term").each do |x|
|
87
|
+
if (c = x.at("./origin/termref")) && !x.at("./definition")
|
88
|
+
x.at("./origin").previous = fetch_termbase(c["base"], c.text)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def termnote_example_cleanup(xmldoc)
|
94
|
+
%w(note example).each do |w|
|
95
|
+
xmldoc.xpath("//term#{w}[not(ancestor::term)]").each do |x|
|
96
|
+
x.name = w
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def termdef_cleanup(xmldoc)
|
102
|
+
termdef_unnest_cleanup(xmldoc)
|
103
|
+
Metanorma::Standoc::TermLookupCleanup.new(xmldoc, @log).call
|
104
|
+
term_nonverbal_designations(xmldoc)
|
105
|
+
term_dl_to_metadata(xmldoc)
|
106
|
+
term_termsource_to_designation(xmldoc)
|
107
|
+
term_designation_reorder(xmldoc)
|
108
|
+
termdef_from_termbase(xmldoc)
|
109
|
+
termdomain_cleanup(xmldoc)
|
110
|
+
termdef_stem_cleanup(xmldoc)
|
111
|
+
termdefinition_cleanup(xmldoc)
|
112
|
+
termdomain1_cleanup(xmldoc)
|
113
|
+
termnote_example_cleanup(xmldoc)
|
114
|
+
term_children_cleanup(xmldoc)
|
115
|
+
termdocsource_cleanup(xmldoc)
|
116
|
+
end
|
117
|
+
|
118
|
+
def index_cleanup(xmldoc)
|
119
|
+
return unless @index_terms
|
120
|
+
|
121
|
+
xmldoc.xpath("//preferred").each do |p|
|
122
|
+
index_cleanup1(p.at("./expression/name | ./letter-symbol/name"),
|
123
|
+
p.xpath("./field-of-application | ./usage-info")
|
124
|
+
&.map(&:text)&.join(", "))
|
125
|
+
end
|
126
|
+
xmldoc.xpath("//definitions/dl/dt").each do |p|
|
127
|
+
index_cleanup1(p, "")
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
def index_cleanup1(term, fieldofappl)
|
132
|
+
return unless term
|
133
|
+
|
134
|
+
idx = term.children.dup
|
135
|
+
fieldofappl.empty? or idx << ", <#{fieldofappl}>"
|
136
|
+
term << "<index><primary>#{idx.to_xml}</primary></index>"
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Standoc
|
3
|
+
module Cleanup
|
4
|
+
def termdef_stem_cleanup(xmldoc)
|
5
|
+
termdef_stem2admitted(xmldoc)
|
6
|
+
xmldoc.xpath("//term//expression/name[stem]").each do |n|
|
7
|
+
test = n.dup
|
8
|
+
test.at("./stem").remove
|
9
|
+
next unless test.text.strip.empty?
|
10
|
+
|
11
|
+
n.parent.name = "letter-symbol"
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def termdef_stem2admitted(xmldoc)
|
16
|
+
xmldoc.xpath("//term/p/stem").each do |a|
|
17
|
+
if initial_formula(a.parent)
|
18
|
+
parent = a.parent
|
19
|
+
parent.replace("<admitted>#{term_expr(a.to_xml)}</admitted>")
|
20
|
+
end
|
21
|
+
end
|
22
|
+
xmldoc.xpath("//term/formula").each do |a|
|
23
|
+
initial_formula(a) and
|
24
|
+
a.replace("<admitted>#{term_expr(a.children.to_xml)}</admitted>")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def initial_formula(elem)
|
29
|
+
elem.elements.size == 1 && # para contains just stem expression
|
30
|
+
!elem.at("./preceding-sibling::p | ./preceding-sibling::dl | "\
|
31
|
+
"./preceding-sibling::ol | ./preceding-sibling::ul")
|
32
|
+
end
|
33
|
+
|
34
|
+
# release termdef tags from surrounding paras
|
35
|
+
def termdef_unnest_cleanup(xmldoc)
|
36
|
+
desgn = "//p/admitted | //p/deprecates | //p/preferred | //p//related"
|
37
|
+
nodes = xmldoc.xpath(desgn)
|
38
|
+
while !nodes.empty?
|
39
|
+
nodes[0].parent.replace(nodes[0].parent.children)
|
40
|
+
nodes = xmldoc.xpath(desgn)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def term_dl_to_metadata(xmldoc)
|
45
|
+
xmldoc.xpath("//term[dl[@metadata = 'true']]").each do |t|
|
46
|
+
t.xpath("./dl[@metadata = 'true']").each do |dl|
|
47
|
+
prev = related2pref(dl_to_designation(dl)) or next
|
48
|
+
term_dl_to_designation_metadata(prev, dl)
|
49
|
+
term_dl_to_term_metadata(prev, dl)
|
50
|
+
term_dl_to_expression_metadata(prev, dl)
|
51
|
+
dl.remove
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def term_dl_to_term_metadata(prev, dlist)
|
57
|
+
return unless prev.name == "preferred" &&
|
58
|
+
prev.at("./preceding-sibling::preferred").nil?
|
59
|
+
|
60
|
+
ins = term_element_insert_point(prev)
|
61
|
+
%w(domain subject).each do |a|
|
62
|
+
ins = dl_to_elems(ins, prev.parent, dlist, a)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def term_dl_to_designation_metadata(prev, dlist)
|
67
|
+
%w(absent geographic-area).each do |a|
|
68
|
+
dl_to_attrs(prev, dlist, a)
|
69
|
+
end
|
70
|
+
%w(field-of-application usage-info).reverse.each do |a|
|
71
|
+
dl_to_elems(prev.at("./expression"), prev, dlist, a)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Walk forward from +prev+ over the run of sibling elements that are
# designations or designation metadata (preferred, admitted, deprecates,
# related, domain, dl) and return the last element of that run.
#
# @param prev [#next_element, #name, nil] element to start from
# @return [Object, nil] +prev+ itself when the next element is not one of
#   the listed names (or there is none); otherwise the last consecutive
#   following element whose name is in the list
def term_element_insert_point(prev)
  skippable = %w(preferred admitted deprecates related domain dl)
  ins = prev
  loop do
    following = ins&.next_element
    break unless skippable.include?(following&.name)

    ins = following
  end
  ins
end
|
83
|
+
|
84
|
+
def term_dl_to_expression_metadata(prev, dlist)
|
85
|
+
term_dl_to_expression_root_metadata(prev, dlist)
|
86
|
+
term_dl_to_expression_name_metadata(prev, dlist)
|
87
|
+
term_to_letter_symbol(prev, dlist)
|
88
|
+
end
|
89
|
+
|
90
|
+
def term_dl_to_expression_root_metadata(prev, dlist)
|
91
|
+
%w(isInternational).each do |a|
|
92
|
+
p = prev.at("./expression | ./letter-symbol | ./graphical-symbol")
|
93
|
+
dl_to_attrs(p, dlist, a)
|
94
|
+
end
|
95
|
+
%w(language script type).each do |a|
|
96
|
+
p = prev.at("./expression") or next
|
97
|
+
dl_to_attrs(p, dlist, a)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def term_dl_to_expression_name_metadata(prev, dlist)
|
102
|
+
%w(abbreviation-type pronunciation).reverse.each do |a|
|
103
|
+
dl_to_elems(prev.at("./expression/name"), prev, dlist, a)
|
104
|
+
end
|
105
|
+
g = dlist.at("./dt[text()='grammar']/following::dd//dl") and
|
106
|
+
term_dl_to_expression_grammar(prev, g)
|
107
|
+
end
|
108
|
+
|
109
|
+
def term_dl_to_expression_grammar(prev, dlist)
|
110
|
+
prev.at(".//expression") or return
|
111
|
+
prev.at(".//expression") << "<grammar><sentinel/></grammar>"
|
112
|
+
%w(gender number isPreposition isParticiple isAdjective isAdverb isNoun
|
113
|
+
grammar-value).reverse.each do |a|
|
114
|
+
dl_to_elems(prev.at(".//expression/grammar/*"), prev.elements.last,
|
115
|
+
dlist, a)
|
116
|
+
end
|
117
|
+
term_dl_to_designation_category(prev, "gender")
|
118
|
+
term_dl_to_designation_category(prev, "number")
|
119
|
+
prev.at(".//expression/grammar/sentinel").remove
|
120
|
+
end
|
121
|
+
|
122
|
+
def term_dl_to_designation_category(prev, category)
|
123
|
+
cat = prev.at(".//expression/grammar/#{category}")
|
124
|
+
/,/.match?(cat&.text) and
|
125
|
+
cat.replace(cat.text.split(/,\s*/)
|
126
|
+
.map { |x| "<#{category}>#{x}</#{category}>" }.join)
|
127
|
+
end
|
128
|
+
|
129
|
+
def term_to_letter_symbol(prev, dlist)
|
130
|
+
ls = dlist.at("./dt[text()='letter-symbol']/following::dd/p")
|
131
|
+
return unless ls&.text == "true"
|
132
|
+
|
133
|
+
prev.at(".//expression").name = "letter-symbol"
|
134
|
+
end
|
135
|
+
|
136
|
+
def dl_to_designation(dlist)
|
137
|
+
prev = dlist.previous_element
|
138
|
+
unless %w(preferred admitted deprecates related).include? prev&.name
|
139
|
+
@log.add("AsciiDoc Input", dlist, "Metadata definition list does "\
|
140
|
+
"not follow a term designation")
|
141
|
+
return nil
|
142
|
+
end
|
143
|
+
prev
|
144
|
+
end
|
145
|
+
|
146
|
+
def term_nonverbal_designations(xmldoc)
|
147
|
+
xmldoc.xpath("//term/preferred | //term/admitted | //term/deprecates")
|
148
|
+
.each do |d|
|
149
|
+
d.text.strip.empty? or next
|
150
|
+
n = d.next_element
|
151
|
+
if %w(formula figure).include?(n&.name)
|
152
|
+
term_nonverbal_designations1(d, n)
|
153
|
+
else d.at("./expression/name") or
|
154
|
+
d.children = term_expr("")
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
def term_nonverbal_designations1(desgn, elem)
|
160
|
+
desgn = related2pref(desgn)
|
161
|
+
if elem.name == "figure"
|
162
|
+
elem.at("./name").remove
|
163
|
+
desgn.children =
|
164
|
+
"<graphical-symbol>#{elem.remove.to_xml}</graphical-symbol>"
|
165
|
+
else
|
166
|
+
desgn.children = term_expr(elem.at("./stem").to_xml)
|
167
|
+
elem.remove
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def term_termsource_to_designation(xmldoc)
|
172
|
+
xmldoc.xpath("//term/termsource").each do |t|
|
173
|
+
p = t.previous_element
|
174
|
+
while %w(domain subject).include? p&.name
|
175
|
+
p = p.previous_element
|
176
|
+
end
|
177
|
+
%w(preferred admitted deprecates related).include?(p&.name) or
|
178
|
+
next
|
179
|
+
related2pref(p) << t.remove
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
def term_designation_reorder(xmldoc)
|
184
|
+
xmldoc.xpath("//term").each do |t|
|
185
|
+
%w(preferred admitted deprecates related)
|
186
|
+
.each_with_object([]) do |tag, m|
|
187
|
+
t.xpath("./#{tag}").each { |x| m << x.remove }
|
188
|
+
end.reverse.each do |x|
|
189
|
+
t.children.first.previous = x
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
# Resolve a designation element to the element that carries its content.
#
# For an element named "related" this is whatever +elem.at("./preferred")+
# returns; any other element, and nil, is returned unchanged.
#
# @param elem [#name, #at, nil] designation element
# @return [Object, nil] the "./preferred" lookup result for a related
#   element (nil if there is none), otherwise +elem+ itself
def related2pref(elem)
  # The original assigned the lookup result back to the parameter inside a
  # ternary; that assignment was never read, so it is dropped here.
  return elem unless elem&.name == "related"

  elem.at("./preferred")
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Standoc
|
3
|
+
module Cleanup
|
4
|
+
def textcleanup(result)
|
5
|
+
text = result.flatten.map { |l| l.sub(/\s*$/, "") } * "\n"
|
6
|
+
!@keepasciimath and text = asciimath2mathml(text)
|
7
|
+
text = text.gsub(/\s+<fn /, "<fn ")
|
8
|
+
text.gsub(%r{<passthrough\s+formats="metanorma">([^<]*)
|
9
|
+
</passthrough>}mx) { HTMLEntities.new.decode($1) }
|
10
|
+
end
|
11
|
+
|
12
|
+
IGNORE_DUMBQUOTES =
|
13
|
+
"//pre | //pre//* | //tt | //tt//* | "\
|
14
|
+
"//sourcecode | //sourcecode//* | //bibdata//* | //stem | "\
|
15
|
+
"//stem//* | //figure[@class = 'pseudocode'] | "\
|
16
|
+
"//figure[@class = 'pseudocode']//*".freeze
|
17
|
+
|
18
|
+
def smartquotes_cleanup(xmldoc)
|
19
|
+
xmldoc.xpath("//date").each { |d| Metanorma::Utils::endash_date(d) }
|
20
|
+
if @smartquotes then smartquotes_cleanup1(xmldoc)
|
21
|
+
else dumbquote_cleanup(xmldoc)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def smartquotes_cleanup1(xmldoc)
|
26
|
+
uninterrupt_quotes_around_xml(xmldoc)
|
27
|
+
dumb2smart_quotes(xmldoc)
|
28
|
+
end
|
29
|
+
|
30
|
+
# "abc<tag/>", def => "abc",<tag/> def
|
31
|
+
def uninterrupt_quotes_around_xml(xmldoc)
|
32
|
+
xmldoc.traverse do |n|
|
33
|
+
next unless n.text? && n&.previous&.element?
|
34
|
+
next unless /^['"]/.match?(n.text)
|
35
|
+
next unless n.previous.ancestors("pre, tt, sourcecode, stem, figure")
|
36
|
+
.empty?
|
37
|
+
|
38
|
+
uninterrupt_quotes_around_xml1(n.previous)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def uninterrupt_quotes_around_xml1(elem)
|
43
|
+
prev = elem.at(".//preceding::text()[1]") or return
|
44
|
+
/\S$/.match?(prev.text) or return
|
45
|
+
foll = elem.at(".//following::text()[1]")
|
46
|
+
m = /^(["'][[:punct:]]*)(\s|$)/
|
47
|
+
.match(HTMLEntities.new.decode(foll&.text)) or return
|
48
|
+
foll.content = foll.text.sub(/^(["'][[:punct:]]*)/, "")
|
49
|
+
prev.content = "#{prev.text}#{m[1]}"
|
50
|
+
end
|
51
|
+
|
52
|
+
def dumb2smart_quotes(xmldoc)
|
53
|
+
(xmldoc.xpath("//*[child::text()]") - xmldoc.xpath(IGNORE_DUMBQUOTES))
|
54
|
+
.each do |x|
|
55
|
+
x.children.each do |n|
|
56
|
+
next unless n.text?
|
57
|
+
|
58
|
+
/[-'"(<>]|\.\.|\dx/.match(n) or next
|
59
|
+
|
60
|
+
n.replace(Metanorma::Utils::smartformat(n.text))
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def dumbquote_cleanup(xmldoc)
|
66
|
+
xmldoc.traverse do |n|
|
67
|
+
next unless n.text?
|
68
|
+
|
69
|
+
n.replace(n.text.gsub(/(?<=\p{Alnum})\u2019(?=\p{Alpha})/, "'")) # .
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Standoc
|
3
|
+
module Cleanup
|
4
|
+
def toc_cleanup(xmldoc)
|
5
|
+
toc_cleanup_para(xmldoc)
|
6
|
+
xmldoc.xpath("//toc").each { |t| toc_cleanup1(t, xmldoc) }
|
7
|
+
toc_cleanup_clause(xmldoc)
|
8
|
+
toc_metadata(xmldoc)
|
9
|
+
end
|
10
|
+
|
11
|
+
def toc_cleanup_para(xmldoc)
|
12
|
+
xmldoc.xpath("//p[toc]").each do |x|
|
13
|
+
x.xpath("./toc").reverse.each do |t|
|
14
|
+
x.next = t
|
15
|
+
end
|
16
|
+
x.remove if x.text.strip.empty?
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def toc_index(toc, xmldoc)
|
21
|
+
depths = toc_index_depths(toc)
|
22
|
+
depths.keys.each_with_object([]) do |key, arr|
|
23
|
+
xmldoc.xpath(key).each do |x|
|
24
|
+
arr << toc_index1(key, x, depths)
|
25
|
+
end
|
26
|
+
end.sort_by { |a| a[:line] }
|
27
|
+
end
|
28
|
+
|
29
|
+
def toc_index1(key, entry, depths)
|
30
|
+
t = entry.at("./following-sibling::variant-title[@type = 'toc']") and
|
31
|
+
entry = t
|
32
|
+
{ text: entry.children.to_xml, depth: depths[key].to_i,
|
33
|
+
target: entry.xpath("(./ancestor-or-self::*/@id)[last()]")[0].text,
|
34
|
+
line: entry.line }
|
35
|
+
end
|
36
|
+
|
37
|
+
# Build a lookup from each ./toc-xpath child of +toc+ to its depth.
#
# The key is the child's text and the value is its "depth" attribute,
# returned as read (not converted to an integer here).
#
# @param toc [#xpath] the toc element
# @return [Hash] text of each toc-xpath child => its "depth" attribute
def toc_index_depths(toc)
  toc.xpath("./toc-xpath").map { |x| [x.text, x["depth"]] }.to_h
end
|
42
|
+
|
43
|
+
def toc_cleanup1(toc, xmldoc)
|
44
|
+
depth = 1
|
45
|
+
ret = ""
|
46
|
+
toc_index(toc, xmldoc).each do |x|
|
47
|
+
ret = toc_cleanup1_entry(x, depth, ret)
|
48
|
+
depth = x[:depth]
|
49
|
+
end
|
50
|
+
toc.children = "<ul>#{ret}</ul>"
|
51
|
+
end
|
52
|
+
|
53
|
+
# Append one table-of-contents entry to the markup accumulated so far.
#
# When the entry is shallower than the previous depth, one "</ul></li>" is
# emitted per level closed; when it is deeper, one "<li><ul>" per level
# opened. The entry itself follows as an <li> wrapping an <xref>.
#
# @param entry [Hash] entry with :depth (Integer), :target and :text
# @param depth [Integer] depth of the previously emitted entry
# @param ret [String] markup built so far (not mutated)
# @return [String] +ret+ extended with nesting markup and the new entry
def toc_cleanup1_entry(entry, depth, ret)
  shift = entry[:depth] - depth
  nesting =
    if shift.negative? then "</ul></li>" * -shift
    elsif shift.positive? then "<li><ul>" * shift
    else ""
    end
  ret + nesting +
    "<li><xref target='#{entry[:target]}'>#{entry[:text]}</xref></li>"
end
|
61
|
+
|
62
|
+
def toc_cleanup_clause(xmldoc)
|
63
|
+
xmldoc
|
64
|
+
.xpath("//clause[@type = 'toc'] | //annex[@type = 'toc']").each do |c|
|
65
|
+
c.xpath(".//ul[not(ancestor::ul)]").each do |ul|
|
66
|
+
toc_cleanup_clause_entry(xmldoc, ul)
|
67
|
+
ul.replace("<toc>#{ul.to_xml}</toc>")
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
def toc_cleanup_clause_entry(xmldoc, list)
|
73
|
+
list.xpath(".//xref[not(text())]").each do |x|
|
74
|
+
c1 = xmldoc.at("//*[@id = '#{x['target']}']")
|
75
|
+
t = c1.at("./variant-title[@type = 'toc']") || c1.at("./title")
|
76
|
+
x << t.dup.children
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def toc_metadata(xmldoc)
|
81
|
+
return unless @htmltoclevels || @doctoclevels || @toclevels
|
82
|
+
|
83
|
+
ins = xmldoc.at("//misc-container") ||
|
84
|
+
xmldoc.at("//bibdata").after("<misc-container/>").next_element
|
85
|
+
toc_metadata1(ins)
|
86
|
+
end
|
87
|
+
|
88
|
+
# Append a <presentation-metadata> name/value entry to +ins+ for each
# table-of-contents depth setting that is set.
#
# The settings are read from @toclevels, @htmltoclevels and @doctoclevels;
# any that is nil/false is skipped. The original listed @toclevels three
# times, so the general setting was written three times and the HTML- and
# DOC-specific settings were never emitted, even though the caller checks
# all three before invoking this method.
#
# NOTE(review): the "HTML TOC Heading Levels" and "DOC TOC Heading Levels"
# labels are not shown elsewhere in this file; confirm them against
# whatever reads these entries.
#
# @param ins [#<<] container that receives the generated markup strings
def toc_metadata1(ins)
  [[@toclevels, "TOC Heading Levels"],
   [@htmltoclevels, "HTML TOC Heading Levels"],
   [@doctoclevels, "DOC TOC Heading Levels"]].each do |value, label|
    value or next
    ins << "<presentation-metadata><name>#{label}</name>" \
           "<value>#{value}</value></presentation-metadata>"
  end
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|