metanorma-standoc 1.11.3 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +3 -31
- data/.gitignore +23 -0
- data/Gemfile +0 -1
- data/lib/asciidoctor/standoc/base.rb +2 -145
- data/lib/asciidoctor/standoc/blocks.rb +2 -238
- data/lib/asciidoctor/standoc/blocks_notes.rb +2 -100
- data/lib/asciidoctor/standoc/cleanup.rb +2 -208
- data/lib/asciidoctor/standoc/cleanup_amend.rb +2 -53
- data/lib/asciidoctor/standoc/cleanup_block.rb +2 -172
- data/lib/asciidoctor/standoc/cleanup_boilerplate.rb +2 -212
- data/lib/asciidoctor/standoc/cleanup_footnotes.rb +2 -108
- data/lib/asciidoctor/standoc/cleanup_image.rb +2 -69
- data/lib/asciidoctor/standoc/cleanup_inline.rb +2 -189
- data/lib/asciidoctor/standoc/cleanup_maths.rb +2 -221
- data/lib/asciidoctor/standoc/cleanup_ref.rb +2 -169
- data/lib/asciidoctor/standoc/cleanup_ref_dl.rb +2 -103
- data/lib/asciidoctor/standoc/cleanup_reqt.rb +2 -110
- data/lib/asciidoctor/standoc/cleanup_section.rb +2 -184
- data/lib/asciidoctor/standoc/cleanup_section_names.rb +2 -91
- data/lib/asciidoctor/standoc/cleanup_symbols.rb +2 -47
- data/lib/asciidoctor/standoc/cleanup_table.rb +2 -67
- data/lib/asciidoctor/standoc/cleanup_terms.rb +2 -139
- data/lib/asciidoctor/standoc/cleanup_terms_designations.rb +2 -192
- data/lib/asciidoctor/standoc/cleanup_text.rb +2 -95
- data/lib/asciidoctor/standoc/cleanup_toc.rb +3 -0
- data/lib/asciidoctor/standoc/cleanup_xref.rb +2 -106
- data/lib/asciidoctor/standoc/converter.rb +2 -123
- data/lib/asciidoctor/standoc/datamodel/attributes_table_preprocessor.rb +2 -56
- data/lib/asciidoctor/standoc/datamodel/diagram_preprocessor.rb +2 -102
- data/lib/asciidoctor/standoc/datamodel/plantuml_renderer.rb +3 -404
- data/lib/asciidoctor/standoc/deprecated.rb +5 -0
- data/lib/asciidoctor/standoc/front.rb +2 -219
- data/lib/asciidoctor/standoc/front_contributor.rb +2 -191
- data/lib/asciidoctor/standoc/inline.rb +2 -231
- data/lib/asciidoctor/standoc/lists.rb +2 -119
- data/lib/asciidoctor/standoc/macros.rb +2 -203
- data/lib/asciidoctor/standoc/macros_form.rb +2 -62
- data/lib/asciidoctor/standoc/macros_note.rb +2 -44
- data/lib/asciidoctor/standoc/macros_plantuml.rb +2 -112
- data/lib/asciidoctor/standoc/macros_terms.rb +2 -180
- data/lib/asciidoctor/standoc/ref.rb +2 -251
- data/lib/asciidoctor/standoc/ref_sect.rb +2 -153
- data/lib/asciidoctor/standoc/ref_utility.rb +2 -0
- data/lib/asciidoctor/standoc/render.rb +2 -116
- data/lib/asciidoctor/standoc/reqt.rb +2 -89
- data/lib/asciidoctor/standoc/section.rb +2 -194
- data/lib/asciidoctor/standoc/table.rb +2 -84
- data/lib/asciidoctor/standoc/term_lookup_cleanup.rb +2 -178
- data/lib/asciidoctor/standoc/terms.rb +2 -153
- data/lib/asciidoctor/standoc/utils.rb +2 -100
- data/lib/asciidoctor/standoc/validate.rb +2 -157
- data/lib/asciidoctor/standoc/validate_section.rb +2 -54
- data/lib/isodoc/html/htmlstyle.css +44 -29
- data/lib/isodoc/html/htmlstyle.scss +17 -12
- data/lib/metanorma/standoc/base.rb +163 -0
- data/lib/{asciidoctor → metanorma}/standoc/basicdoc.rng +0 -0
- data/lib/{asciidoctor → metanorma}/standoc/biblio.rng +2 -2
- data/lib/metanorma/standoc/blocks.rb +239 -0
- data/lib/metanorma/standoc/blocks_notes.rb +101 -0
- data/lib/metanorma/standoc/cleanup.rb +157 -0
- data/lib/metanorma/standoc/cleanup_amend.rb +54 -0
- data/lib/metanorma/standoc/cleanup_block.rb +173 -0
- data/lib/metanorma/standoc/cleanup_boilerplate.rb +213 -0
- data/lib/metanorma/standoc/cleanup_footnotes.rb +109 -0
- data/lib/metanorma/standoc/cleanup_image.rb +70 -0
- data/lib/metanorma/standoc/cleanup_inline.rb +190 -0
- data/lib/metanorma/standoc/cleanup_maths.rb +222 -0
- data/lib/metanorma/standoc/cleanup_ref.rb +170 -0
- data/lib/metanorma/standoc/cleanup_ref_dl.rb +104 -0
- data/lib/metanorma/standoc/cleanup_reqt.rb +111 -0
- data/lib/metanorma/standoc/cleanup_section.rb +212 -0
- data/lib/metanorma/standoc/cleanup_section_names.rb +92 -0
- data/lib/metanorma/standoc/cleanup_symbols.rb +48 -0
- data/lib/metanorma/standoc/cleanup_table.rb +68 -0
- data/lib/metanorma/standoc/cleanup_terms.rb +140 -0
- data/lib/metanorma/standoc/cleanup_terms_designations.rb +199 -0
- data/lib/metanorma/standoc/cleanup_text.rb +74 -0
- data/lib/metanorma/standoc/cleanup_toc.rb +98 -0
- data/lib/metanorma/standoc/cleanup_xref.rb +107 -0
- data/lib/metanorma/standoc/converter.rb +126 -0
- data/lib/metanorma/standoc/datamodel/attributes_table_preprocessor.rb +57 -0
- data/lib/metanorma/standoc/datamodel/diagram_preprocessor.rb +103 -0
- data/lib/metanorma/standoc/datamodel/plantuml_renderer.rb +409 -0
- data/lib/metanorma/standoc/front.rb +224 -0
- data/lib/metanorma/standoc/front_contributor.rb +192 -0
- data/lib/metanorma/standoc/inline.rb +232 -0
- data/lib/{asciidoctor → metanorma}/standoc/isodoc.rng +104 -3
- data/lib/metanorma/standoc/lists.rb +120 -0
- data/lib/metanorma/standoc/macros.rb +205 -0
- data/lib/metanorma/standoc/macros_embed.rb +72 -0
- data/lib/metanorma/standoc/macros_form.rb +63 -0
- data/lib/metanorma/standoc/macros_note.rb +45 -0
- data/lib/metanorma/standoc/macros_plantuml.rb +113 -0
- data/lib/metanorma/standoc/macros_terms.rb +194 -0
- data/lib/metanorma/standoc/ref.rb +243 -0
- data/lib/metanorma/standoc/ref_sect.rb +153 -0
- data/lib/{asciidoctor/standoc/ref_date_id.rb → metanorma/standoc/ref_utility.rb} +43 -5
- data/lib/metanorma/standoc/render.rb +115 -0
- data/lib/metanorma/standoc/reqt.rb +90 -0
- data/lib/{asciidoctor → metanorma}/standoc/reqt.rng +0 -0
- data/lib/metanorma/standoc/section.rb +209 -0
- data/lib/metanorma/standoc/table.rb +85 -0
- data/lib/metanorma/standoc/term_lookup_cleanup.rb +179 -0
- data/lib/metanorma/standoc/terms.rb +160 -0
- data/lib/metanorma/standoc/utils.rb +101 -0
- data/lib/metanorma/standoc/validate.rb +158 -0
- data/lib/metanorma/standoc/validate_section.rb +55 -0
- data/lib/metanorma/standoc/version.rb +1 -1
- data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/model_representation.adoc.erb +0 -0
- data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/plantuml_representation.adoc.erb +0 -0
- data/lib/metanorma-standoc.rb +1 -1
- data/metanorma-standoc.gemspec +1 -1
- data/spec/assets/a1.adoc +8 -0
- data/spec/assets/a2.adoc +8 -0
- data/spec/assets/a3.adoc +9 -0
- data/spec/assets/a4.adoc +4 -0
- data/spec/{asciidoctor → metanorma}/base_spec.rb +499 -407
- data/spec/{asciidoctor → metanorma}/blank_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/blocks_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/cleanup_blocks_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/cleanup_sections_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/cleanup_spec.rb +5 -5
- data/spec/{asciidoctor → metanorma}/cleanup_terms_spec.rb +227 -119
- data/spec/{asciidoctor → metanorma}/datamodel/attributes_table_preprocessor_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/datamodel/diagram_preprocessor_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/inline_spec.rb +170 -1
- data/spec/{asciidoctor → metanorma}/isobib_cache_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/lists_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/macros_json2text_spec.rb +0 -0
- data/spec/{asciidoctor → metanorma}/macros_plantuml_spec.rb +3 -3
- data/spec/{asciidoctor → metanorma}/macros_spec.rb +97 -6
- data/spec/{asciidoctor → metanorma}/macros_yaml2text_spec.rb +0 -0
- data/spec/metanorma/refs_dl_spec.rb +863 -0
- data/spec/{asciidoctor → metanorma}/refs_spec.rb +522 -15
- data/spec/{asciidoctor → metanorma}/section_spec.rb +59 -1
- data/spec/{asciidoctor → metanorma}/table_spec.rb +1 -1
- data/spec/{asciidoctor → metanorma}/validate_spec.rb +2 -2
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +46 -46
- data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
- data/spec/vcr_cassettes/hide_refs.yml +599 -0
- data/spec/vcr_cassettes/isobib_get_123.yml +12 -12
- data/spec/vcr_cassettes/isobib_get_123_1.yml +24 -24
- data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +35 -35
- data/spec/vcr_cassettes/isobib_get_123_2001.yml +13 -13
- data/spec/vcr_cassettes/isobib_get_124.yml +10 -10
- data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +18 -18
- data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +46 -46
- metadata +88 -32
- data/spec/asciidoctor/refs_dl_spec.rb +0 -864
@@ -0,0 +1,109 @@
|
|
1
|
+
require "date"
|
2
|
+
require "htmlentities"
|
3
|
+
require "json"
|
4
|
+
|
5
|
+
module Metanorma
  module Standoc
    # Footnote-related steps of the XML cleanup pass: resolving footnote
    # blocks, relocating title and figure footnotes, and renumbering.
    module Cleanup
      # Returns the content of footnote +fn+ as a string with every
      # ` id="..."` attribute stripped, so that footnotes differing only in
      # generated identifiers compare as equal.
      # +fn.children+ may be anything responding to +to_xml+, or a string.
      def footnote_content(fn)
        kids = fn.children
        xml = kids.respond_to?(:to_xml) ? kids.to_xml : kids
        xml.gsub(/ id="[^"]+"/, "")
      end

      # include footnotes inside figure if they are the only content
      # of the paras following
      #
      # Repeats until a pass moves nothing, because removing one paragraph
      # can make the next paragraph the figure's immediate sibling.
      # NOTE(review): only the first child element of the paragraph is moved
      # into the figure; anything else in the paragraph is dropped with it.
      # Confirm a paragraph never carries more than one footnote here.
      def figure_footnote_cleanup(xmldoc)
        q = "//figure/following-sibling::*[1][self::p and *[1][self::fn]]"
        loop do
          moved = false
          xmldoc.xpath(q).each do |para|
            # leave paragraphs that carry alphabetic text of their own
            next if para.children.any? do |c|
              c.text? && /[[:alpha:]]/.match?(c.text)
            end

            para.previous_element << para.first_element_child.remove
            para.remove
            moved = true
          end
          break unless moved
        end
      end

      # Assigns a lettered reference ("a", "b", ...) to a table/figure
      # footnote and marks it with a temporary +table+ attribute.
      # +i+ is the running count and +seen+ maps footnote content to its
      # number; both are returned, updated, as +[i, seen]+.
      def table_footnote_renumber1(fn, i, seen)
        outnum, i = footnote_number(footnote_content(fn), i, seen)
        fn["reference"] = (outnum - 1 + "a".ord).chr
        fn["table"] = true
        [i, seen]
      end

      # Letters the footnotes of each table and figure independently,
      # skipping footnotes that sit inside a +name+ element.
      def table_footnote_renumber(xmldoc)
        xmldoc.xpath("//table | //figure").each do |t|
          seen = {}
          i = 0
          t.xpath(".//fn[not(ancestor::name)]").each do |fn|
            i, seen = table_footnote_renumber1(fn, i, seen)
          end
        end
      end

      # Assigns a numeric reference ("1", "2", ...) to a footnote unless it
      # carries the temporary +table+ marker. Returns +[i, seen]+.
      def other_footnote_renumber1(fn, i, seen)
        return [i, seen] if fn["table"]

        outnum, i = footnote_number(footnote_content(fn), i, seen)
        fn["reference"] = outnum.to_s
        [i, seen]
      end

      # Numbers every footnote in the document that was not already
      # lettered as a table/figure footnote.
      def other_footnote_renumber(xmldoc)
        seen = {}
        i = 0
        xmldoc.xpath("//fn").each do |fn|
          i, seen = other_footnote_renumber1(fn, i, seen)
        end
      end

      # Turns footnotes found inside bibdata titles into
      # <note type="title-footnote"> elements placed just before
      # bibdata/language.
      def title_footnote_move(xmldoc)
        ins = xmldoc.at("//bibdata/language")
        xmldoc.xpath("//bibdata/title//fn").each do |f|
          f.name = "note"
          f["type"] = "title-footnote"
          f.delete("reference")
          ins.previous = f.remove
        end
      end

      # Replaces each <footnoteblock> with an <fn> whose content is taken
      # from the element whose id equals the block's text. An unresolvable
      # id is logged and the footnote is filled with "[ERROR]".
      def footnote_block_cleanup(xmldoc)
        xmldoc.xpath("//footnoteblock").each do |f|
          f.name = "fn"
          if (target = xmldoc.at("//*[@id = '#{f.text}']"))
            f.children = target.remove.children
          else
            @log.add("Crossreferences", f,
                     "Could not resolve footnoteblock:[#{f.text}]")
            f.children = "[ERROR]"
          end
        end
      end

      # Entry point: runs all footnote cleanup steps in order, then strips
      # the temporary +table+ marker from every footnote.
      def footnote_cleanup(xmldoc)
        footnote_block_cleanup(xmldoc)
        title_footnote_move(xmldoc)
        table_footnote_renumber(xmldoc)
        other_footnote_renumber(xmldoc)
        xmldoc.xpath("//fn").each { |fn| fn.delete("table") }
      end

      private

      # Looks up the number already given to +content+ in +seen+, or
      # allocates the next one. Returns +[number, updated_count]+.
      def footnote_number(content, count, seen)
        return [seen[content], count] if seen[content]

        count += 1
        seen[content] = count
        [count, count]
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Metanorma
  module Standoc
    # Image-related steps of the XML cleanup pass: svgmap normalisation
    # and optional conversion of image sources to data URIs.
    module Cleanup
      # Normalises every <svgmap>: moves attributes onto the contained
      # figure, rebuilds the map's children, then hands the document to
      # Metanorma::Utils.svgmap_rewrite together with @localdir.
      def svgmap_cleanup(xmldoc)
        svgmap_moveattrs(xmldoc)
        svgmap_populate(xmldoc)
        Metanorma::Utils::svgmap_rewrite(xmldoc, @localdir)
      end

      # True when +str+ is, in its entirety, an underscore followed by a
      # UUID-shaped hex string (either letter case). Returns false for nil.
      # Anchored with \A/\z so a multi-line string that merely contains a
      # GUID on one of its lines does not qualify.
      # NOTE(review): cleanup_inline.rb defines guid? on this same module
      # with lowercase-only matching; whichever file loads last wins.
      def guid?(str)
        /\A_[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\z/i
          .match?(str)
      end

      # For each <svgmap> containing a figure: gives the figure the map's
      # <name> if it has none, transfers the map's id when the figure's own
      # id is an auto-generated GUID, and moves the remaining attributes.
      def svgmap_moveattrs(xmldoc)
        xmldoc.xpath("//svgmap").each do |s|
          f = s.at(".//figure") or next
          t = s.at("./name")
          # prepend_child also copes with a figure that has no children
          f.prepend_child(t.remove) if t && !f.at("./name")
          if s["id"] && guid?(f["id"])
            f["id"] = s["id"]
            s.delete("id")
          end
          svgmap_moveattrs1(s, f)
        end
      end

      # Moves each listed attribute from +svgmap+ to +figure+, unless the
      # figure already has it or the svgmap lacks it.
      def svgmap_moveattrs1(svgmap, figure)
        %w(unnumbered number subsequence keep-with-next
           keep-lines-together tag multilingual-rendering).each do |a|
          next if figure[a] || !svgmap[a]

          figure[a] = svgmap[a]
          svgmap.delete(a)
        end
      end

      # Rebuilds each <svgmap> as its figure followed by one <target> per
      # list item that holds a cross-reference followed by further content;
      # that trailing content becomes the target's href.
      def svgmap_populate(xmldoc)
        xmldoc.xpath("//svgmap").each do |s|
          original = s.dup
          s.children.remove
          f = original.at(".//figure") and s << f
          original.xpath(".//li").each do |li|
            t = li.at(".//eref | .//link | .//xref") or next
            href = t.xpath("./following-sibling::node()")
            next if href.empty?

            s << %[<target href="#{svgmap_target(href)}">#{t.to_xml}</target>]
          end
        end
      end

      # Flattens +nodeset+ to text, first replacing each <link>'s content
      # with its target attribute, then dropping one leading separator
      # (comma, semicolon or space) and surrounding whitespace.
      def svgmap_target(nodeset)
        nodeset.each do |n|
          next unless n.name == "link"

          n.children = n["target"]
        end
        nodeset.text.sub(/^[,; ]/, "").strip
      end

      # When @datauriimage is set, rewrites every image's src through
      # Metanorma::Utils.datauri; otherwise returns +xmldoc+ untouched.
      def img_cleanup(xmldoc)
        return xmldoc unless @datauriimage

        xmldoc.xpath("//image").each do |i|
          i["src"] = Metanorma::Utils::datauri(i["src"], @localdir)
        end
      end
    end
  end
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
require "metanorma-utils"
|
2
|
+
require "digest"
|
3
|
+
|
4
|
+
module Metanorma
  module Standoc
    # Inline-level steps of the XML cleanup pass: bookmarks, concept and
    # related-term references, anchor normalisation, content-hash ids.
    module Cleanup
      # True when no non-whitespace text precedes the first child element
      # of +elem+ (also true when +elem+ has no child element at all).
      def empty_text_before_first_element(elem)
        elem.children.each do |c|
          return false if c.text? && /\S/.match?(c.text)
          return true if c.element?
        end
        true
      end

      # If the first child of +elem+ is a text node, strips a leading space
      # from it, or removes the node when it is all whitespace.
      # Does nothing when +elem+ has no children, which is the case when a
      # removed bookmark was the element's only content.
      def strip_initial_space(elem)
        first = elem.children.first
        return unless first&.text?

        if /\S/.match?(first.text)
          first.content = first.text.gsub(/^ /, "")
        else
          first.remove
        end
      end

      # Converts leading bookmarks of list items and definition terms into
      # ids on those elements.
      def bookmark_cleanup(xmldoc)
        li_bookmark_cleanup(xmldoc)
        dt_bookmark_cleanup(xmldoc)
      end

      # Moves the id of +bookmark+ onto +elem+, removes the bookmark, and
      # tidies the whitespace it leaves at the start of its former parent.
      def bookmark_to_id(elem, bookmark)
        parent = bookmark.parent
        elem["id"] = bookmark.remove["id"]
        strip_initial_space(parent)
      end

      # A bookmark opening the first paragraph of an <li>, with no text
      # before it, becomes the id of the <li>.
      def li_bookmark_cleanup(xmldoc)
        in_first_p = "./*[1][local-name() = 'p']/"\
                     "*[1][local-name() = 'bookmark']"
        xmldoc.xpath("//li[descendant::bookmark]").each do |x|
          next unless x.at(in_first_p) &&
            empty_text_before_first_element(x.elements[0])

          bookmark_to_id(x, x.elements[0].elements[0])
        end
      end

      # As for <li>, but a <dt> may hold the bookmark either inside its
      # first paragraph or directly as its first child element.
      def dt_bookmark_cleanup(xmldoc)
        in_first_p = "./*[1][local-name() = 'p']/"\
                     "*[1][local-name() = 'bookmark']"
        xmldoc.xpath("//dt[descendant::bookmark]").each do |x|
          if x.at(in_first_p) &&
              empty_text_before_first_element(x.elements[0])
            bookmark_to_id(x, x.elements[0].elements[0])
          elsif x.at("./*[1][local-name() = 'bookmark']") &&
              empty_text_before_first_element(x)
            bookmark_to_id(x, x.elements[0])
          end
        end
      end

      # Resolves every <concept> that has no <termxref>, first dropping an
      # empty <refterm>.
      def concept_cleanup(xmldoc)
        xmldoc.xpath("//concept[not(termxref)]").each do |x|
          term = x.at("./refterm")
          term&.remove if term&.text&.empty?
          concept_cleanup1(x)
        end
      end

      # Turns the +key+ attribute of +elem+ into a child reference element:
      # a termbase reference if the key contains ":", a bibliographic eref
      # if refid? accepts it, otherwise an internal xref. The key attribute
      # is removed afterwards.
      def concept_cleanup1(elem)
        elem.children.remove if elem&.children&.text&.strip&.empty?
        key_extract_locality(elem)
        if /:/.match?(elem["key"]) then concept_termbase_cleanup(elem)
        elsif refid? elem["key"] then concept_eref_cleanup(elem)
        else concept_xref_cleanup(elem)
        end
        elem.delete("key")
      end

      # Resolves every <related> that has no <termxref>, rewriting its
      # <refterm> as a <preferred> designation.
      def related_cleanup(xmldoc)
        xmldoc.xpath("//related[not(termxref)]").each do |x|
          term = x.at("./refterm")
          term.replace("<preferred>#{term_expr(term.children.to_xml)}"\
                       "</preferred>")
          concept_cleanup1(x)
        end
      end

      # Splits a key of the form "target,locality": the part after the
      # first comma becomes a <locality> child, the part before it remains
      # as the key.
      def key_extract_locality(elem)
        return unless /,/.match?(elem["key"])

        elem.add_child("<locality>#{elem['key'].sub(/^[^,]+,/, '')}</locality>")
        elem["key"] = elem["key"].sub(/,.*$/, "")
      end

      # Key "base:target" becomes <termref base=... target=...>, wrapping
      # the content of any <xrefrender> child.
      def concept_termbase_cleanup(elem)
        t = elem&.at("./xrefrender")&.remove&.children
        termbase, key = elem["key"].split(/:/, 2)
        elem.add_child(%(<termref base="#{termbase}" target="#{key}">) +
                       "#{t&.to_xml}</termref>")
      end

      # Key becomes the target of an <xref>, wrapping the content of any
      # <xrefrender> child.
      def concept_xref_cleanup(elem)
        t = elem&.at("./xrefrender")&.remove&.children
        elem.add_child(%(<xref target="#{elem['key']}">#{t&.to_xml}</xref>))
      end

      # Key becomes the bibitemid of an <eref>; any <locality> content is
      # parsed by extract_localities, and the <xrefrender> content is
      # appended afterwards.
      def concept_eref_cleanup(elem)
        t = elem&.at("./xrefrender")&.remove&.children&.to_xml
        l = elem&.at("./locality")&.remove&.children&.to_xml
        elem.add_child "<eref bibitemid='#{elem['key']}'>#{l}</eref>"
        extract_localities(elem.elements[-1])
        elem.elements[-1].add_child(t) if t
      end

      # Normalises an xref target. A target of the form "prefix#suffix" has
      # each half sanitised separately so the "#" survives; anything else
      # goes through Metanorma::Utils.to_ncname.
      def to_xreftarget(str)
        m = /^(?<pref>[^#]+)#(?<suff>.+)$/.match(str) or
          return Metanorma::Utils::to_ncname(str)

        pref = m[:pref].gsub(%r([#{Metanorma::Utils::NAMECHAR}])o, "_")
        suff = m[:suff].gsub(%r([#{Metanorma::Utils::NAMECHAR}])o, "_")
        "#{pref}##{suff}"
      end

      # Attributes that hold identifiers or references to identifiers.
      IDREF = "//*/@id | //review/@from | //review/@to | "\
              "//callout/@target | //citation/@bibitemid | "\
              "//eref/@bibitemid".freeze

      # Normalises identifiers and references, then replaces generated
      # GUID ids with content hashes.
      def anchor_cleanup(elem)
        anchor_cleanup1(elem)
        xreftarget_cleanup(elem)
        contenthash_id_cleanup(elem)
      end

      # Rewrites every IDREF attribute value through to_ncname, logging
      # each value that changed.
      def anchor_cleanup1(elem)
        elem.xpath(IDREF).each do |s|
          orig = s.value
          ret = Metanorma::Utils::to_ncname(orig)
          next if ret == orig

          s.value = ret
          output = s.parent.dup
          output.children.remove
          @log.add("Anchors", s.parent,
                   "normalised identifier in #{output} from #{orig}")
        end
      end

      # Rewrites every xref target through to_xreftarget, logging each
      # value that changed.
      def xreftarget_cleanup(elem)
        elem.xpath("//xref/@target").each do |s|
          orig = s.value
          ret = to_xreftarget(orig)
          next if ret == orig

          s.value = ret
          output = s.parent.dup
          output.children.remove
          @log.add("Anchors", s.parent,
                   "normalised identifier in #{output} from #{orig}")
        end
      end

      # True when +str+ is, in its entirety, an underscore followed by a
      # lowercase UUID-shaped hex string. Returns false for nil.
      # Anchored with \A/\z so a multi-line string that merely contains a
      # GUID on one of its lines does not qualify.
      def guid?(str)
        /\A_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\z/
          .match?(str)
      end

      # Replaces generated GUID ids with content-derived ids and updates
      # the references that pointed at them.
      def contenthash_id_cleanup(doc)
        ids = contenthash_id_make(doc)
        contenthash_id_update_refs(doc, ids)
      end

      # Replaces each GUID-shaped id with a hash of the element's path and
      # text. Returns a map of old id => new id.
      def contenthash_id_make(doc)
        doc.xpath("//*[@id]").each_with_object({}) do |x, m|
          next unless guid?(x["id"])

          m[x["id"]] = contenthash(x)
          x["id"] = m[x["id"]]
        end
      end

      # Rewrites referencing attributes whose value is a key of +ids+.
      def contenthash_id_update_refs(doc, ids)
        [%w(review from), %w(review to), %w(callout target), %w(eref bibitemid),
         %w(citation bibitemid), %w(xref target), %w(xref to)].each do |tag, attr|
          doc.xpath("//#{tag}").each do |x|
            ids[x[attr]] and x[attr] = ids[x[attr]]
          end
        end
      end

      # MD5 of the element's path and text, formatted like a GUID id
      # ("_" followed by 8-4-4-4-12 hex digits).
      def contenthash(elem)
        Digest::MD5.hexdigest("#{elem.path}////#{elem.text}")
          .sub(/^(.{8})(.{4})(.{4})(.{4})(.{12})$/, "_\\1-\\2-\\3-\\4-\\5")
      end
    end
  end
end
|
@@ -0,0 +1,222 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "pathname"
|
3
|
+
require "html2doc"
|
4
|
+
require "asciimath2unitsml"
|
5
|
+
require_relative "./cleanup_block"
|
6
|
+
require_relative "./cleanup_footnotes"
|
7
|
+
require_relative "./cleanup_ref"
|
8
|
+
require_relative "./cleanup_ref_dl"
|
9
|
+
require_relative "./cleanup_boilerplate"
|
10
|
+
require_relative "./cleanup_section"
|
11
|
+
require_relative "./cleanup_terms"
|
12
|
+
require_relative "./cleanup_inline"
|
13
|
+
require_relative "./cleanup_amend"
|
14
|
+
require "relaton_iev"
|
15
|
+
|
16
|
+
module Metanorma
  module Standoc
    # Mathematics-related steps of the XML cleanup pass: AsciiMath to
    # MathML conversion, MathML tidying, and UnitsML collection.
    module Cleanup
      # Converts every <stem type="AsciiMath"> in the XML string +text+ to
      # MathML via Html2Doc, wraps any bare <math> in
      # <stem type='MathML'>, and returns the resulting XML string.
      def asciimath2mathml(text)
        text = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
          "<amathstem>#{HTMLEntities.new.decode(Regexp.last_match(1))}</amathstem>"
        end
        text = Html2Doc.asciimath_to_mathml(text,
                                            ["<amathstem>", "</amathstem>"])
        x = Nokogiri::XML(text)
        x.xpath("//*[local-name() = 'math'][not(parent::stem)]").each do |y|
          y.wrap("<stem type='MathML'></stem>")
        end
        x.to_xml
      end

      # If +xml+ holds only text (escaped MathML markup), unescapes the
      # five predefined XML entities, strips namespace prefixes from tag
      # names, and installs the result as the node's children.
      # Leaves +xml+ untouched when it already has element children.
      def xml_unescape_mathml(xml)
        return if xml.children.any?(&:element?)

        # &amp; is decoded last so that e.g. "&amp;lt;" yields "&lt;"
        math = xml.text.gsub(/&lt;/, "<").gsub(/&gt;/, ">")
          .gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&amp;/, "&")
          .gsub(/<[^: \r\n\t\/]+:/, "<").gsub(/<\/[^ \r\n\t:]+:/, "</")
        xml.children = math
      end

      MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze

      # Replaces whitespace at the start or end of a line within each
      # <mtext> with a no-break space character reference.
      # NOTE(review): the reviewed copy showed the replacement as a bare
      # space, apparently an entity lost in rendering; "&#xA0;" is
      # presumed from the method's purpose -- confirm.
      def mathml_preserve_space(math)
        math.xpath(".//m:mtext", "m" => MATHML_NS).each do |x|
          x.children = x.children.to_xml
            .gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
        end
      end

      # Puts un-namespaced <math> children of +stem+ into the MathML
      # namespace.
      def mathml_namespace(stem)
        stem.xpath("./math").each { |x| x.default_namespace = MATHML_NS }
      end

      # Which classes of single-character identifier are rendered italic.
      def mathml_mi_italics
        { uppergreek: true, upperroman: true,
          lowergreek: true, lowerroman: true }
      end

      # presuppose multichar mi upright, singlechar mi MathML default italic
      #
      # Sets mathvariant="normal" on each <mi> with no mathvariant-bearing
      # ancestor for which mi_italicise? answers true.
      def mathml_italicise(xml)
        xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
                  "m" => MATHML_NS).each do |i|
          char = HTMLEntities.new.decode(i.text)
          i["mathvariant"] = "normal" if mi_italicise?(char)
        end
      end

      # Truthy when the single character +char+ is Greek or Latin and its
      # class is switched off in mathml_mi_italics; the caller then marks
      # it mathvariant="normal". Falsy for multi-character strings and for
      # other scripts.
      def mi_italicise?(char)
        return false if char.length > 1

        if /\p{Greek}/.match?(char)
          (/\p{Lower}/.match(char) && !mathml_mi_italics[:lowergreek]) ||
            (/\p{Upper}/.match(char) && !mathml_mi_italics[:uppergreek])
        elsif /\p{Latin}/.match?(char)
          (/\p{Lower}/.match(char) && !mathml_mi_italics[:lowerroman]) ||
            (/\p{Upper}/.match(char) && !mathml_mi_italics[:upperroman])
        else false
        end
      end

      UNITSML_NS = "https://schema.unitsml.org/unitsml/1.0".freeze

      # Returns the document's <misc-container>, creating it after
      # <termdocsource> (or, failing that, <bibdata>) when absent.
      def add_misc_container(xmldoc)
        unless (ins = xmldoc.at("//misc-container"))
          a = xmldoc.at("//termdocsource") || xmldoc.at("//bibdata")
          a.next = "<misc-container/>"
          ins = xmldoc.at("//misc-container")
        end
        ins
      end

      # Gathers all UnitsML-namespace elements of the document into a
      # single <UnitsML> element inside <misc-container>. Does nothing if
      # the document contains no UnitsML.
      def mathml_unitsML(xmldoc)
        return unless xmldoc.at(".//m:*", "m" => UNITSML_NS)

        misc = add_misc_container(xmldoc)
        unitsml = misc.add_child("<UnitsML xmlns='#{UNITSML_NS}'/>").first
        %w(Unit CountedItem Quantity Dimension Prefix).each do |t|
          gather_unitsml(unitsml, xmldoc, t)
        end
      end

      # Removes every UnitsML +tag+ element from the document, keeps the
      # last one seen per id, and appends them to a <tagSet> under
      # +unitsml+.
      def gather_unitsml(unitsml, xmldoc, tag)
        tags = xmldoc.xpath(".//m:#{tag}", "m" => UNITSML_NS)
          .each_with_object({}) do |x, m|
          m[x["id"]] = x.remove
        end
        return if tags.empty?

        set = unitsml.add_child("<#{tag}Set/>").first
        tags.each_value { |v| set << v }
      end

      # Options passed to Asciimath2UnitsML::Conv.
      def asciimath2unitsml_options
        { multiplier: :space }
      end

      # Combines two MathML mathvariant values into the single value that
      # expresses both (e.g. "bold" and "italic" give "bold-italic").
      # Returns +inner+ unchanged when the pair has no combined form.
      # "bold-sans-serif" is the MathML spelling; the misspelt
      # "sans-serif-bold" is still accepted for backward compatibility.
      def mathvariant_override(inner, outer)
        case outer
        when "bold"
          case inner
          when "normal" then "bold"
          when "italic" then "bold-italic"
          when "fraktur" then "bold-fraktur"
          when "script" then "bold-script"
          when "sans-serif" then "bold-sans-serif"
          when "sans-serif-italic" then "sans-serif-bold-italic"
          else inner
          end
        when "italic"
          case inner
          when "normal" then "italic"
          when "bold" then "bold-italic"
          when "sans-serif" then "sans-serif-italic"
          when "bold-sans-serif" then "sans-serif-bold-italic"
          else inner
          end
        when "bold-italic"
          case inner
          when "normal", "bold", "italic" then "bold-italic"
          when "sans-serif", "bold-sans-serif", "sans-serif-italic"
            "sans-serif-bold-italic"
          else inner
          end
        when "fraktur"
          case inner
          when "normal" then "fraktur"
          when "bold" then "bold-fraktur"
          else inner
          end
        when "bold-fraktur"
          case inner
          when "normal", "fraktur" then "bold-fraktur"
          else inner
          end
        when "script"
          case inner
          when "normal" then "script"
          when "bold" then "bold-script"
          else inner
          end
        when "bold-script"
          case inner
          when "normal", "script" then "bold-script"
          else inner
          end
        when "sans-serif"
          case inner
          when "normal" then "sans-serif"
          when "bold" then "bold-sans-serif"
          when "italic" then "sans-serif-italic"
          when "bold-italic" then "sans-serif-bold-italic"
          else inner
          end
        when "bold-sans-serif"
          case inner
          when "normal", "bold", "sans-serif" then "bold-sans-serif"
          when "italic", "bold-italic", "sans-serif-italic"
            "sans-serif-bold-italic"
          else inner
          end
        when "sans-serif-italic"
          case inner
          when "normal", "italic", "sans-serif" then "sans-serif-italic"
          when "bold", "bold-italic", "bold-sans-serif", "sans-serif-bold"
            "sans-serif-bold-italic"
          else inner
          end
        when "sans-serif-bold-italic"
          case inner
          when "normal", "italic", "sans-serif", "sans-serif-italic",
               "bold", "bold-italic", "bold-sans-serif", "sans-serif-bold"
            "sans-serif-bold-italic"
          else inner
          end
        else inner
        end
      end

      # For each element carrying mathvariant, recomputes the mathvariant
      # of every descendant that also carries one.
      # NOTE(review): the arguments are passed as (outer, inner) although
      # mathvariant_override declares (inner, outer); left as found --
      # confirm which value is meant to win for uncombinable pairs.
      def mathml_mathvariant(math)
        math.xpath(".//*[@mathvariant]").each do |outer|
          outer.xpath(".//*[@mathvariant]").each do |inner|
            inner["mathvariant"] =
              mathvariant_override(outer["mathvariant"], inner["mathvariant"])
          end
        end
      end

      # Entry point: tidies every MathML stem in the document, then
      # gathers UnitsML into the misc-container.
      def mathml_cleanup(xmldoc)
        unitsml = Asciimath2UnitsML::Conv.new(asciimath2unitsml_options)
        xmldoc.xpath("//stem[@type = 'MathML']").each do |x|
          xml_unescape_mathml(x)
          mathml_namespace(x)
          mathml_preserve_space(x)
          unitsml.MathML2UnitsML(x)
          mathml_mathvariant(x)
          mathml_italicise(x)
        end
        mathml_unitsML(xmldoc)
      end
    end
  end
end
|