metanorma-standoc 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitattributes +4 -0
- data/.gitignore +11 -0
- data/.hound.yml +3 -0
- data/.oss-guides.rubocop.yml +1077 -0
- data/.rubocop.ribose.yml +66 -0
- data/.rubocop.tb.yml +650 -0
- data/.rubocop.yml +15 -0
- data/.travis.yml +21 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/Gemfile +7 -0
- data/LICENSE +25 -0
- data/Makefile +39 -0
- data/README.adoc +9 -0
- data/Rakefile +6 -0
- data/bin/rspec +18 -0
- data/docs/customisation.adoc +178 -0
- data/docs/guidance.adoc +436 -0
- data/docs/htmloutput.adoc +115 -0
- data/docs/quickstart.adoc +375 -0
- data/lib/asciidoctor/standoc/base.rb +198 -0
- data/lib/asciidoctor/standoc/biblio.rng +836 -0
- data/lib/asciidoctor/standoc/blocks.rb +190 -0
- data/lib/asciidoctor/standoc/cleanup.rb +247 -0
- data/lib/asciidoctor/standoc/cleanup_block.rb +193 -0
- data/lib/asciidoctor/standoc/cleanup_footnotes.rb +78 -0
- data/lib/asciidoctor/standoc/cleanup_ref.rb +125 -0
- data/lib/asciidoctor/standoc/converter.rb +55 -0
- data/lib/asciidoctor/standoc/front.rb +121 -0
- data/lib/asciidoctor/standoc/inline.rb +134 -0
- data/lib/asciidoctor/standoc/isodoc.rng +1059 -0
- data/lib/asciidoctor/standoc/lists.rb +87 -0
- data/lib/asciidoctor/standoc/macros.rb +95 -0
- data/lib/asciidoctor/standoc/ref.rb +187 -0
- data/lib/asciidoctor/standoc/section.rb +159 -0
- data/lib/asciidoctor/standoc/table.rb +61 -0
- data/lib/asciidoctor/standoc/utils.rb +121 -0
- data/lib/asciidoctor/standoc/validate.rb +65 -0
- data/lib/asciidoctor/standoc/validate_section.rb +42 -0
- data/lib/asciidoctor/standoc/version.rb +5 -0
- data/lib/metanorma-standoc.rb +9 -0
- data/lib/metanorma/standoc.rb +7 -0
- data/lib/metanorma/standoc/processor.rb +40 -0
- data/metanorma-standoc.gemspec +47 -0
- data/spec/asciidoctor-standoc/base_spec.rb +271 -0
- data/spec/asciidoctor-standoc/blocks_spec.rb +469 -0
- data/spec/asciidoctor-standoc/cleanup_spec.rb +760 -0
- data/spec/asciidoctor-standoc/inline_spec.rb +162 -0
- data/spec/asciidoctor-standoc/isobib_cache_spec.rb +332 -0
- data/spec/asciidoctor-standoc/lists_spec.rb +190 -0
- data/spec/asciidoctor-standoc/macros_spec.rb +111 -0
- data/spec/asciidoctor-standoc/refs_spec.rb +606 -0
- data/spec/asciidoctor-standoc/section_spec.rb +310 -0
- data/spec/asciidoctor-standoc/table_spec.rb +307 -0
- data/spec/asciidoctor-standoc/validate_spec.rb +133 -0
- data/spec/assets/header.html +7 -0
- data/spec/assets/html.css +2 -0
- data/spec/assets/htmlcover.html +4 -0
- data/spec/assets/htmlintro.html +5 -0
- data/spec/assets/i18n.yaml +2 -0
- data/spec/assets/iso.headless.html +33 -0
- data/spec/assets/iso.xml +8 -0
- data/spec/assets/rice_image1.png +0 -0
- data/spec/assets/scripts.html +3 -0
- data/spec/assets/std.css +2 -0
- data/spec/assets/word.css +2 -0
- data/spec/assets/wordcover.html +3 -0
- data/spec/assets/wordintro.html +4 -0
- data/spec/examples/103_01_02.html +247 -0
- data/spec/examples/english.yaml +69 -0
- data/spec/examples/iso_123_.xml +45 -0
- data/spec/examples/iso_123_all_parts.xml +45 -0
- data/spec/examples/iso_123_no_year_note.xml +46 -0
- data/spec/examples/iso_124_.xml +41 -0
- data/spec/examples/iso_216_.xml +47 -0
- data/spec/examples/iso_iec_12382_.xml +48 -0
- data/spec/examples/rice.adoc +715 -0
- data/spec/examples/rice.preview.html +1877 -0
- data/spec/examples/rice.sh +4 -0
- data/spec/examples/rice_images/rice_image1.png +0 -0
- data/spec/examples/rice_images/rice_image2.png +0 -0
- data/spec/examples/rice_images/rice_image3_1.png +0 -0
- data/spec/examples/rice_images/rice_image3_2.png +0 -0
- data/spec/examples/rice_images/rice_image3_3.png +0 -0
- data/spec/metanorma/processor_spec.rb +70 -0
- data/spec/spec_helper.rb +198 -0
- metadata +370 -0
@@ -0,0 +1,190 @@
|
|
1
|
+
require "htmlentities"
|
2
|
+
require "uri"
|
3
|
+
|
4
|
+
module Asciidoctor
|
5
|
+
module Standoc
|
6
|
+
module Blocks
|
7
|
+
# Builds the attribute hash holding the element id for +node+.
# The id value is whatever Utils.anchor_or_uuid returns for the node.
def id_attr(node = nil)
  anchor = Utils.anchor_or_uuid(node)
  { id: anchor }
end
|
10
|
+
|
11
|
+
# open block is a container of multiple blocks,
|
12
|
+
# treated as a single block.
|
13
|
+
# We append each contained block to its parent
|
14
|
+
def open(node)
  # Dispatch every contained block to the method named after its context
  # and collect the results in block order.
  node.blocks.map { |child| send(child.context, child) }
end
|
21
|
+
|
22
|
+
# Literal blocks are handed straight to #paragraph.
def literal(node)
  paragraph(node)
end
|
25
|
+
|
26
|
+
# NOTE: html escaping is performed by Nokogiri
|
27
|
+
def stem(node)
  # The block's source lines, re-joined with newlines, are passed to
  # stem_parse together with the <formula> builder node.
  source = node.lines.join("\n")
  noko do |xml|
    xml.formula(**id_attr(node)) { |formula| stem_parse(source, formula) }
  end
end
|
35
|
+
|
36
|
+
# Attribute hash for a <review> element built from a sidebar block.
#
# * date: the "date" attribute, defaulting to today's date; a midnight UTC
#   time is appended when the value has no "T" in it.
# * reviewer: "reviewer" attribute, else "source", else "(Unknown)".
# * to: falls back to the "from" attribute when "to" is absent.
def sidebar_attrs(node)
  stamp = node.attr("date")
  stamp ||= Date.today.iso8601.gsub(/\+.*$/, "")
  stamp = "#{stamp}T00:00:00Z" unless stamp =~ /T/
  author = node.attr("reviewer") || node.attr("source") || "(Unknown)"
  {
    reviewer: author,
    id: Utils.anchor_or_uuid(node),
    date: stamp,
    from: node.attr("from"),
    to: node.attr("to") || node.attr("from"),
  }
end
|
47
|
+
|
48
|
+
# Renders a sidebar block as a <review> element.
# Returns nil without producing anything unless draft? is true.
def sidebar(node)
  return unless draft?
  noko do |xml|
    xml.review(**attr_code(sidebar_attrs(node))) do |review|
      wrap_in_para(node, review)
    end
  end
end
|
56
|
+
|
57
|
+
# Renders +n+ as a <termnote> element whose content comes from
# wrap_in_para; the noko output is joined with newlines.
def termnote(n)
  fragments = noko do |xml|
    xml.termnote(**id_attr(n)) { |ex| wrap_in_para(n, ex) }
  end
  fragments.join("\n")
end
|
64
|
+
|
65
|
+
# Renders +n+ as a <note> element whose content comes from wrap_in_para;
# the noko output is joined with newlines.
def note(n)
  fragments = noko do |xml|
    xml.note(**id_attr(n)) { |c| wrap_in_para(n, c) }
  end
  fragments.join("\n")
end
|
72
|
+
|
73
|
+
# Attribute hash for an <admonition> element.
# The type is the node's "name" attribute, unless its "type" attribute
# equals (ignoring case) "danger" or "safety precautions", in which case
# that lower-case label is used instead.
def admonition_attrs(node)
  name = node.attr("name")
  type = node.attr("type")
  if type
    special = ["danger", "safety precautions"].find do |label|
      type.casecmp(label).zero?
    end
    name = special if special
  end
  { id: Utils.anchor_or_uuid(node), type: name }
end
|
82
|
+
|
83
|
+
# Renders an admonition block. Inside a terms section (in_terms?) it is
# rendered through #termnote; a block named "note" goes through #note;
# anything else becomes an <admonition> element.
def admonition(node)
  return termnote(node) if in_terms?
  return note(node) if node.attr("name") == "note"
  built = noko do |xml|
    xml.admonition(**admonition_attrs(node)) { |a| wrap_in_para(node, a) }
  end
  built.join("\n")
end
|
92
|
+
|
93
|
+
# Renders +node+ as a <termexample> element whose content comes from
# wrap_in_para; the noko output is joined with newlines.
def term_example(node)
  fragments = noko do |xml|
    xml.termexample(**id_attr(node)) { |ex| wrap_in_para(node, ex) }
  end
  fragments.join("\n")
end
|
100
|
+
|
101
|
+
# Renders an example block as an <example> element containing the node's
# converted content; inside a terms section it goes through #term_example.
def example(node)
  return term_example(node) if in_terms?
  built = noko do |xml|
    xml.example(**id_attr(node)) { |ex| ex << node.content }
  end
  built.join("\n")
end
|
110
|
+
|
111
|
+
# Renders the preamble as a <foreword> element: a fixed "Foreword" title
# followed by the node's converted content.
def preamble(node)
  markup = noko do |xml|
    xml.foreword do |foreword|
      foreword.title { |title| title << "Foreword" }
      foreword << node.content
    end
  end
  markup.join("\n")
end
|
120
|
+
|
121
|
+
# Attribute hash for an <image> element.
#
# src is the node's resolved image URI; imagetype is the upper-cased MIME
# subtype guessed from that URI; height and width default to "auto".
#
# When MIME::Types finds no type for the URI, imagetype is nil instead of
# raising NoMethodError on the empty lookup result.
# NOTE(review): #image passes this hash through attr_code; confirm that
# helper drops nil-valued entries so no empty attribute is emitted.
def image_attributes(node)
  uri = node.image_uri(node.attr("target"))
  mime = MIME::Types.type_for(uri).first
  { src: uri,
    id: Utils::anchor_or_uuid,
    imagetype: mime&.sub_type&.upcase,
    height: node.attr("height") || "auto",
    width: node.attr("width") || "auto" }
end
|
130
|
+
|
131
|
+
# Adds a <name> child holding the node's title to builder +f+.
# Does nothing (and returns nil) when the node has no title.
def figure_title(node, f)
  return if node.title.nil?
  f.name { |name| name << node.title }
end
|
136
|
+
|
137
|
+
# Renders an image block as a <figure> element: an optional title
# (see #figure_title) followed by an <image> child.
def image(node)
  noko do |xml|
    xml.figure(**id_attr(node)) do |figure|
      figure_title(node, figure)
      figure.image(**attr_code(image_attributes(node)))
    end
  end
end
|
145
|
+
|
146
|
+
# Renders a paragraph as a <p> element carrying its alignment and id.
# A paragraph whose role is "source" is rendered through #termsource.
def paragraph(node)
  return termsource(node) if node.role == "source"
  para_attrs = { align: node.attr("align"),
                 id: Utils.anchor_or_uuid(node) }
  built = noko do |xml|
    xml.p(**attr_code(para_attrs)) { |para| para << node.content }
  end
  built.join("\n")
end
|
156
|
+
|
157
|
+
# Attribute hash (id and alignment) for a <quote> element.
def quote_attrs(node)
  anchor = Utils.anchor_or_uuid(node)
  { id: anchor, align: node.attr("align") }
end
|
160
|
+
|
161
|
+
# Adds the attribution children of a quote to builder +out+.
#
# * "citetitle" is split at its first comma: the part before it becomes
#   the target of a <source type="inline"> element, the rest its text.
# * "attribution" becomes the content of an <author> element.
#
# A citetitle the pattern cannot match (for example an empty string) is
# skipped rather than raising NoMethodError on the nil match.
def quote_attribution(node, out)
  citetitle = node.attr("citetitle")
  if citetitle
    m = /^(?<cite>[^,]+)(,(?<text>.*$))?$/m.match(citetitle)
    if m
      out.source m[:text],
                 **attr_code(target: m[:cite], type: "inline")
    end
  end
  attribution = node.attr("attribution")
  out.author { |a| a << attribution } if attribution
end
|
171
|
+
|
172
|
+
# Renders a quote block as a <quote> element: attribution children first
# (see #quote_attribution), then the content added by wrap_in_para.
def quote(node)
  noko do |xml|
    xml.quote(**attr_code(quote_attrs(node))) do |quoted|
      quote_attribution(node, quoted)
      wrap_in_para(node, quoted)
    end
  end
end
|
180
|
+
|
181
|
+
# Renders a listing block as a <sourcecode> element holding the node's
# converted content.
def listing(node)
  # NOTE: html escaping is performed by Nokogiri
  noko do |xml|
    xml.sourcecode(**id_attr(node)) do |code|
      code << node.content
      # alternative kept for reference: code << node.lines.join("\n")
    end
  end
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
@@ -0,0 +1,247 @@
|
|
1
|
+
require "date"
|
2
|
+
require "nokogiri"
|
3
|
+
require "pathname"
|
4
|
+
require "open-uri"
|
5
|
+
require "pp"
|
6
|
+
require_relative "./cleanup_block.rb"
|
7
|
+
require_relative "./cleanup_footnotes.rb"
|
8
|
+
require_relative "./cleanup_ref.rb"
|
9
|
+
|
10
|
+
module Asciidoctor
|
11
|
+
module Standoc
|
12
|
+
module Cleanup
|
13
|
+
# Removes any run of whitespace that directly precedes an "<fn " tag.
def textcleanup(text)
  text.gsub(/\s+(?=<fn )/, "")
end
|
16
|
+
|
17
|
+
# Runs every cleanup pass over +xmldoc+, in the fixed order listed below,
# and returns the same document object.
def cleanup(xmldoc)
  passes = %i[
    termdef_cleanup sections_cleanup obligations_cleanup table_cleanup
    formula_cleanup figure_cleanup ref_cleanup note_cleanup
    normref_cleanup biblio_cleanup reference_names xref_cleanup
    bpart_cleanup quotesource_cleanup para_cleanup callout_cleanup
    origin_cleanup element_name_cleanup footnote_renumber
    empty_element_cleanup mathml_cleanup script_cleanup
    docidentifier_cleanup bookmark_cleanup
  ]
  passes.each { |pass| send(pass, xmldoc) }
  xmldoc
end
|
44
|
+
|
45
|
+
# ISO as a prefix goes first
|
46
|
+
def docidentifier_cleanup(xmldoc)
  id = xmldoc.at("//bibdata/docidentifier/project-number")
  return unless id

  publishers = xmldoc.xpath("//bibdata/contributor[role/@type = "\
                            "'publisher']/organization")
  # Each publisher contributes its abbreviation, or its name when it has
  # no abbreviation; "ISO" is moved to the front of the list.
  prefix = publishers.each_with_object([]) do |org, labels|
    label = org.at("abbreviation")&.text || org.at("name")&.text
    if label == "ISO"
      labels.unshift("ISO")
    else
      labels << label
    end
  end

  id.content = "#{prefix.join('/')} #{id.text}"
end
|
59
|
+
|
60
|
+
# Element names that empty_element_cleanup removes from the document when
# they have no children.
TEXT_ELEMS = %w[
  status language script version author name callout phone
  email street city state country postcode identifier referenceFrom
  referenceTo docidentifier prefix initial addition surname forename
  title draft secretariat title-main title-intro title-part
].freeze
|
65
|
+
|
66
|
+
# it seems Nokogiri::XML is treating the content of <script> as cdata,
|
67
|
+
# because of its use in HTML. Bad nokogiri. Undoing that, since we use
|
68
|
+
# script as a normal tag
|
69
|
+
def script_cleanup(xmldoc)
  # Re-assign each <script> element's content from its own to_str value.
  xmldoc.xpath("//script").each { |script| script.content = script.to_str }
end
|
74
|
+
|
75
|
+
# Removes every element named in TEXT_ELEMS that has no child nodes.
def empty_element_cleanup(xmldoc)
  query = TEXT_ELEMS.map { |tag| "//#{tag}" }.join(" | ")
  xmldoc.xpath(query).each do |element|
    element.remove if element.children.empty?
  end
end
|
80
|
+
|
81
|
+
# Replaces every underscore in every traversed node's name with a hyphen.
def element_name_cleanup(xmldoc)
  xmldoc.traverse do |node|
    node.name = node.name.tr("_", "-")
  end
end
|
84
|
+
|
85
|
+
# Pairs callouts and annotations by position: each callout gets a fresh
# "_"-prefixed UUID as its target, and the annotation at the same index
# gets that value as its id.
def link_callouts_to_annotations(callouts, annotations)
  callouts.each.with_index do |callout, idx|
    callout["target"] = "_" + UUIDTools::UUID.random_create
    annotations[idx]["id"] = callout["target"]
  end
end
|
91
|
+
|
92
|
+
# For every <sourcecode>, links its callout children to its annotation
# children, but only when there are equally many of each.
def align_callouts_to_annotations(xmldoc)
  xmldoc.xpath("//sourcecode").each do |code|
    callouts = code.elements.select { |el| el.name == "callout" }
    annotations = code.elements.select { |el| el.name == "annotation" }
    next unless callouts.size == annotations.size
    link_callouts_to_annotations(callouts, annotations)
  end
end
|
101
|
+
|
102
|
+
# Re-parents each run of <annotation> elements that directly follows a
# <sourcecode> element so that they become children of that element.
def merge_annotations_into_sourcecode(xmldoc)
  xmldoc.xpath("//sourcecode").each do |code|
    while (sibling = code.next_element)&.name == "annotation"
      sibling.parent = code
    end
  end
end
|
109
|
+
|
110
|
+
# Callout pass: first pull trailing annotations into their sourcecode
# element, then link callouts to annotations.
def callout_cleanup(xmldoc)
  merge_annotations_into_sourcecode(xmldoc)
  align_callouts_to_annotations(xmldoc)
end
|
114
|
+
|
115
|
+
# Within a term, a paragraph whose only element child is a stem
# expression is replaced by an <admitted> element wrapping that stem.
def termdef_stem_cleanup(xmldoc)
  xmldoc.xpath("//term/p/stem").each do |stem|
    # para containing just a stem expression
    next unless stem.parent.elements.size == 1
    admitted = Nokogiri::XML::Element.new("admitted", xmldoc)
    para = stem.parent
    admitted.children = stem.remove
    para.replace(admitted)
  end
end
|
126
|
+
|
127
|
+
# Moves each <domain> found inside a paragraph so that it directly
# follows the node preceding that paragraph.
def termdomain_cleanup(xmldoc)
  xmldoc.xpath("//p/domain").each do |domain|
    anchor = domain.parent.previous
    anchor.next = domain.remove
  end
end
|
133
|
+
|
134
|
+
# Wraps the p/figure/formula children of every <term> in a single
# <definition> element, placed where the first such child used to be.
#
# A term without any of those children is skipped. This uses `next`, not
# `return`: a `return` inside the block would leave the whole method and
# leave every later term unprocessed.
def termdefinition_cleanup(xmldoc)
  xmldoc.xpath("//term").each do |d|
    first_child = d.at("./p | ./figure | ./formula")
    next unless first_child

    t = Nokogiri::XML::Element.new("definition", xmldoc)
    first_child.replace(t)
    t << first_child.remove
    # gather the remaining definition content behind the first child
    d.xpath("./p | ./figure | ./formula").each { |n| t << n.remove }
  end
end
|
143
|
+
|
144
|
+
def termdef_unnest_cleanup(xmldoc)
  # release termdef tags from surrounding paras
  query = "//p/admitted | //p/deprecates"
  # the document is queried again after every replacement
  until (found = xmldoc.xpath(query)).empty?
    wrapper = found[0].parent
    wrapper.replace(wrapper.children)
  end
end
|
152
|
+
|
153
|
+
# Removes paragraphs and unordered lists that are direct children of
# a <terms> element.
def termdef_boilerplate_cleanup(xmldoc)
  xmldoc.xpath("//terms/p | //terms/ul").each { |node| node.remove }
end
|
156
|
+
|
157
|
+
# Renames every <terms> element that directly contains another <terms>
# element to <clause>.
def termdef_subclause_cleanup(xmldoc)
  xmldoc.xpath("//terms[terms]").each { |terms| terms.name = "clause" }
end
|
162
|
+
|
163
|
+
# Moves every <termdocsource> found under <terms> or <clause> to just
# before the first <preface> or <sections> element of the document.
def termdocsource_cleanup(xmldoc)
  anchor = xmldoc.at("//preface | //sections")
  sources = xmldoc.xpath("//terms/termdocsource | //clause/termdocsource")
  sources.each { |source| anchor.previous = source.remove }
end
|
170
|
+
|
171
|
+
# Runs the term-definition cleanup passes in their fixed order and
# returns the result of the last one.
def termdef_cleanup(xmldoc)
  passes = %i[
    termdef_unnest_cleanup termdef_stem_cleanup termdomain_cleanup
    termdefinition_cleanup termdef_boilerplate_cleanup
    termdef_subclause_cleanup termdocsource_cleanup
  ]
  passes.map { |pass| send(pass, xmldoc) }.last
end
|
180
|
+
|
181
|
+
# Renames every <references> element that directly contains another
# <references> element to <clause>.
def biblio_cleanup(xmldoc)
  xmldoc.xpath("//references[references]").each { |refs| refs.name = "clause" }
end
|
186
|
+
|
187
|
+
# Names of the elements into which note_cleanup may move a note.
# Wider list kept for reference:
# %w[p formula quote sourcecode example admonition ul ol dl figure]
ELEMS_ALLOW_NOTES = ["p", "formula", "ul", "ol", "dl", "figure"].freeze
|
190
|
+
|
191
|
+
# if a note is at the end of a section, it is left alone
|
192
|
+
# if a note is followed by a non-note block,
|
193
|
+
# it is moved inside its preceding block if it is not delimited
|
194
|
+
# (so there was no way of making that block include the note)
|
195
|
+
def note_cleanup(xmldoc)
  q = "//note[following-sibling::*[not(local-name() = 'note')]]"
  xmldoc.xpath(q).each do |note|
    # notes inside tables are left where they are
    next unless note.ancestors("table").empty?
    target = note.previous_element
    next unless target
    note.parent = target if ELEMS_ALLOW_NOTES.include?(target.name)
  end
end
|
203
|
+
|
204
|
+
# True when no text child of +x+ that comes before its first element
# child contains a non-whitespace character (also true when +x+ has no
# element child and no such text at all).
def empty_text_before_first_element(x)
  leading = x.children.take_while { |child| !child.element? }
  leading.none? { |child| child.text? && child.text =~ /\S/ }
end
|
213
|
+
|
214
|
+
# Tidies the first child of +x+ when that child is a text node: a
# whitespace-only text node is removed, otherwise a space at the start of
# its lines is stripped.
#
# Returns nil without touching anything when +x+ has no children (the
# case after bookmark_cleanup removes a paragraph's only child) or when
# the first child is not a text node.
def strip_initial_space(x)
  first = x.children[0]
  return unless first&.text?

  if first.text =~ /\S/
    first.content = first.text.gsub(/^ /, "")
  else
    first.remove
  end
end
|
223
|
+
|
224
|
+
# For list items whose first element is a paragraph that itself starts
# with a bookmark (with only whitespace before it): the bookmark is
# removed, its id is copied onto the list item, and the paragraph's
# leading space is tidied.
def bookmark_cleanup(xmldoc)
  xmldoc.xpath("//li[descendant::bookmark]").each do |li|
    para = li&.elements&.first
    next unless para&.name == "p" &&
      para&.elements&.first&.name == "bookmark"
    next unless empty_text_before_first_element(li.elements[0])
    li["id"] = li.elements[0].elements[0].remove["id"]
    strip_initial_space(li.elements[0])
  end
end
|
235
|
+
|
236
|
+
# Rebuilds the content of every MathML <stem> element from its text:
#
# 1. decode the escaped markup characters (&lt; &gt; &quot; &amp;),
#    with &amp; last so that already-decoded text is not decoded twice;
# 2. strip namespace prefixes from opening and closing tags;
# 3. drop all xmlns declarations;
# 4. put the MathML namespace back on the bare <math> root.
#
# The entity patterns must name the entities: replacing "<" by "<" (and
# likewise for the others) is a no-op and leaves the markup escaped.
def mathml_cleanup(xmldoc)
  xmldoc.xpath("//stem[@type = 'MathML']").each do |x|
    decoded = x.text.gsub(/&lt;/, "<").gsub(/&gt;/, ">").
      gsub(/&quot;/, '"').gsub(/&amp;/, "&")
    math = decoded.gsub(/<[^:\/]+:/, "<").gsub(/<\/[^:]+:/, "</").
      gsub(/ xmlns[^>]+/, "").
      gsub(/<math>/, '<math xmlns="http://www.w3.org/1998/Math/MathML">')
    x.children = math
  end
end
|
245
|
+
end
|
246
|
+
end
|
247
|
+
end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require "date"
|
2
|
+
require "nokogiri"
|
3
|
+
require "htmlentities"
|
4
|
+
require "json"
|
5
|
+
require "pathname"
|
6
|
+
require "open-uri"
|
7
|
+
require "pp"
|
8
|
+
|
9
|
+
module Asciidoctor
|
10
|
+
module Standoc
|
11
|
+
module Cleanup
|
12
|
+
# Gives an id (from Utils.anchor_or_uuid) to every paragraph without one,
# and to every id-less note that is not inside a bibitem or a table.
def para_cleanup(xmldoc)
  xmldoc.xpath("//p[not(@id)]").each do |para|
    para["id"] = Utils.anchor_or_uuid
  end
  note_query = "//note[not(@id)][not(ancestor::bibitem)]"\
               "[not(ancestor::table)]"
  xmldoc.xpath(note_query).each do |note|
    note["id"] = Utils.anchor_or_uuid
  end
end
|
21
|
+
|
22
|
+
# move Key dl after table footer
|
23
|
+
def dl_table_cleanup(xmldoc)
  q = "//table/following-sibling::*[1]"\
      "[self::p and normalize-space() = 'Key']"
  xmldoc.xpath(q).each do |key_para|
    list = key_para.next_element
    # only act when the "Key" paragraph is directly followed by a dl
    next unless !list.nil? && list.name == "dl"
    key_para.previous_element << key_para.next_element.remove
    key_para.remove
  end
end
|
33
|
+
|
34
|
+
# Returns the <thead> of table +s+, creating an empty one when absent.
# A new <thead> goes right after the table's <name> element if it has
# one, and otherwise before the table's first child.
def insert_thead(s)
  existing = s.at("./thead")
  return existing unless existing.nil?

  caption = s.at("./name")
  return caption.add_next_sibling("<thead/>").first if caption

  s.children.first.add_previous_sibling("<thead/>").first
end
|
43
|
+
|
44
|
+
# For every table with a headerrows attribute, moves rows from the top of
# <tbody> into <thead> until <thead> holds that many rows, then deletes
# the attribute.
#
# When headerrows asks for more rows than the table body still has, the
# move stops at the last available row instead of raising on a nil row.
def header_rows_cleanup(xmldoc)
  xmldoc.xpath("//table[@headerrows]").each do |table|
    thead = insert_thead(table)
    wanted = table["headerrows"].to_i
    (thead.xpath("./tr").size...wanted).each do
      row = table.at("./tbody/tr")
      break if row.nil?

      row.parent = thead
    end
    table.delete("headerrows")
  end
end
|
54
|
+
|
55
|
+
# Table pass: "Key" definition lists, then trailing notes, then header
# rows, in that order.
def table_cleanup(xmldoc)
  dl_table_cleanup(xmldoc)
  notes_table_cleanup(xmldoc)
  header_rows_cleanup(xmldoc)
end
|
60
|
+
|
61
|
+
# move notes into table
|
62
|
+
def notes_table_cleanup(xmldoc)
  q = "//table/following-sibling::*[1][self::note]"
  # Each pass moves the note directly following a table into that table;
  # passes repeat until a query finds no such note.
  loop do
    trailing = xmldoc.xpath(q)
    break if trailing.empty?
    trailing.each { |note| note.previous_element << note.remove }
  end
end
|
73
|
+
|
74
|
+
# include where definition list inside stem block
|
75
|
+
def formula_cleanup(x)
  q = "//formula/following-sibling::*[1]"\
      "[self::p and text() = 'where']"
  x.xpath(q).each do |where_para|
    list = where_para.next_element
    # only act when the "where" paragraph is directly followed by a dl
    next unless !list.nil? && list.name == "dl"
    where_para.previous_element << where_para.next_element.remove
    where_para.remove
  end
end
|
85
|
+
|
86
|
+
# include key definition list inside figure
|
87
|
+
def figure_dl_cleanup(xmldoc)
  q = "//figure/following-sibling::*"\
      "[self::p and normalize-space() = 'Key']"
  xmldoc.xpath(q).each do |key_para|
    list = key_para.next_element
    # only act when the "Key" paragraph is directly followed by a dl
    next unless !list.nil? && list.name == "dl"
    key_para.previous_element << key_para.next_element.remove
    key_para.remove
  end
end
|
97
|
+
|
98
|
+
# examples containing only figures become subfigures of figures
|
99
|
+
def subfigure_cleanup(xmldoc)
  query = "//example/figure"
  # Rename one wrapping <example> to <figure> at a time, re-querying the
  # document after each rename.
  until (found = xmldoc.xpath(query)).empty?
    found[0].parent.name = "figure"
  end
end
|
106
|
+
|
107
|
+
# Figure pass: footnotes, then "Key" definition lists, then subfigures,
# in that order.
def figure_cleanup(xmldoc)
  figure_footnote_cleanup(xmldoc)
  figure_dl_cleanup(xmldoc)
  subfigure_cleanup(xmldoc)
end
|
112
|
+
|
113
|
+
# When the document has a foreword or an introduction, inserts a
# <preface> element before +s+ and moves the foreword and then the
# introduction into it.
def make_preface(x, s)
  return unless x.at("//foreword | //introduction")

  preface = s.add_previous_sibling("<preface/>").first
  foreword = x.at("//foreword")
  foreword && preface.add_child(foreword.remove)
  introduction = x.at("//introduction")
  preface.add_child(introduction.remove) if introduction
end
|
122
|
+
|
123
|
+
# When <sections> directly contains <references> elements, inserts a
# <bibliography> element after +s+ and moves all of them into it.
def make_bibliography(x, s)
  return unless x.at("//sections/references")

  biblio = s.add_next_sibling("<bibliography/>").first
  x.xpath("//sections/references").each do |refs|
    biblio.add_child(refs.remove)
  end
end
|
131
|
+
|
132
|
+
# Rearranges the top-level parts around <sections>: builds the preface
# and the bibliography, then moves each annex out of <sections> to the
# position right after it.
def sections_order_cleanup(x)
  sections = x.at("//sections")
  make_preface(x, sections)
  make_bibliography(x, sections)
  # annexes are taken last-to-first, each one placed directly after
  # <sections>
  x.xpath("//sections/annex").reverse_each do |annex|
    sections.next = annex.remove
  end
end
|
138
|
+
|
139
|
+
# Highest integer "level" attribute among clauses that have one, with a
# floor of 5.
def maxlevel(x)
  levels = x.xpath("//clause[@level]").map { |clause| clause["level"].to_i }
  [5, *levels].max
end
|
146
|
+
|
147
|
+
# Clauses marked with level 6 or more are moved inside their preceding
# sibling element, deepest level first, and lose their level attribute.
# Nothing happens when no clause is marked deeper than level 5.
def sections_level_cleanup(x)
  deepest = maxlevel(x)
  return if deepest < 6

  deepest.downto(6) do |level|
    x.xpath("//clause[@level = '#{level}']").each do |clause|
      clause.delete("level")
      clause.previous_element << clause.remove
    end
  end
end
|
157
|
+
|
158
|
+
# Section pass: reorder the top-level parts, then nest over-deep clauses.
def sections_cleanup(x)
  sections_order_cleanup(x)
  sections_level_cleanup(x)
end
|
162
|
+
|
163
|
+
# Obligation pass: informative sections, then normative sections, then
# defaults and inheritance, in that order.
def obligations_cleanup(x)
  obligations_cleanup_info(x)
  obligations_cleanup_norm(x)
  obligations_cleanup_inherit(x)
end
|
168
|
+
|
169
|
+
# Marks the first foreword, the first introduction and every references
# element as informative.
def obligations_cleanup_info(x)
  %w[foreword introduction].each do |tag|
    section = x.at("//#{tag}")
    section["obligation"] = "informative" if section
  end
  x.xpath("//references").each do |refs|
    refs["obligation"] = "informative"
  end
end
|
174
|
+
|
175
|
+
# Marks as normative: the first clause titled "Scope", the first clause
# titled "Symbols and Abbreviated Terms", every terms element and every
# symbols-abbrevs element.
def obligations_cleanup_norm(x)
  ["Scope", "Symbols and Abbreviated Terms"].each do |title|
    clause = x.at("//clause[title = '#{title}']")
    clause["obligation"] = "normative" if clause
  end
  x.xpath("//terms").each { |terms| terms["obligation"] = "normative" }
  x.xpath("//symbols-abbrevs").each do |symbols|
    symbols["obligation"] = "normative"
  end
end
|
182
|
+
|
183
|
+
# Annexes and clauses without an obligation default to normative; every
# node matched by Utils::SUBCLAUSE_XPATH then takes the obligation found
# by its "./ancestor::*/@obligation" lookup.
def obligations_cleanup_inherit(x)
  x.xpath("//annex | //clause").each do |section|
    section["obligation"] ||= "normative"
  end
  x.xpath(Utils::SUBCLAUSE_XPATH).each do |sub|
    sub["obligation"] = sub.at("./ancestor::*/@obligation").text
  end
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|