isodoc 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.hound.yml +3 -0
- data/.oss-guides.rubocop.yml +1077 -0
- data/.rubocop.ribose.yml +65 -0
- data/.rubocop.tb.yml +640 -0
- data/.rubocop.yml +15 -0
- data/Gemfile +6 -0
- data/README.adoc +32 -0
- data/isodoc.gemspec +51 -0
- data/lib/isodoc.rb +74 -0
- data/lib/isodoc/blocks.rb +184 -0
- data/lib/isodoc/cleanup.rb +155 -0
- data/lib/isodoc/html.rb +44 -0
- data/lib/isodoc/inline.rb +211 -0
- data/lib/isodoc/iso2wordhtml.rb +143 -0
- data/lib/isodoc/lists.rb +54 -0
- data/lib/isodoc/metadata.rb +99 -0
- data/lib/isodoc/postprocessing.rb +156 -0
- data/lib/isodoc/references.rb +129 -0
- data/lib/isodoc/section.rb +136 -0
- data/lib/isodoc/table.rb +99 -0
- data/lib/isodoc/terms.rb +74 -0
- data/lib/isodoc/utils.rb +88 -0
- data/lib/isodoc/version.rb +3 -0
- data/lib/isodoc/xref_gen.rb +204 -0
- metadata +338 -0
data/lib/isodoc/table.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
module IsoDoc
|
2
|
+
class Convert
|
3
|
+
# Emits a centred "TableTitle" paragraph for a table: the table's anchor
# label, a dash, then the text of its <name> child, all in bold.
# Nothing is emitted when the table has no <name> element.
def table_title_parse(node, out)
  title = node.at(ns("./name"))
  return unless title

  out.p(class: "TableTitle", align: "center") do |para|
    para.b do |bold|
      bold << "#{get_anchors()[node['id']][:label]} — "
      bold << title.text
    end
  end
end
|
14
|
+
|
15
|
+
# Renders the table's <thead>, when present, into a thead element on +t+.
# Each child element is passed to tr_parse with the header flag set.
def thead_parse(node, t)
  head = node.at(ns("./thead"))
  return unless head

  t.thead do |h|
    rows = head.element_children
    rows.each_with_index do |row, idx|
      tr_parse(row, h, idx, rows.size, true)
    end
  end
end
|
25
|
+
|
26
|
+
# Renders the table's <tbody> into a tbody element on +t+.
# Each child element is passed to tr_parse with the header flag cleared.
# Like thead_parse and tfoot_parse, this is a no-op when the section is
# absent (previously a missing <tbody> raised NoMethodError on nil).
def tbody_parse(node, t)
  tbody = node.at(ns("./tbody"))
  return unless tbody

  t.tbody do |h|
    rows = tbody.element_children
    rows.each_with_index do |row, idx|
      tr_parse(row, h, idx, rows.size, false)
    end
  end
end
|
34
|
+
|
35
|
+
# Renders the table's <tfoot>, when present, into a tfoot element on +t+.
# Each child element is passed to tr_parse with the header flag cleared.
def tfoot_parse(node, t)
  foot = node.at(ns("./tfoot"))
  return unless foot

  t.tfoot do |h|
    rows = foot.element_children
    rows.each_with_index do |row, idx|
      tr_parse(row, h, idx, rows.size, false)
    end
  end
end
|
45
|
+
|
46
|
+
# Returns the attribute hash for the output table element: the source
# node's id plus fixed class/border/spacing settings.
def make_table_attr(node)
  identity = { id: node["id"], class: "MsoISOTable" }
  identity.merge(border: 1, cellspacing: 0, cellpadding: 0)
end
|
55
|
+
|
56
|
+
# Renders a whole table: the title paragraph first, then a table element
# holding head, body and foot, followed by the table's <dl> (if any) and
# each of its <note> children. @in_table is set while rendering and
# cleared afterwards.
def table_parse(node, out)
  @in_table = true
  table_title_parse(node, out)
  out.table(**make_table_attr(node)) do |t|
    thead_parse(node, t)
    tbody_parse(node, t)
    tfoot_parse(node, t)
    key = node.at(ns("./dl"))
    parse(key, out) if key
    node.xpath(ns("./note")).each { |note| parse(note, out) }
  end
  @in_table = false
end
|
69
|
+
|
70
|
+
# Border colour/style shared by every cell border rule below.
SW = "solid windowtext"

# Disabled left/right border rules, kept for reference; they are the only
# consumers of the col, totalcols and header arguments:
#border-left:#{col.zero? ? "#{SW} 1.5pt;" : "none;"}
#border-right:#{SW} #{col == totalcols && !header ? "1.5" : "1.0"}pt;

# Builds the attribute hash (rowspan, colspan, align, style) for one cell.
# The style makes <th> cells bold, draws a 1.5pt top border on row 0 only,
# and a bottom border that is 1.5pt when the cell (including its rowspan)
# ends on +totalrows+ and 1.0pt otherwise.
# +col+, +totalcols+ and +header+ are accepted but currently unused.
def make_tr_attr(td, row, totalrows, col, totalcols, header)
  span = td["rowspan"]
  last_row = span ? row + span.to_i - 1 : row
  top = row.zero? ? "#{SW} 1.5pt;" : "none;"
  bottom = "#{SW} #{last_row == totalrows ? '1.5' : '1.0'}pt;"
  css = [
    td.name == "th" ? "font-weight:bold;" : "",
    "border-top:#{top}",
    "mso-border-top-alt:#{top}",
    "border-bottom:#{bottom}",
    "mso-border-bottom-alt:#{bottom}",
  ].join
  { rowspan: span, colspan: td["colspan"], align: td["align"], style: css }
end
|
86
|
+
|
87
|
+
# Renders one table row. For every child element of +node+ an output cell
# named after that element is created with attributes from make_tr_attr
# (passed through attr_code), and the cell's children are parsed into it.
# +ord+ is the row index within its section, +totalrows+ the row count.
def tr_parse(node, out, ord, totalrows, header)
  out.tr do |r|
    cells = node.elements
    last_col = cells.size - 1
    cells.each_with_index do |cell, col|
      cell_attrs = make_tr_attr(cell, ord, totalrows - 1,
                                col, last_col, header)
      r.send(cell.name, **attr_code(cell_attrs)) do |entry|
        cell.children.each { |child| parse(child, entry) }
      end
    end
  end
end
|
98
|
+
end
|
99
|
+
end
|
data/lib/isodoc/terms.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
module IsoDoc
|
2
|
+
class Convert
|
3
|
+
|
4
|
+
# Parses every child of a definition node straight into +out+.
def definition_parse(node, out)
  node.children.each do |child|
    parse(child, out)
  end
end
|
7
|
+
|
8
|
+
# Writes the "[MODIFICATION]" marker to +out+, then parses the children of
# the node's first <p> child into +out+.
def modification_parse(node, out)
  out << "[MODIFICATION]"
  node.at(ns("./p")).children.each { |child| parse(child, out) }
end
|
13
|
+
|
14
|
+
# Emits an "AltTerms" paragraph reading "DEPRECATED: <term text>".
def deprecated_term_parse(node, out)
  out.p(class: "AltTerms") { |para| para << "DEPRECATED: #{node.text}" }
end
|
19
|
+
|
20
|
+
# Emits an "AltTerms" paragraph containing the admitted term's text.
def admitted_term_parse(node, out)
  out.p(class: "AltTerms") do |para|
    para << node.text
  end
end
|
23
|
+
|
24
|
+
# Emits a "Terms" paragraph containing the preferred term's text.
def term_parse(node, out)
  out.p(class: "Terms") do |para|
    para << node.text
  end
end
|
27
|
+
|
28
|
+
# Renders the body of a note/example whose label has already been written
# into paragraph +p+.
#
# When the first element is a <p>, its children are parsed inline into +p+
# (so they follow the label) and the remaining elements are parsed into
# +div+. Otherwise every element is parsed into +div+.
#
# The original body referred to +div+ without it being a parameter or a
# local, which raises NameError as soon as that branch runs. +div+ is now
# an optional fourth parameter defaulting to +p+, so existing three-argument
# callers keep working and can pass their enclosing div explicitly.
# NOTE(review): with a Nokogiri::XML::Builder the block arguments appear to
# be the same builder object, in which case the default is equivalent —
# confirm against the builder in use.
def para_then_remainder(first, node, p, div = p)
  if first.name == "p"
    first.children.each { |n| parse(n, p) }
    node.elements.drop(1).each { |n| parse(n, div) }
  else
    node.elements.each { |n| parse(n, div) }
  end
end
|
36
|
+
|
37
|
+
# Renders a term example as a "Note" div containing a "Note" paragraph that
# starts with "EXAMPLE:" and one tab, followed by the example's content
# (laid out by para_then_remainder).
def termexample_parse(node, out)
  out.div(class: "Note") do |div|
    lead = node.first_element_child
    div.p(class: "Note") do |para|
      para << "EXAMPLE:"
      insert_tab(para, 1)
      para_then_remainder(lead, node, para)
    end
  end
end
|
47
|
+
|
48
|
+
# Renders a term note as a "Note" div containing a "Note" paragraph that
# starts with the note's anchor label and ": ", followed by the note's
# content (laid out by para_then_remainder).
def termnote_parse(node, out)
  out.div(class: "Note") do |div|
    lead = node.first_element_child
    div.p(class: "Note") do |para|
      label = get_anchors()[node["id"]][:label]
      para << "#{label}: "
      para_then_remainder(lead, node, para)
    end
  end
end
|
57
|
+
|
58
|
+
# Emits a paragraph holding the node's parsed children wrapped between the
# literal markers "[TERMREF]" and "[/TERMREF]".
def termref_parse(node, out)
  out.p do |para|
    para << "[TERMREF]"
    node.children.each do |child|
      parse(child, para)
    end
    para << "[/TERMREF]"
  end
end
|
65
|
+
|
66
|
+
# Renders a term definition: a "TermNum" paragraph carrying the term's id
# and anchor label, then resets the term domain to "" and parses all of the
# node's children into +out+.
def termdef_parse(node, out)
  out.p(class: "TermNum", id: node["id"]) do |para|
    para << get_anchors()[node["id"]][:label]
  end
  set_termdomain("")
  node.children.each do |child|
    parse(child, out)
  end
end
|
73
|
+
end
|
74
|
+
end
|
data/lib/isodoc/utils.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
#require "uuidtools"
|
2
|
+
|
3
|
+
module IsoDoc
|
4
|
+
class Convert
|
5
|
+
# Rewrites an XPath so that element names are qualified with the "xmlns:"
# prefix. A name is prefixed when it follows "/", follows an axis "::",
# opens a predicate that compares it with "=" ("[name =" / "[name="), or is
# the sole content of a predicate ("[name]").
#
# The character classes were written "[a-zA-z]", a range that also covers
# "[", "\", "]", "^", "_" and "`". Because "]" and "[" matched, consecutive
# predicates such as "[a][b]" were consumed as a single name and "b" was
# left unprefixed. The classes are now letters plus underscore.
def ns(xpath)
  xpath.gsub(%r{/([a-zA-Z_])}, "/xmlns:\\1")
       .gsub(%r{::([a-zA-Z_])}, "::xmlns:\\1")
       .gsub(%r{\[([a-zA-Z_]+ ?=)}, "[xmlns:\\1")
       .gsub(%r{\[([a-zA-Z_]+\])}, "[xmlns:\\1")
end
|
11
|
+
|
12
|
+
# Appends a span styled "mso-tab-count:n" to +out+, filled with +n+ copies
# of the spacer string.
#
# The original looped with "[1..n].each", which iterates over an array
# holding one Range and therefore always ran exactly once whatever +n+ was.
# The spacer is now written +n+ times, matching the declared tab count.
def insert_tab(out, n)
  out.span(**attr_code(style: "mso-tab-count:#{n}")) do |span|
    n.times { span << "  " }
  end
end
|
17
|
+
|
18
|
+
# Maps a two-digit stage code (as a symbol key) to its abbreviation.
STAGE_ABBRS = {
  "00": "PWI",
  "10": "NWIP",
  "20": "WD",
  "30": "CD",
  "40": "DIS",
  "50": "FDIS",
  "60": "IS",
  "90": "(Review)",
  "95": "(Withdrawal)",
}.freeze

# Returns the abbreviation for +stage+, or "??" when the code is unknown.
# +stage+ is converted with to_s first, so nil (previously a NoMethodError
# on to_sym) and integer codes are handled as well as strings and symbols.
def stage_abbreviation(stage)
  STAGE_ABBRS[stage.to_s.to_sym] || "??"
end
|
33
|
+
|
34
|
+
# Minimal XHTML 1.0 Strict document (DOCTYPE, empty head and body) that is
# parsed to obtain a document in which fragments are created.
NOKOHEAD = <<~XHTML
  <!DOCTYPE html SYSTEM
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  <html xmlns="http://www.w3.org/1999/xhtml">
  <head> <title></title> <meta charset="UTF-8" /> </head>
  <body> </body> </html>
XHTML
|
41
|
+
|
42
|
+
|
43
|
+
# Runs +block+ against a Nokogiri builder attached to an empty fragment of
# the NOKOHEAD XHTML document (processing fragments as XHTML allows for
# HTML entities). Returns the fragment serialized as US-ASCII XML, as an
# array of lines with trailing whitespace and newlines removed.
def noko(&block)
  skeleton = ::Nokogiri::XML.parse(NOKOHEAD)
  fragment = skeleton.fragment("")
  ::Nokogiri::XML::Builder.with(fragment, &block)
  serialized = fragment.to_xml(encoding: "US-ASCII")
  serialized.lines.map { |line| line.gsub(/\s*\n/, "") }
end
|
53
|
+
|
54
|
+
# Returns a new hash built from +attributes+ with nil-valued entries
# dropped and every String value passed through HTMLEntities decoding.
# Non-string values are kept unchanged.
def attr_code(attributes)
  attributes.each_with_object({}) do |(key, value), kept|
    next if value.nil?

    kept[key] = value.is_a?(String) ? HTMLEntities.new.decode(value) : value
  end
end
|
60
|
+
|
61
|
+
# Minimal XHTML 1.0 Strict document used as the host for XHTML fragments.
# The same constant, with the same text, is already assigned earlier in
# this class body; assigning it a second time makes Ruby print an "already
# initialized constant" warning, so this definition only takes effect when
# the constant is not yet defined.
NOKOHEAD = <<~HERE unless defined?(NOKOHEAD)
  <!DOCTYPE html SYSTEM
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  <html xmlns="http://www.w3.org/1999/xhtml">
  <head> <title></title> <meta charset="UTF-8" /> </head>
  <body> </body> </html>
HERE
|
68
|
+
|
69
|
+
# Parses an XML string as an XHTML document with Nokogiri. Any "<?xml ...>"
# declaration is stripped, and the XHTML 1.0 Strict DOCTYPE is prepended
# when the text contains no "<!DOCTYPE ".
#
# The original stripped the declaration with gsub!, which altered the
# caller's string in place and raised on frozen strings; the input is now
# left untouched and a copy is worked on instead.
def to_xhtml(xml)
  source = xml.gsub(/<\?xml[^>]*>/, "")
  unless /<!DOCTYPE /.match?(source)
    source = "<!DOCTYPE html SYSTEM\n" \
             '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + source
  end
  Nokogiri::XML.parse(source)
end
|
77
|
+
|
78
|
+
# Parses +xml+ as a fragment of a document created from the NOKOHEAD XHTML
# skeleton and returns that fragment.
def to_xhtml_fragment(xml)
  ::Nokogiri::XML.parse(NOKOHEAD).fragment(xml)
end
|
83
|
+
|
84
|
+
# Serializes +xml+ with to_xml and removes the first XHTML namespace
# declaration (including its leading space) from the result.
def from_xhtml(xml)
  serialized = xml.to_xml
  serialized.sub(%r{ xmlns="http://www.w3.org/1999/xhtml"}, "")
end
|
87
|
+
end
|
88
|
+
end
|
@@ -0,0 +1,204 @@
|
|
1
|
+
#require "isodoc/utils"
|
2
|
+
|
3
|
+
module IsoDoc
|
4
|
+
class Convert
|
5
|
+
#include ::IsoDoc::Utils
|
6
|
+
|
7
|
+
# NOTE: this assignment runs in the class body, so it sets an instance
# variable on the class object itself, not on instances of the class; the
# instance methods below read and write a different @anchors.
@anchors = {}

# Returns the anchor table (id => { label:, xref:, level: } entries filled
# in by the *_names methods). The table is created on first use so that an
# instance whose @anchors was never assigned gets an empty hash rather
# than nil.
def get_anchors
  @anchors ||= {}
end
|
12
|
+
|
13
|
+
# Registers anchors for back matter: every <annex> is named with a letter
# starting at "A" (character code 65 plus its index), and every <bibitem>
# is handed to reference_names.
def back_anchor_names(docxml)
  docxml.xpath(ns("//annex")).each_with_index do |annex, idx|
    letter = (65 + idx).chr.to_s
    annex_names(annex, letter)
  end
  docxml.xpath(ns("//bibitem")).each { |bibitem| reference_names(bibitem) }
end
|
21
|
+
|
22
|
+
# Registers anchors for the opening sections: the Introduction's
# subsections, then Scope as clause "1", Normative References as "2" and
# the terms section as "3" (all at level 1), and finally the tables,
# figures and formulas of the middle sections.
def initial_anchor_names(d)
  introduction_names(d.at(ns("//content[title = 'Introduction']")))
  {
    "//clause[title = 'Scope']" => "1",
    "//references[title = 'Normative References']" => "2",
    "//terms" => "3",
  }.each do |query, num|
    section_names(d.at(ns(query)), num, 1)
  end
  middle_section_asset_names(d)
end
|
30
|
+
|
31
|
+
# Numbers tables, figures and formulas sequentially across the middle
# sections (Scope, Normative References, terms, symbols and abbreviations,
# and every clause directly under <sections>).
def middle_section_asset_names(d)
  query = [
    "//clause[title = 'Scope']",
    "//references[title = 'Normative References']",
    "//terms",
    "//symbols-abbrevs",
    "//clause[parent::sections]",
  ].join(" | ")
  sequential_asset_names(d.xpath(ns(query)))
end
|
37
|
+
|
38
|
+
# Numbers every clause directly under <sections>, except the one titled
# "Scope", as a level-1 section; numbering starts at +sect_num+.
def clause_names(docxml, sect_num)
  query = ns("//clause[parent::sections][not(xmlns:title = 'Scope')]")
  docxml.xpath(query).each_with_index do |clause, offset|
    section_names(clause, (offset + sect_num).to_s, 1)
  end
end
|
44
|
+
|
45
|
+
# Registers an anchor for every <termnote>, numbered per term: the label is
# "Note N to entry" and the cross-reference is the owning term's xref
# followed by ", Note N".
#
# The separator used to be a bare comma, producing text such as
# "3.1,Note 1"; a space now follows the comma.
def termnote_anchor_names(docxml)
  docxml.xpath(ns("//term[termnote]")).each do |t|
    t.xpath(ns("./termnote")).each_with_index do |n, i|
      @anchors[n["id"]] = { label: "Note #{i + 1} to entry",
                            xref: "#{@anchors[t['id']][:xref]}, "\
                                  "Note #{i + 1}" }
    end
  end
end
|
54
|
+
|
55
|
+
# Registers an anchor for every <note> that is a direct child of a table,
# numbered per table: the label is "NOTE N" and the cross-reference is the
# owning table's xref followed by ", Note N".
#
# The separator used to be a bare comma, producing text such as
# "Table 1,Note 1"; a space now follows the comma.
def table_note_anchor_names(docxml)
  docxml.xpath(ns("//table[note]")).each do |t|
    t.xpath(ns("./note")).each_with_index do |n, i|
      @anchors[n["id"]] = { label: "NOTE #{i + 1}",
                            xref: "#{@anchors[t['id']][:xref]}, "\
                                  "Note #{i + 1}" }
    end
  end
end
|
64
|
+
|
65
|
+
# Registers anchors for the sections after terms: the symbols and
# abbreviations section, when present, becomes clause 4 and the remaining
# clauses follow on from it; otherwise the clauses start at 4. Term note
# anchors are registered last.
def middle_anchor_names(docxml)
  next_num = 4
  symbols = docxml.at(ns("//symbols-abbrevs"))
  if symbols
    section_names(symbols, next_num.to_s, 1)
    next_num += 1
  end
  clause_names(docxml, next_num)
  termnote_anchor_names(docxml)
end
|
75
|
+
|
76
|
+
# Extracts names (xref and label) for all anchors in the document by
# running the initial, middle and back passes in that order, followed by
# the table note pass.
def anchor_names(docxml)
  %i[initial_anchor_names middle_anchor_names back_anchor_names].each do |pass|
    send(pass, docxml)
  end
  table_note_anchor_names(docxml)
end
|
83
|
+
|
84
|
+
# Numbers every figure under +clause+ in the order xpath returns them. A
# figure whose parent is not a figure gets the next number ("Figure N"); a
# figure nested directly in another figure is a subfigure of the current
# number ("Figure N-M"). Label and xref are the same string.
#
# The label used to be computed once before the counters were updated and
# then immediately recomputed; that first, unused assignment is removed.
def sequential_figure_names(clause)
  figure_no = subfigure_no = 0
  clause.xpath(ns(".//figure")).each do |fig|
    if fig.parent.name == "figure"
      subfigure_no += 1
    else
      subfigure_no = 0
      figure_no += 1
    end
    suffix = subfigure_no.zero? ? "" : "-#{subfigure_no}"
    label = "Figure #{figure_no}" + suffix
    @anchors[fig["id"]] = { label: label, xref: label }
  end
end
|
98
|
+
|
99
|
+
# Numbers the assets found under +clause+ with plain running numbers:
# tables ("Table N"), figures (via sequential_figure_names) and formulas
# (label "N", xref "Formula N").
def sequential_asset_names(clause)
  clause.xpath(ns(".//table")).each_with_index do |table, idx|
    caption = "Table #{idx + 1}"
    @anchors[table["id"]] = { label: caption, xref: caption }
  end
  sequential_figure_names(clause)
  clause.xpath(ns(".//formula")).each_with_index do |formula, idx|
    number = (idx + 1).to_s
    @anchors[formula["id"]] = { label: number, xref: "Formula #{number}" }
  end
end
|
110
|
+
|
111
|
+
# Numbers every figure under +clause+ with a "+num+." prefix. A figure
# whose parent is not a figure gets the next number ("Figure num.N"); a
# figure nested directly in another figure is a subfigure of the current
# number ("Figure num.N-M"). Label and xref are the same string.
def hierarchical_figure_names(clause, num)
  figure_no = 0
  subfigure_no = 0
  clause.xpath(ns(".//figure")).each do |fig|
    if fig.parent.name == "figure"
      subfigure_no += 1
    else
      subfigure_no = 0
      figure_no += 1
    end
    suffix = subfigure_no.zero? ? "" : "-#{subfigure_no}"
    caption = "Figure #{num}.#{figure_no}" + suffix
    @anchors[fig["id"]] = { label: caption, xref: caption }
  end
end
|
124
|
+
|
125
|
+
# Numbers the assets found under +clause+ with a "+num+." prefix: tables
# ("Table num.N"), figures (via hierarchical_figure_names) and formulas
# (label "num.N", xref "Formula num.N").
def hierarchical_asset_names(clause, num)
  clause.xpath(ns(".//table")).each_with_index do |table, idx|
    caption = "Table #{num}.#{idx + 1}"
    @anchors[table["id"]] = { label: caption, xref: caption }
  end
  hierarchical_figure_names(clause, num)
  clause.xpath(ns(".//formula")).each_with_index do |formula, idx|
    number = "#{num}.#{idx + 1}"
    @anchors[formula["id"]] = { label: number, xref: "Formula #{number}" }
  end
end
|
136
|
+
|
137
|
+
# Numbers the subsections of the Introduction as "0.1", "0.2", ...
#
# Two defects are fixed here:
# * section_names takes three arguments (clause, num, level) but was called
#   with two, raising ArgumentError for any Introduction with subsections.
#   Level 2 is passed, matching how section_names numbers the children of
#   a level-1 clause (level + 1).
# * the caller passes the result of a lookup that is nil when the document
#   has no Introduction; that case is now a no-op instead of a
#   NoMethodError.
def introduction_names(clause)
  return if clause.nil?

  clause.xpath(ns("./subsection")).each_with_index do |c, i|
    section_names(c, "0.#{i + 1}", 2)
  end
end
|
142
|
+
|
143
|
+
# Registers the anchor for a top-level section (label +num+, xref
# "Clause num", the given +level+) and numbers its direct <subsection> and
# <term> children "num.1", "num.2", ... one level deeper via section_names1.
#
# Callers pass the result of a document lookup, which is nil when the
# section is missing; that case is now a no-op rather than a NoMethodError.
def section_names(clause, num, level)
  return if clause.nil?

  @anchors[clause["id"]] = { label: num, xref: "Clause #{num}",
                             level: level }
  clause.xpath(ns("./subsection | ./term")).each_with_index do |c, i|
    section_names1(c, "#{num}.#{i + 1}", level + 1)
  end
end
|
150
|
+
|
151
|
+
# Registers the anchor for a nested section or term and recurses into its
# direct <subsection> children, numbering them "num.1", "num.2", ... one
# level deeper. A <term> is cross-referenced by its bare number; anything
# else as "Clause num".
def section_names1(clause, num, level)
  xref = clause.name == "term" ? num : "Clause #{num}"
  @anchors[clause["id"]] = { label: num, level: level, xref: xref }
  clause.xpath(ns("./subsection ")).each_with_index do |sub, idx|
    section_names1(sub, "#{num}.#{idx + 1}", level + 1)
  end
end
|
160
|
+
|
161
|
+
# Registers the anchor for an annex: an HTML label "Annex num" with its
# obligation ("(Normative)" when the subtype attribute is "normative",
# "(Informative)" otherwise), xref "Annex num", level 1. Direct subsections
# are numbered "num.1", "num.2", ... at level 2, and the annex's tables,
# figures and formulas are numbered with the annex prefix.
def annex_names(clause, num)
  obligation =
    clause["subtype"] == "normative" ? "(Normative)" : "(Informative)"
  @anchors[clause["id"]] = {
    label: "<b>Annex #{num}</b><br/>#{obligation}",
    xref: "Annex #{num}",
    level: 1,
  }
  clause.xpath(ns("./subsection")).each_with_index do |sub, idx|
    annex_names1(sub, "#{num}.#{idx + 1}", 2)
  end
  hierarchical_asset_names(clause, num)
end
|
172
|
+
|
173
|
+
# Registers the anchor for an annex subsection (label and xref are both
# +num+) and recurses into its direct <subsection> children, numbering them
# "num.1", "num.2", ... one level deeper.
#
# The child lookup used ".//subsection", which returns all descendants.
# Combined with the recursion, deeper subsections were counted among their
# ancestors' children and had their anchors overwritten with wrong numbers.
# Only direct children are now selected, as section_names1 does.
def annex_names1(clause, num, level)
  @anchors[clause["id"]] = { label: num,
                             xref: num,
                             level: level }
  clause.xpath(ns("./subsection")).each_with_index do |c, i|
    annex_names1(c, "#{num}.#{i + 1}", level + 1)
  end
end
|
181
|
+
|
182
|
+
# Formats a document identifier for use as a cross-reference: prefixed with
# "ISO " for ISO publications, wrapped in square brackets when it has an
# all-digit line and no line that is already bracketed, unchanged otherwise.
def format_ref(ref, isopub)
  if isopub
    "ISO #{ref}"
  elsif /^\d+$/.match?(ref) && !/^\[.*\]$/.match?(ref)
    "[#{ref}]"
  else
    ref
  end
end
|
187
|
+
|
188
|
+
# Registers the cross-reference text for a bibliography item. Items with no
# <docidentifier> are delegated to ref_names. Otherwise the identifier is
# formatted with format_ref and, for ISO publications that have a
# <publisherdate>, followed by ": " and that date.
def reference_names(ref)
  iso_publisher = ref.at(ns("./publisher/affiliation[name = 'ISO']"))
  identifier = ref.at(ns("./docidentifier"))
  return ref_names(ref) unless identifier

  pubdate = ref.at(ns("./publisherdate"))
  citation = format_ref(identifier.text, iso_publisher)
  citation += ": #{pubdate.text}" if pubdate && iso_publisher
  @anchors[ref["id"]] = { xref: citation }
end
|
197
|
+
|
198
|
+
# Registers the cross-reference text for a bibliography item from its own
# text. Square brackets are stripped unless the text has a line that is
# exactly a bracketed number such as "[3]".
def ref_names(ref)
  linkend = ref.text
  bracketed_number = /^\[\d+\]$/.match?(linkend)
  linkend.gsub!(/[\[\]]/, "") unless bracketed_number
  @anchors[ref["id"]] = { xref: linkend }
end
|
203
|
+
end
|
204
|
+
end
|