isodoc 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.hound.yml +3 -0
- data/.oss-guides.rubocop.yml +1077 -0
- data/.rubocop.ribose.yml +65 -0
- data/.rubocop.tb.yml +640 -0
- data/.rubocop.yml +15 -0
- data/Gemfile +6 -0
- data/README.adoc +32 -0
- data/isodoc.gemspec +51 -0
- data/lib/isodoc.rb +74 -0
- data/lib/isodoc/blocks.rb +184 -0
- data/lib/isodoc/cleanup.rb +155 -0
- data/lib/isodoc/html.rb +44 -0
- data/lib/isodoc/inline.rb +211 -0
- data/lib/isodoc/iso2wordhtml.rb +143 -0
- data/lib/isodoc/lists.rb +54 -0
- data/lib/isodoc/metadata.rb +99 -0
- data/lib/isodoc/postprocessing.rb +156 -0
- data/lib/isodoc/references.rb +129 -0
- data/lib/isodoc/section.rb +136 -0
- data/lib/isodoc/table.rb +99 -0
- data/lib/isodoc/terms.rb +74 -0
- data/lib/isodoc/utils.rb +88 -0
- data/lib/isodoc/version.rb +3 -0
- data/lib/isodoc/xref_gen.rb +204 -0
- metadata +338 -0
data/lib/isodoc/table.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
module IsoDoc
|
2
|
+
class Convert
|
3
|
+
# Writes the table caption, if the table has a <name> child.
# The caption is a centred "TableTitle" paragraph holding, in bold, the
# anchor label registered for the table's id followed by the name text.
# Returns nil without emitting anything when there is no <name>.
def table_title_parse(node, out)
  caption = node.at(ns("./name"))
  return unless caption
  out.p(class: "TableTitle", align: "center") do |para|
    para.b do |bold|
      bold << "#{get_anchors()[node['id']][:label]} — "
      bold << caption.text
    end
  end
end
|
14
|
+
|
15
|
+
# Renders the table's <thead>, if present, as a thead element on builder +t+.
# Each child element of the source thead is handed to tr_parse together with
# its row index, the row count, and header = true.
def thead_parse(node, t)
  head = node.at(ns("./thead"))
  return unless head
  t.thead do |h|
    head.element_children.each_with_index do |row, idx|
      tr_parse(row, h, idx, head.element_children.size, true)
    end
  end
end
|
25
|
+
|
26
|
+
# Renders the table's <tbody> as a tbody element on builder +t+.
# Each child element of the source tbody is handed to tr_parse together with
# its row index, the row count, and header = false.
#
# A table without a <tbody> is skipped (returns nil), mirroring the guards in
# thead_parse and tfoot_parse; previously this case raised NoMethodError on
# nil.
def tbody_parse(node, t)
  tbody = node.at(ns("./tbody"))
  return unless tbody
  t.tbody do |h|
    tbody.element_children.each_with_index do |n, i|
      tr_parse(n, h, i, tbody.element_children.size, false)
    end
  end
end
|
34
|
+
|
35
|
+
# Renders the table's <tfoot>, if present, as a tfoot element on builder +t+.
# Each child element of the source tfoot is handed to tr_parse together with
# its row index, the row count, and header = false.
def tfoot_parse(node, t)
  foot = node.at(ns("./tfoot"))
  return unless foot
  t.tfoot do |h|
    foot.element_children.each_with_index do |row, idx|
      tr_parse(row, h, idx, foot.element_children.size, false)
    end
  end
end
|
45
|
+
|
46
|
+
# Builds the attribute hash for the output table element: the source node's
# id plus a fixed class, border, cellspacing and cellpadding.
def make_table_attr(node)
  fixed = { class: "MsoISOTable", border: 1, cellspacing: 0, cellpadding: 0 }
  { id: node["id"] }.merge(fixed)
end
|
55
|
+
|
56
|
+
# Renders a whole table: caption first, then a table element containing the
# head, body and foot sections, followed by the table's <dl> (if any) and its
# <note> children, all parsed inside the table block.
# @in_table is set to true for the duration and reset to false at the end.
def table_parse(node, out)
  @in_table = true
  table_title_parse(node, out)
  out.table(**make_table_attr(node)) do |t|
    thead_parse(node, t)
    tbody_parse(node, t)
    tfoot_parse(node, t)
    key = node.at(ns("./dl"))
    parse(key, out) if key
    node.xpath(ns("./note")).each { |note| parse(note, out) }
  end
  @in_table = false
  # out.p { |p| p << " " }
end
|
69
|
+
|
70
|
+
# CSS border style/colour fragment interpolated into the cell border rules
# that make_tr_attr builds.
SW = "solid windowtext"
|
71
|
+
|
72
|
+
#border-left:#{col.zero? ? "#{SW} 1.5pt;" : "none;"}
|
73
|
+
#border-right:#{SW} #{col == totalcols && !header ? "1.5" : "1.0"}pt;
|
74
|
+
# Builds the attribute hash (rowspan, colspan, align, style) for one cell.
#
# td        - source cell; read for its name and its "rowspan", "colspan"
#             and "align" attributes
# row       - zero-based index of the cell's row
# totalrows - index that counts as the last row (callers pass row count - 1)
# col, totalcols, header - accepted but not used by the current rules
#
# The style gets a bold rule for "th" cells, a 1.5pt top border on row 0
# (none otherwise), and a bottom border that is 1.5pt when the cell, allowing
# for its rowspan, reaches the last row and 1.0pt otherwise.
def make_tr_attr(td, row, totalrows, col, totalcols, header)
  bold = td.name == "th" ? "font-weight:bold;" : ""
  last_row = td["rowspan"] ? row + td["rowspan"].to_i - 1 : row
  top = row.zero? ? "#{SW} 1.5pt;" : "none;"
  bottom = "#{SW} #{last_row == totalrows ? '1.5' : '1.0'}pt;"
  css = [
    bold,
    "border-top:#{top}",
    "mso-border-top-alt:#{top}",
    "border-bottom:#{bottom}",
    "mso-border-bottom-alt:#{bottom}",
  ].join
  { rowspan: td["rowspan"], colspan: td["colspan"],
    align: td["align"], style: css }
end
|
86
|
+
|
87
|
+
# Renders one source row as a tr element on builder +out+.
# Every child element of the row becomes an output element of the same name,
# with attributes from make_tr_attr (passed through attr_code), and its
# children are parsed into it.
#
# ord       - zero-based index of this row within its section
# totalrows - number of rows in the section
# header    - forwarded unchanged to make_tr_attr
def tr_parse(node, out, ord, totalrows, header)
  out.tr do |r|
    node.elements.each_with_index do |cell, col|
      cell_attrs = make_tr_attr(cell, ord, totalrows - 1,
                                col, node.elements.size - 1, header)
      r.send(cell.name, **attr_code(cell_attrs)) do |entry|
        cell.children.each { |child| parse(child, entry) }
      end
    end
  end
end
|
98
|
+
end
|
99
|
+
end
|
data/lib/isodoc/terms.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
module IsoDoc
|
2
|
+
class Convert
|
3
|
+
|
4
|
+
# Parses every child of a definition node straight into +out+, in order.
def definition_parse(node, out)
  node.children.each do |child|
    parse(child, out)
  end
end
|
7
|
+
|
8
|
+
# Writes the "[MODIFICATION]" marker to +out+, then parses the children of
# the node's first <p> child into +out+.
def modification_parse(node, out)
  out << "[MODIFICATION]"
  paragraph = node.at(ns("./p"))
  paragraph.children.each do |child|
    parse(child, out)
  end
end
|
13
|
+
|
14
|
+
# Emits an "AltTerms" paragraph reading "DEPRECATED: " followed by the
# node's text.
def deprecated_term_parse(node, out)
  out.p(class: "AltTerms") { |para| para << "DEPRECATED: #{node.text}" }
end
|
19
|
+
|
20
|
+
# Emits an "AltTerms" paragraph containing the node's text.
def admitted_term_parse(node, out)
  out.p(class: "AltTerms") do |para|
    para << node.text
  end
end
|
23
|
+
|
24
|
+
# Emits a "Terms" paragraph containing the node's text.
def term_parse(node, out)
  out.p(class: "Terms") do |para|
    para << node.text
  end
end
|
27
|
+
|
28
|
+
# Renders the content of a note/example whose label has already been written
# into paragraph +p+.
#
# first - the first element child of +node+
# node  - the note/example element
# p     - output target for the leading paragraph's inline content
# div   - output target for the remaining elements; defaults to +p+ so that
#         existing three-argument callers keep working
#
# When +first+ is a <p>, its children are parsed into +p+ and the remaining
# elements into +div+; otherwise every element of +node+ goes into +div+.
#
# The body previously referenced an undefined local `div`, which raised
# NameError as soon as there was any remainder element to render.
def para_then_remainder(first, node, p, div = p)
  if first.name == "p"
    first.children.each { |n| parse(n, p) }
    node.elements.drop(1).each { |n| parse(n, div) }
  else
    node.elements.each { |n| parse(n, div) }
  end
end
|
36
|
+
|
37
|
+
# Renders a term example as a "Note" div whose "Note" paragraph starts with
# "EXAMPLE:" and one tab, followed by the example's content as laid out by
# para_then_remainder.
def termexample_parse(node, out)
  out.div(class: "Note") do |div|
    lead = node.first_element_child
    div.p(class: "Note") do |para|
      para << "EXAMPLE:"
      insert_tab(para, 1)
      para_then_remainder(lead, node, para)
    end
  end
end
|
47
|
+
|
48
|
+
# Renders a term note as a "Note" div whose "Note" paragraph starts with the
# anchor label registered for the note's id and ": ", followed by the note's
# content as laid out by para_then_remainder.
def termnote_parse(node, out)
  out.div(class: "Note") do |div|
    lead = node.first_element_child
    div.p(class: "Note") do |para|
      note_label = get_anchors()[node["id"]][:label]
      para << "#{note_label}: "
      para_then_remainder(lead, node, para)
    end
  end
end
|
57
|
+
|
58
|
+
# Emits a paragraph that wraps the node's parsed children between the
# literal markers "[TERMREF]" and "[/TERMREF]".
def termref_parse(node, out)
  out.p do |para|
    para << "[TERMREF]"
    node.children.each do |child|
      parse(child, para)
    end
    para << "[/TERMREF]"
  end
end
|
65
|
+
|
66
|
+
# Renders a term definition: a "TermNum" paragraph carrying the node's id and
# the anchor label registered for it, then resets the term domain to "" and
# parses each child of the node into +out+.
def termdef_parse(node, out)
  term_id = node["id"]
  out.p(class: "TermNum", id: term_id) do |para|
    para << get_anchors()[node["id"]][:label]
  end
  set_termdomain("")
  node.children.each do |child|
    parse(child, out)
  end
end
|
73
|
+
end
|
74
|
+
end
|
data/lib/isodoc/utils.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
#require "uuidtools"
|
2
|
+
|
3
|
+
module IsoDoc
|
4
|
+
class Convert
|
5
|
+
# Rewrites an XPath expression so that element names carry the "xmlns:"
# prefix. A name is prefixed when it follows "/", follows an axis "::",
# or is the bare name opening a predicate ("[name =" or "[name]").
#
# The character classes were written `[a-zA-z]`; the `A-z` range also covers
# the punctuation between "Z" and "a" ("[", "\", "]", "^", "_", "`").
# They now accept only ASCII letters and "_", so names starting with "_"
# are still prefixed but stray punctuation no longer is.
def ns(xpath)
  xpath.gsub(%r{/([a-zA-Z_])}, "/xmlns:\\1").
    gsub(%r{::([a-zA-Z_])}, "::xmlns:\\1").
    gsub(%r{\[([a-zA-Z_]+ ?=)}, "[xmlns:\\1").
    gsub(%r{\[([a-zA-Z_]+\])}, "[xmlns:\\1")
end
|
11
|
+
|
12
|
+
# Emits a span styled "mso-tab-count:n" on +out+, filled with +n+ copies of
# the filler string.
#
# The filler loop was written `[1..n].each`, which iterates once over a
# one-element array holding a Range, so exactly one filler was written
# whatever +n+ was. It now repeats +n+ times.
def insert_tab(out, n)
  out.span **attr_code(style: "mso-tab-count:#{n}") do |span|
    n.times { span << "  " }
  end
end
|
17
|
+
|
18
|
+
# Lookup from two-digit stage code (as a Symbol) to the abbreviation
# returned by stage_abbreviation.
STAGE_ABBRS = {
  :"00" => "PWI",
  :"10" => "NWIP",
  :"20" => "WD",
  :"30" => "CD",
  :"40" => "DIS",
  :"50" => "FDIS",
  :"60" => "IS",
  :"90" => "(Review)",
  :"95" => "(Withdrawal)",
}.freeze
|
29
|
+
|
30
|
+
# Returns the abbreviation registered in STAGE_ABBRS for +stage+, or "??"
# when the code is unknown.
#
# +stage+ is converted with to_s before the lookup, so nil and Integer codes
# are accepted as well as Strings and Symbols; these previously raised
# NoMethodError on to_sym.
def stage_abbreviation(stage)
  STAGE_ABBRS.fetch(stage.to_s.to_sym, "??")
end
|
33
|
+
|
34
|
+
# Skeleton XHTML document (strict DTD doctype, empty head and body) that the
# fragment helpers in this class parse to obtain a host document.
# Frozen so the shared template cannot be altered in place.
NOKOHEAD = <<~HERE.freeze
  <!DOCTYPE html SYSTEM
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  <html xmlns="http://www.w3.org/1999/xhtml">
  <head> <title></title> <meta charset="UTF-8" /> </head>
  <body> </body> </html>
HERE
|
41
|
+
|
42
|
+
|
43
|
+
# Block for processing XML document fragments as XHTML, to allow for
# HTML entities.
#
# Runs the given block against a Nokogiri builder attached to an empty
# fragment of a document parsed from NOKOHEAD, then serialises the fragment
# as US-ASCII XML. Returns an array with one string per serialised line,
# each with its trailing whitespace and newline removed.
def noko(&block)
  host = ::Nokogiri::XML.parse(NOKOHEAD)
  fragment = host.fragment("")
  ::Nokogiri::XML::Builder.with fragment, &block
  serialised = fragment.to_xml(encoding: "US-ASCII")
  serialised.lines.map { |line| line.gsub(/\s*\n/, "") }
end
|
53
|
+
|
54
|
+
# Prepares an attribute hash for output: entries whose value is nil are
# dropped, and String values are passed through HTMLEntities#decode.
# Non-String values are kept as they are. Returns a new Hash.
#
# Cleans up a stray block-less `.map` (which only produced an intermediate
# Enumerator) and builds one decoder per call instead of one per value.
def attr_code(attributes)
  decoder = HTMLEntities.new
  present = attributes.reject { |_, val| val.nil? }
  present.each_with_object({}) do |(key, val), decoded|
    decoded[key] = val.is_a?(String) ? decoder.decode(val) : val
  end
end
|
60
|
+
|
61
|
+
# Skeleton XHTML document used as the host for fragment parsing.
# This file assigns NOKOHEAD twice with identical text; the guard makes this
# second assignment a no-op when the constant already exists, avoiding
# Ruby's "already initialized constant" warning.
unless defined?(NOKOHEAD)
  NOKOHEAD = <<~HERE
    <!DOCTYPE html SYSTEM
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
    <html xmlns="http://www.w3.org/1999/xhtml">
    <head> <title></title> <meta charset="UTF-8" /> </head>
    <body> </body> </html>
  HERE
end
|
68
|
+
|
69
|
+
# Parses an XML string as an XHTML document and returns the result of
# Nokogiri::XML.parse.
#
# Any XML declaration (<?xml ...>) is removed first, and an XHTML strict
# DOCTYPE is prepended unless the text already contains "<!DOCTYPE ".
#
# The input string is no longer modified in place: the earlier `gsub!`
# altered the caller's object and raised on frozen strings.
def to_xhtml(xml)
  doctype = %(<!DOCTYPE html SYSTEM\n"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">)
  xml = xml.gsub(/<\?xml[^>]*>/, "")
  xml = doctype + xml unless /<!DOCTYPE /.match? xml
  Nokogiri::XML.parse(xml)
end
|
77
|
+
|
78
|
+
# Parses +xml+ as a fragment of a document built from NOKOHEAD and returns
# that fragment.
def to_xhtml_fragment(xml)
  host = ::Nokogiri::XML.parse(NOKOHEAD)
  host.fragment(xml)
end
|
83
|
+
|
84
|
+
# Serialises +xml+ with to_xml and removes the first XHTML namespace
# declaration from the resulting text.
def from_xhtml(xml)
  serialised = xml.to_xml
  serialised.sub(%r{ xmlns="http://www.w3.org/1999/xhtml"}, "")
end
|
87
|
+
end
|
88
|
+
end
|
data/lib/isodoc/xref_gen.rb
ADDED
@@ -0,0 +1,204 @@
|
|
1
|
+
#require "isodoc/utils"
|
2
|
+
|
3
|
+
module IsoDoc
|
4
|
+
class Convert
|
5
|
+
#include ::IsoDoc::Utils
|
6
|
+
|
7
|
+
# NOTE(review): evaluated in the class body, so this sets an instance
# variable on the Convert class object itself, not the @anchors that the
# instance methods below read and write. Confirm the per-instance @anchors
# is initialised elsewhere (e.g. in the constructor).
@anchors = {}
|
8
|
+
|
9
|
+
# Returns the anchor table held in @anchors (id => hash of :label, :xref
# and, for sections, :level, as filled in by the *_names methods).
def get_anchors
  @anchors
end
|
12
|
+
|
13
|
+
# Registers anchors for back matter: every <annex> is named by annex_names
# with a letter derived from its position (character code 65 + index, i.e.
# "A" for the first), and every <bibitem> is handed to reference_names.
def back_anchor_names(docxml)
  annexes = docxml.xpath(ns("//annex"))
  annexes.each_with_index do |annex, idx|
    letter = (65 + idx).chr.to_s
    annex_names(annex, letter)
  end
  docxml.xpath(ns("//bibitem")).each { |item| reference_names(item) }
end
|
21
|
+
|
22
|
+
# Registers anchors for the opening sections: Introduction subsections, then
# Scope as clause "1", Normative References as "2" and terms as "3" (all at
# level 1), and finally the tables, figures and formulas of the middle
# sections.
def initial_anchor_names(d)
  intro = d.at(ns("//content[title = 'Introduction']"))
  introduction_names(intro)
  scope = d.at(ns("//clause[title = 'Scope']"))
  section_names(scope, "1", 1)
  normrefs = d.at(ns("//references[title = 'Normative References']"))
  section_names(normrefs, "2", 1)
  terms = d.at(ns("//terms"))
  section_names(terms, "3", 1)
  middle_section_asset_names(d)
end
|
30
|
+
|
31
|
+
# Numbers tables, figures and formulas sequentially across the union of the
# middle sections: Scope, Normative References, terms, symbols-abbrevs and
# the clauses directly under <sections>.
def middle_section_asset_names(d)
  selectors = [
    "//clause[title = 'Scope']",
    "//references[title = 'Normative References']",
    "//terms",
    "//symbols-abbrevs",
    "//clause[parent::sections]",
  ]
  sequential_asset_names(d.xpath(ns(selectors.join(" | "))))
end
|
37
|
+
|
38
|
+
# Numbers the clauses that sit directly under <sections>, excluding the one
# titled 'Scope', consecutively from +sect_num+, registering each at level 1
# through section_names.
def clause_names(docxml, sect_num)
  query = "//clause[parent::sections][not(xmlns:title = 'Scope')]"
  clauses = docxml.xpath(ns(query))
  clauses.each_with_index do |clause, offset|
    section_names(clause, (offset + sect_num).to_s, 1)
  end
end
|
44
|
+
|
45
|
+
# Registers an anchor for every <termnote> directly under a <term>.
# Notes are numbered from 1 within each term: the label is
# "Note N to entry" and the xref is the term's own xref, a comma, and
# "Note N". The term's anchor must already be registered.
def termnote_anchor_names(docxml)
  docxml.xpath(ns("//term[termnote]")).each do |term|
    term.xpath(ns("./termnote")).each_with_index do |note, idx|
      seq = idx + 1
      @anchors[note["id"]] = {
        label: "Note #{seq} to entry",
        xref: "#{@anchors[term["id"]][:xref]},Note #{seq}",
      }
    end
  end
end
|
54
|
+
|
55
|
+
# Registers an anchor for every <note> directly under a <table>.
# Notes are numbered from 1 within each table: the label is "NOTE N" and the
# xref is the table's own xref, a comma, and "Note N". The table's anchor
# must already be registered.
def table_note_anchor_names(docxml)
  docxml.xpath(ns("//table[note]")).each do |table|
    table.xpath(ns("./note")).each_with_index do |note, idx|
      seq = idx + 1
      @anchors[note["id"]] = {
        label: "NOTE #{seq}",
        xref: "#{@anchors[table["id"]][:xref]},Note #{seq}",
      }
    end
  end
end
|
64
|
+
|
65
|
+
# Registers anchors for the sections after terms. Numbering starts at 4:
# a <symbols-abbrevs> section, when present, takes that number and the
# remaining clauses continue from the next one. Term notes are registered
# last.
def middle_anchor_names(docxml)
  next_num = 4
  symbols = docxml.at(ns("//symbols-abbrevs"))
  unless symbols.nil?
    section_names(symbols, next_num.to_s, 1)
    next_num += 1
  end
  clause_names(docxml, next_num)
  termnote_anchor_names(docxml)
end
|
75
|
+
|
76
|
+
# Extracts names (xref and label) for all anchors in the document by running
# the four passes in order: opening sections, middle sections, back matter,
# and table notes.
def anchor_names(docxml)
  initial_anchor_names(docxml)
  middle_anchor_names(docxml)
  back_anchor_names(docxml)
  table_note_anchor_names(docxml)
end
|
83
|
+
|
84
|
+
# Registers "Figure N" anchors for every <figure> under +clause+.
# A figure whose parent is itself a <figure> is a subfigure: it keeps the
# current figure number and gets a "-M" suffix counting from 1. Any other
# figure advances the figure number and resets the subfigure counter.
# Label and xref are the same string.
#
# Dropped a dead `label` assignment that was computed before the counters
# were updated and then unconditionally overwritten.
def sequential_figure_names(clause)
  i = j = 0
  clause.xpath(ns(".//figure")).each do |t|
    if t.parent.name == "figure"
      j += 1
    else
      j = 0
      i += 1
    end
    label = "Figure #{i}" + (j.zero? ? "" : "-#{j}")
    @anchors[t["id"]] = { label: label, xref: label }
  end
end
|
98
|
+
|
99
|
+
# Numbers the assets found under +clause+ in document order, each kind
# counted separately from 1: tables ("Table N" for both label and xref),
# figures (via sequential_figure_names) and formulas (label "N", xref
# "Formula N").
def sequential_asset_names(clause)
  clause.xpath(ns(".//table")).each_with_index do |table, idx|
    name = "Table #{idx + 1}"
    @anchors[table["id"]] = { label: name, xref: "Table #{idx + 1}" }
  end
  sequential_figure_names(clause)
  clause.xpath(ns(".//formula")).each_with_index do |formula, idx|
    seq = idx + 1
    @anchors[formula["id"]] = { label: seq.to_s, xref: "Formula #{seq}" }
  end
end
|
110
|
+
|
111
|
+
# Registers "Figure <num>.N" anchors for every <figure> under +clause+.
# A figure whose parent is itself a <figure> keeps the current figure number
# and gets a "-M" suffix counting from 1; any other figure advances the
# figure number and resets the suffix counter. Label and xref are the same
# string.
def hierarchical_figure_names(clause, num)
  major = minor = 0
  clause.xpath(ns(".//figure")).each do |fig|
    if fig.parent.name == "figure"
      minor += 1
    else
      minor = 0
      major += 1
    end
    suffix = minor.zero? ? "" : "-#{minor}"
    caption = "Figure #{num}.#{major}" + suffix
    @anchors[fig["id"]] = { label: caption, xref: caption }
  end
end
|
124
|
+
|
125
|
+
# Numbers the assets found under +clause+ with +num+ as prefix, each kind
# counted separately from 1: tables ("Table <num>.N" for label and xref),
# figures (via hierarchical_figure_names) and formulas (label "<num>.N",
# xref "Formula <num>.N").
def hierarchical_asset_names(clause, num)
  clause.xpath(ns(".//table")).each_with_index do |table, idx|
    seq = "#{num}.#{idx + 1}"
    @anchors[table["id"]] = { label: "Table #{seq}", xref: "Table #{seq}" }
  end
  hierarchical_figure_names(clause, num)
  clause.xpath(ns(".//formula")).each_with_index do |formula, idx|
    seq = "#{num}.#{idx + 1}"
    @anchors[formula["id"]] = { label: seq, xref: "Formula #{seq}" }
  end
end
|
136
|
+
|
137
|
+
# Registers anchors for the subsections of the Introduction, numbered
# "0.1", "0.2", ... at level 2.
#
# Fixes two crashes: section_names was called with two arguments although
# it requires three (clause, num, level), and a document without an
# Introduction passed nil here. A nil clause is now ignored.
def introduction_names(clause)
  return if clause.nil?
  clause.xpath(ns("./subsection")).each_with_index do |c, i|
    section_names(c, "0.#{i + 1}", 2)
  end
end
|
142
|
+
|
143
|
+
# Registers the anchor for a top-level section (label +num+, xref
# "Clause <num>", the given +level+) and then numbers its direct
# <subsection> and <term> children "<num>.1", "<num>.2", ... one level
# deeper via section_names1.
#
# A nil +clause+ is ignored: callers pass the result of a lookup that yields
# nil when the document lacks that section, which previously raised
# NoMethodError.
def section_names(clause, num, level)
  return if clause.nil?
  @anchors[clause["id"]] = { label: num, xref: "Clause #{num}",
                             level: level }
  clause.xpath(ns("./subsection | ./term")).each_with_index do |c, i|
    section_names1(c, "#{num}.#{i + 1}", level + 1)
  end
end
|
150
|
+
|
151
|
+
# Registers the anchor for a nested section or term, then recurses into its
# direct <subsection> children, numbering them "<num>.1", "<num>.2", ... one
# level deeper. The xref is the bare number for a <term> and
# "Clause <num>" for anything else.
def section_names1(clause, num, level)
  reference = clause.name == "term" ? num : "Clause #{num}"
  @anchors[clause["id"]] = { label: num, level: level, xref: reference }
  children = clause.xpath(ns("./subsection "))
  children.each_with_index do |child, idx|
    section_names1(child, "#{num}.#{idx + 1}", level + 1)
  end
end
|
160
|
+
|
161
|
+
# Registers the anchor for an annex at level 1. The label is bold
# "Annex <num>", a line break, and the obligation: "(Normative)" when the
# annex's subtype attribute is "normative", "(Informative)" otherwise. The
# xref is "Annex <num>". Direct subsections are numbered "<num>.N" at level
# 2, and the annex's tables, figures and formulas are numbered with <num>
# as prefix.
def annex_names(clause, num)
  obligation = clause["subtype"] == "normative" ? "(Normative)" : "(Informative)"
  @anchors[clause["id"]] = {
    label: "<b>Annex #{num}</b><br/>#{obligation}",
    xref: "Annex #{num}",
    level: 1,
  }
  clause.xpath(ns("./subsection")).each_with_index do |sub, idx|
    annex_names1(sub, "#{num}.#{idx + 1}", 2)
  end
  hierarchical_asset_names(clause, num)
end
|
172
|
+
|
173
|
+
# Registers the anchor for a subsection of an annex (label and xref are both
# +num+) and recurses into its direct <subsection> children, numbering them
# "<num>.1", "<num>.2", ... one level deeper.
#
# The child query used ".//subsection", which selects every descendant.
# Deeper subsections were therefore also numbered as if they were direct
# children, and that wrong number overwrote the correct one from the
# recursive call. Only direct children are visited now, matching
# section_names1.
def annex_names1(clause, num, level)
  @anchors[clause["id"]] = { label: num,
                             xref: num,
                             level: level }
  clause.xpath(ns("./subsection")).each_with_index do |c, i|
    annex_names1(c, "#{num}.#{i + 1}", level + 1)
  end
end
|
181
|
+
|
182
|
+
# Formats a document identifier for use as a cross-reference.
# With a truthy +isopub+ the identifier is prefixed with "ISO "; otherwise a
# purely numeric identifier is wrapped in square brackets, and anything else
# is returned unchanged.
def format_ref(ref, isopub)
  if isopub
    "ISO #{ref}"
  elsif /^\d+$/.match?(ref) && !/^\[.*\]$/.match?(ref)
    "[#{ref}]"
  else
    ref
  end
end
|
187
|
+
|
188
|
+
# Registers the xref for a bibliography item.
# Without a <docidentifier> the item is delegated to ref_names. Otherwise
# the xref is the identifier formatted by format_ref, with ": <date>"
# appended when the item has both a <publisherdate> and a publisher
# affiliation named 'ISO'.
def reference_names(ref)
  iso = ref.at(ns("./publisher/affiliation[name = 'ISO']"))
  identifier = ref.at(ns("./docidentifier"))
  return ref_names(ref) unless identifier
  issued = ref.at(ns("./publisherdate"))
  citation = format_ref(identifier.text, iso)
  citation += ": #{issued.text}" if issued && iso
  @anchors[ref["id"]] = { xref: citation }
end
|
197
|
+
|
198
|
+
# Registers the xref for a reference that has no document identifier, using
# its text. Square brackets are removed from the text unless the whole text
# is a bracketed number such as "[4]".
def ref_names(ref)
  target = ref.text
  numeric = /^\[\d+\]$/.match? target
  target.gsub!(/[\[\]]/, "") unless numeric
  @anchors[ref["id"]] = { xref: target }
end
|
203
|
+
end
|
204
|
+
end
|