isodoc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,99 @@
1
module IsoDoc
  # Table rendering: converts a source table node into HTML table markup
  # emitted on the builder object that is passed in.
  class Convert
    # Border style prefix shared by every cell border declaration.
    SW = "solid windowtext".freeze

    # Emits the centred, bold table caption when the table has a ./name child.
    #
    # @param node the source table node (responds to #at and #[])
    # @param out the output builder
    # @return nil when the table has no name; otherwise the builder result
    def table_title_parse(node, out)
      name = node.at(ns("./name"))
      return unless name

      out.p(class: "TableTitle", align: "center") do |p|
        p.b do |b|
          b << "#{get_anchors[node['id']][:label]}&nbsp;&mdash; "
          b << name.text
        end
      end
    end

    # Emits <thead> with one row per element child of the source ./thead.
    # Nothing is emitted when the table has no thead.
    #
    # @param node the source table node
    # @param t the builder positioned inside the output table
    def thead_parse(node, t)
      thead = node.at(ns("./thead"))
      return unless thead

      rows = thead.element_children
      t.thead do |h|
        rows.each_with_index { |row, i| tr_parse(row, h, i, rows.size, true) }
      end
    end

    # Emits <tbody> with one row per element child of the source ./tbody.
    # Nothing is emitted when the table has no tbody; previously this case
    # raised NoMethodError, unlike the thead and tfoot handlers.
    #
    # @param node the source table node
    # @param t the builder positioned inside the output table
    def tbody_parse(node, t)
      tbody = node.at(ns("./tbody"))
      return unless tbody

      rows = tbody.element_children
      t.tbody do |h|
        rows.each_with_index { |row, i| tr_parse(row, h, i, rows.size, false) }
      end
    end

    # Emits <tfoot> with one row per element child of the source ./tfoot.
    # Nothing is emitted when the table has no tfoot.
    #
    # @param node the source table node
    # @param t the builder positioned inside the output table
    def tfoot_parse(node, t)
      tfoot = node.at(ns("./tfoot"))
      return unless tfoot

      rows = tfoot.element_children
      t.tfoot do |h|
        rows.each_with_index { |row, i| tr_parse(row, h, i, rows.size, false) }
      end
    end

    # Attributes for the output <table> element.
    #
    # @param node the source table node; its "id" attribute is carried over
    # @return [Hash] attribute name => value
    def make_table_attr(node)
      {
        id: node["id"],
        class: "MsoISOTable",
        border: 1,
        cellspacing: 0,
        cellpadding: 0,
      }
    end

    # Renders a whole table: title, head, body, foot, then any ./dl and
    # ./note children. @in_table is true for the duration of the rendering.
    #
    # @param node the source table node
    # @param out the output builder
    def table_parse(node, out)
      @in_table = true
      table_title_parse(node, out)
      out.table(**make_table_attr(node)) do |t|
        thead_parse(node, t)
        tbody_parse(node, t)
        tfoot_parse(node, t)
        dl = node.at(ns("./dl"))
        parse(dl, out) if dl
        node.xpath(ns("./note")).each { |n| parse(n, out) }
      end
      @in_table = false
      # out.p { |p| p << "&nbsp;" }
    end

    # Commented-out left/right border rules, kept for reference:
    #   border-left:#{col.zero? ? "#{SW} 1.5pt;" : "none;"}
    #   border-right:#{SW} #{col == totalcols && !header ? "1.5" : "1.0"}pt;

    # Builds the attribute hash for one table cell.
    #
    # The top border is 1.5pt on the first row (row == 0) and absent
    # otherwise. The bottom border is 1.5pt when the cell, allowing for its
    # rowspan, ends on the row whose index equals totalrows, and 1.0pt
    # otherwise. Header cells ("th") are additionally bold.
    #
    # @param td the source cell (responds to #name and #[])
    # @param row [Integer] zero-based row index within its section
    # @param totalrows [Integer] index of the last row in the section
    # @param col unused by the current style rules
    # @param totalcols unused by the current style rules
    # @param header unused by the current style rules
    # @return [Hash] :rowspan, :colspan, :align (possibly nil) and :style
    def make_tr_attr(td, row, totalrows, col, totalcols, header)
      rowspan = td["rowspan"]
      rowmax = rowspan ? row + rowspan.to_i - 1 : row
      top = row.zero? ? "#{SW} 1.5pt;" : "none;"
      bottom = "#{SW} #{rowmax == totalrows ? '1.5' : '1.0'}pt;"
      style = [
        (td.name == "th" ? "font-weight:bold;" : ""),
        "border-top:#{top}",
        "mso-border-top-alt:#{top}",
        "border-bottom:#{bottom}",
        "mso-border-bottom-alt:#{bottom}",
      ].join
      { rowspan: rowspan, colspan: td["colspan"],
        align: td["align"], style: style }
    end

    # Emits one <tr>, with a cell per element of the source row. Each output
    # cell is named after the source cell (td.name) and receives the parsed
    # children of that cell.
    #
    # @param node the source row
    # @param out the builder positioned inside thead/tbody/tfoot
    # @param ord [Integer] zero-based index of this row in its section
    # @param totalrows [Integer] number of rows in the section
    # @param header [Boolean] true when the row belongs to the table head
    def tr_parse(node, out, ord, totalrows, header)
      out.tr do |r|
        cells = node.elements
        cells.each_with_index do |td, i|
          attrs = make_tr_attr(td, ord, totalrows - 1,
                               i, cells.size - 1, header)
          r.send(td.name, **attr_code(attrs)) do |entry|
            td.children.each { |n| parse(n, entry) }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
module IsoDoc
  # Rendering of terminology entries (terms, definitions, notes, examples).
  class Convert
    # Parses every child of a definition node straight into +out+.
    def definition_parse(node, out)
      node.children.each { |n| parse(n, out) }
    end

    # Emits the "[MODIFICATION]" marker followed by the parsed children of
    # the node's ./p child. When there is no ./p child only the marker is
    # emitted (previously this raised NoMethodError).
    def modification_parse(node, out)
      out << "[MODIFICATION]"
      para = node.at(ns("./p"))
      return unless para

      para.children.each { |n| parse(n, out) }
    end

    # Emits a deprecated term as an "AltTerms" paragraph prefixed with
    # "DEPRECATED: ".
    def deprecated_term_parse(node, out)
      out.p(class: "AltTerms") do |p|
        p << "DEPRECATED: #{node.text}"
      end
    end

    # Emits an admitted term as an "AltTerms" paragraph.
    def admitted_term_parse(node, out)
      out.p(class: "AltTerms") { |p| p << node.text }
    end

    # Emits a preferred term as a "Terms" paragraph.
    def term_parse(node, out)
      out.p(class: "Terms") { |p| p << node.text }
    end

    # Renders the content of a note/example after its label has been written.
    #
    # When the first element is a paragraph, its children are parsed inline
    # into +p+ and the remaining elements of +node+ are parsed into +div+.
    # Otherwise every element of +node+ is parsed into +div+.
    #
    # The method body used to reference an undefined local +div+ and raised
    # NameError on every call. +div+ is now an explicit parameter; it
    # defaults to +p+ so three-argument callers keep working.
    #
    # @param first the first element child of +node+, or nil if there is none
    # @param node the source note/example node (responds to #elements)
    # @param p the builder for the already opened paragraph
    # @param div the builder for the enclosing container
    def para_then_remainder(first, node, p, div = p)
      if first && first.name == "p"
        first.children.each { |n| parse(n, p) }
        node.elements.drop(1).each { |n| parse(n, div) }
      else
        node.elements.each { |n| parse(n, div) }
      end
    end

    # Emits a term example as a "Note" div whose paragraph starts with
    # "EXAMPLE:" and one tab.
    def termexample_parse(node, out)
      out.div(class: "Note") do |div|
        first = node.first_element_child
        div.p(class: "Note") do |p|
          p << "EXAMPLE:"
          insert_tab(p, 1)
          para_then_remainder(first, node, p, div)
        end
      end
    end

    # Emits a term note as a "Note" div whose paragraph starts with the
    # label registered for the note's id in the anchors table.
    def termnote_parse(node, out)
      out.div(class: "Note") do |div|
        first = node.first_element_child
        div.p(class: "Note") do |p|
          p << "#{get_anchors[node['id']][:label]}: "
          para_then_remainder(first, node, p, div)
        end
      end
    end

    # Emits a term source reference wrapped in [TERMREF]...[/TERMREF].
    def termref_parse(node, out)
      out.p do |p|
        p << "[TERMREF]"
        node.children.each { |n| parse(n, p) }
        p << "[/TERMREF]"
      end
    end

    # Emits the term number paragraph (class "TermNum", carrying the term's
    # id), resets the term domain to "", then parses the children of the
    # term definition into +out+.
    def termdef_parse(node, out)
      out.p(class: "TermNum", id: node["id"]) do |p|
        p << get_anchors[node["id"]][:label]
      end
      set_termdomain("")
      node.children.each { |n| parse(n, out) }
    end
  end
end
@@ -0,0 +1,88 @@
1
+ #require "uuidtools"
2
+
3
module IsoDoc
  # Shared utilities: XPath namespacing, builder helpers and XHTML
  # round-tripping.
  class Convert
    # Prefixes element names in an XPath expression with "xmlns:".
    #
    # A name is prefixed when it directly follows "/", follows "::", or
    # opens a predicate of the form "[name =" / "[name=" / "[name]".
    # The character class used to be written [a-zA-z], which also matched
    # the punctuation between "Z" and "a" (including "[" and "]"); adjacent
    # predicates such as "[b][c]" were then consumed as a single match and
    # only the first was prefixed. "_" is kept as an accepted character.
    #
    # @param xpath [String] XPath written without namespace prefixes
    # @return [String] the XPath with "xmlns:" prefixes inserted
    def ns(xpath)
      xpath.gsub(%r{/([a-zA-Z_])}, "/xmlns:\\1").
        gsub(%r{::([a-zA-Z_])}, "::xmlns:\\1").
        gsub(%r{\[([a-zA-Z_]+ ?=)}, "[xmlns:\\1").
        gsub(%r{\[([a-zA-Z_]+\])}, "[xmlns:\\1")
    end

    # Emits a <span style="mso-tab-count:n"> holding one "&#xA0; " per tab.
    # The loop used to iterate over [1..n] (an array holding one Range), so
    # exactly one space was emitted whatever the value of n.
    #
    # @param out the output builder
    # @param n [Integer] number of tabs
    def insert_tab(out, n)
      out.span(**attr_code(style: "mso-tab-count:#{n}")) do |span|
        n.times { span << "&#xA0; " }
      end
    end

    # Stage code => abbreviation lookup used by #stage_abbreviation.
    STAGE_ABBRS = {
      "00": "PWI",
      "10": "NWIP",
      "20": "WD",
      "30": "CD",
      "40": "DIS",
      "50": "FDIS",
      "60": "IS",
      "90": "(Review)",
      "95": "(Withdrawal)",
    }.freeze

    # @param stage stage code, e.g. "30"; converted with #to_s so nil and
    #   integers are accepted
    # @return [String] the abbreviation, or "??" for an unknown code
    def stage_abbreviation(stage)
      STAGE_ABBRS.fetch(stage.to_s.to_sym, "??")
    end

    # Skeleton XHTML document used as the parsing context for fragments.
    # (It was previously assigned twice with identical content.)
    NOKOHEAD = <<~HERE.freeze
      <!DOCTYPE html SYSTEM
      "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
      <html xmlns="http://www.w3.org/1999/xhtml">
      <head> <title></title> <meta charset="UTF-8" /> </head>
      <body> </body> </html>
    HERE

    # block for processing XML document fragments as XHTML,
    # to allow for HTMLentities
    #
    # Runs the given block against a Nokogiri builder attached to an empty
    # fragment of the NOKOHEAD document.
    #
    # @return [Array<String>] the serialised fragment (US-ASCII), one entry
    #   per line, with each line's trailing whitespace and newline removed
    def noko(&block)
      doc = ::Nokogiri::XML.parse(NOKOHEAD)
      fragment = doc.fragment("")
      ::Nokogiri::XML::Builder.with(fragment, &block)
      fragment.to_xml(encoding: "US-ASCII").lines.map do |l|
        l.gsub(/\s*\n/, "")
      end
    end

    # Prepares an attribute hash for the builder: entries whose value is nil
    # are dropped and HTML entities in String values are decoded.
    #
    # @param attributes [Hash]
    # @return [Hash] a new hash; the argument is not modified
    def attr_code(attributes)
      decoder = HTMLEntities.new
      attributes.reject { |_, val| val.nil? }.map do |k, v|
        [k, v.is_a?(String) ? decoder.decode(v) : v]
      end.to_h
    end

    # Parses a serialised document as XML, after removing any XML
    # declaration and prepending the XHTML DOCTYPE when none is present.
    # Works on a copy: the caller's string is no longer modified in place,
    # so frozen strings are accepted.
    #
    # @param xml [String]
    # @return the parsed Nokogiri document
    def to_xhtml(xml)
      xml = xml.gsub(/<\?xml[^>]*>/, "")
      unless /<!DOCTYPE /.match?(xml)
        xml = "<!DOCTYPE html SYSTEM\n" \
              "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">" + xml
      end
      Nokogiri::XML.parse(xml)
    end

    # Parses a string as a fragment in the context of the NOKOHEAD document.
    #
    # @param xml [String]
    # @return the Nokogiri fragment
    def to_xhtml_fragment(xml)
      doc = ::Nokogiri::XML.parse(NOKOHEAD)
      doc.fragment(xml)
    end

    # Serialises +xml+ and removes the first XHTML namespace declaration.
    #
    # @param xml an object responding to #to_xml
    # @return [String]
    def from_xhtml(xml)
      xml.to_xml.sub(%r{ xmlns="http://www.w3.org/1999/xhtml"}, "")
    end
  end
end
@@ -0,0 +1,3 @@
1
module IsoDoc
  # Release version of the gem; frozen so it cannot be altered at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,204 @@
1
+ #require "isodoc/utils"
2
+
3
module IsoDoc
  # Anchor generation: walks the document and records, per element id, the
  # label shown at the element and the text used when cross-referencing it.
  class Convert
    #include ::IsoDoc::Utils

    # NOTE: this assignment runs in the class body, so it sets an instance
    # variable on the class object, not on converter instances. Instances
    # obtain their own table through #get_anchors.
    @anchors = {}

    # @return [Hash] the id => { label:, xref:, level: } table of this
    #   converter, created on first use
    def get_anchors
      @anchors ||= {}
    end

    # Names annexes ("A", "B", ... in document order) and bibliography items.
    def back_anchor_names(docxml)
      docxml.xpath(ns("//annex")).each_with_index do |c, i|
        annex_names(c, (65 + i).chr)
      end
      docxml.xpath(ns("//bibitem")).each { |ref| reference_names(ref) }
    end

    # Names the introduction and the fixed clauses 1 (Scope), 2 (Normative
    # References) and 3 (terms), then the tables/figures/formulas of the
    # middle sections. Sections absent from the document are skipped.
    def initial_anchor_names(d)
      introduction_names(d.at(ns("//content[title = 'Introduction']")))
      section_names(d.at(ns("//clause[title = 'Scope']")), "1", 1)
      section_names(d.at(ns(
        "//references[title = 'Normative References']")), "2", 1)
      section_names(d.at(ns("//terms")), "3", 1)
      middle_section_asset_names(d)
    end

    # Numbers tables, figures and formulas sequentially across all middle
    # sections.
    def middle_section_asset_names(d)
      middle_sections = "//clause[title = 'Scope'] | "\
        "//references[title = 'Normative References'] | //terms | "\
        "//symbols-abbrevs | //clause[parent::sections]"
      sequential_asset_names(d.xpath(ns(middle_sections)))
    end

    # Numbers the clauses directly under sections, other than Scope,
    # starting from +sect_num+.
    def clause_names(docxml, sect_num)
      q = "//clause[parent::sections][not(xmlns:title = 'Scope')]"
      docxml.xpath(ns(q)).each_with_index do |c, i|
        section_names(c, (i + sect_num).to_s, 1)
      end
    end

    # Names the notes of each term "Note N to entry", with N restarting at 1
    # for every term. The term itself must already be registered.
    def termnote_anchor_names(docxml)
      docxml.xpath(ns("//term[termnote]")).each do |t|
        t.xpath(ns("./termnote")).each_with_index do |n, i|
          get_anchors[n["id"]] = { label: "Note #{i + 1} to entry",
                                   xref: "#{get_anchors[t['id']][:xref]},"\
                                         "Note #{i + 1}" }
        end
      end
    end

    # Names the notes of each table "NOTE N", with N restarting at 1 for
    # every table. The table itself must already be registered.
    def table_note_anchor_names(docxml)
      docxml.xpath(ns("//table[note]")).each do |t|
        t.xpath(ns("./note")).each_with_index do |n, i|
          get_anchors[n["id"]] = { label: "NOTE #{i + 1}",
                                   xref: "#{get_anchors[t['id']][:xref]},"\
                                         "Note #{i + 1}" }
        end
      end
    end

    # Names symbols-abbrevs as clause 4 when present, numbers the remaining
    # clauses after it, then names term notes.
    def middle_anchor_names(docxml)
      symbols_abbrevs = docxml.at(ns("//symbols-abbrevs"))
      sect_num = 4
      if symbols_abbrevs
        section_names(symbols_abbrevs, sect_num.to_s, 1)
        sect_num += 1
      end
      clause_names(docxml, sect_num)
      termnote_anchor_names(docxml)
    end

    # extract names for all anchors, xref and label
    def anchor_names(docxml)
      initial_anchor_names(docxml)
      middle_anchor_names(docxml)
      back_anchor_names(docxml)
      table_note_anchor_names(docxml)
    end

    # Numbers figures "Figure i"; a figure nested directly inside another
    # figure is numbered "Figure i-j" under its parent's number.
    def sequential_figure_names(clause)
      i = j = 0
      clause.xpath(ns(".//figure")).each do |t|
        if t.parent.name == "figure"
          j += 1
        else
          j = 0
          i += 1
        end
        label = "Figure #{i}" + (j.zero? ? "" : "-#{j}")
        get_anchors[t["id"]] = { label: label, xref: label }
      end
    end

    # Numbers tables, figures and formulas 1, 2, 3, ... within +clause+.
    def sequential_asset_names(clause)
      clause.xpath(ns(".//table")).each_with_index do |t, i|
        get_anchors[t["id"]] = { label: "Table #{i + 1}",
                                 xref: "Table #{i + 1}" }
      end
      sequential_figure_names(clause)
      clause.xpath(ns(".//formula")).each_with_index do |t, i|
        get_anchors[t["id"]] = { label: (i + 1).to_s,
                                 xref: "Formula #{i + 1}" }
      end
    end

    # Like #sequential_figure_names, with every number prefixed by "num.".
    def hierarchical_figure_names(clause, num)
      i = j = 0
      clause.xpath(ns(".//figure")).each do |t|
        if t.parent.name == "figure"
          j += 1
        else
          j = 0
          i += 1
        end
        label = "Figure #{num}.#{i}" + (j.zero? ? "" : "-#{j}")
        get_anchors[t["id"]] = { label: label, xref: label }
      end
    end

    # Like #sequential_asset_names, with every number prefixed by "num.".
    def hierarchical_asset_names(clause, num)
      clause.xpath(ns(".//table")).each_with_index do |t, i|
        get_anchors[t["id"]] = { label: "Table #{num}.#{i + 1}",
                                 xref: "Table #{num}.#{i + 1}" }
      end
      hierarchical_figure_names(clause, num)
      clause.xpath(ns(".//formula")).each_with_index do |t, i|
        get_anchors[t["id"]] = { label: "#{num}.#{i + 1}",
                                 xref: "Formula #{num}.#{i + 1}" }
      end
    end

    # Numbers the subsections of the introduction 0.1, 0.2, ... at level 2.
    # Does nothing when the document has no introduction. The call used to
    # omit the level argument required by #section_names.
    def introduction_names(clause)
      return if clause.nil?

      clause.xpath(ns("./subsection")).each_with_index do |c, i|
        section_names(c, "0.#{i + 1}", 2)
      end
    end

    # Registers a top-level clause and, recursively, its subsections/terms.
    # Does nothing when +clause+ is nil (the section is absent).
    #
    # @param clause the clause node, or nil
    # @param num [String] clause number
    # @param level [Integer] heading level
    def section_names(clause, num, level)
      return if clause.nil?

      get_anchors[clause["id"]] = { label: num, xref: "Clause #{num}",
                                    level: level }
      clause.xpath(ns("./subsection | ./term")).each_with_index do |c, i|
        section_names1(c, "#{num}.#{i + 1}", level + 1)
      end
    end

    # Registers a subsection or term and, recursively, its subsections.
    # Terms are cross-referenced by bare number, subsections as "Clause n".
    def section_names1(clause, num, level)
      get_anchors[clause["id"]] =
        { label: num, level: level,
          xref: clause.name == "term" ? num : "Clause #{num}" }
      clause.xpath(ns("./subsection")).each_with_index do |c, i|
        section_names1(c, "#{num}.#{i + 1}", level + 1)
      end
    end

    # Registers an annex, its subsections and its tables/figures/formulas.
    # The annex is labelled "(Normative)" when its subtype attribute is
    # "normative" and "(Informative)" otherwise.
    def annex_names(clause, num)
      obligation = "(Informative)"
      obligation = "(Normative)" if clause["subtype"] == "normative"
      label = "<b>Annex #{num}</b><br/>#{obligation}"
      get_anchors[clause["id"]] = { label: label,
                                    xref: "Annex #{num}", level: 1 }
      clause.xpath(ns("./subsection")).each_with_index do |c, i|
        annex_names1(c, "#{num}.#{i + 1}", 2)
      end
      hierarchical_asset_names(clause, num)
    end

    # Registers an annex subsection and, recursively, its direct
    # subsections. The query used to be ".//subsection" (all descendants),
    # which visited nested subsections more than once and overwrote their
    # hierarchical numbers with a flat index.
    def annex_names1(clause, num, level)
      get_anchors[clause["id"]] = { label: num,
                                    xref: num,
                                    level: level }
      clause.xpath(ns("./subsection")).each_with_index do |c, i|
        annex_names1(c, "#{num}.#{i + 1}", level + 1)
      end
    end

    # Formats a document identifier for citation: "ISO <ref>" for ISO
    # publications, "[n]" for purely numeric identifiers, otherwise the
    # identifier unchanged.
    #
    # @param ref [String] document identifier
    # @param isopub truthy when the publisher is ISO
    # @return [String]
    def format_ref(ref, isopub)
      return "ISO #{ref}" if isopub
      return "[#{ref}]" if /^\d+$/.match?(ref) && !/^\[.*\]$/.match?(ref)

      ref
    end

    # Registers the cross-reference text for a bibliography item, built from
    # its docidentifier (plus ": <publisherdate>" for ISO publications that
    # carry one). Falls back to #ref_names when there is no docidentifier.
    def reference_names(ref)
      isopub = ref.at(ns("./publisher/affiliation[name = 'ISO']"))
      docid = ref.at(ns("./docidentifier"))
      return ref_names(ref) unless docid

      date = ref.at(ns("./publisherdate"))
      reference = format_ref(docid.text, isopub)
      reference += ": #{date.text}" if date && isopub
      get_anchors[ref["id"]] = { xref: reference }
    end

    # Registers the item's text as its cross-reference. Square brackets are
    # removed unless the text is a bracketed number such as "[1]".
    def ref_names(ref)
      linkend = ref.text
      linkend = linkend.gsub(/[\[\]]/, "") unless /^\[\d+\]$/.match?(linkend)
      get_anchors[ref["id"]] = { xref: linkend }
    end
  end
end