isodoc 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.hound.yml +3 -0
- data/.oss-guides.rubocop.yml +1077 -0
- data/.rubocop.ribose.yml +65 -0
- data/.rubocop.tb.yml +640 -0
- data/.rubocop.yml +15 -0
- data/Gemfile +6 -0
- data/README.adoc +32 -0
- data/isodoc.gemspec +51 -0
- data/lib/isodoc.rb +74 -0
- data/lib/isodoc/blocks.rb +184 -0
- data/lib/isodoc/cleanup.rb +155 -0
- data/lib/isodoc/html.rb +44 -0
- data/lib/isodoc/inline.rb +211 -0
- data/lib/isodoc/iso2wordhtml.rb +143 -0
- data/lib/isodoc/lists.rb +54 -0
- data/lib/isodoc/metadata.rb +99 -0
- data/lib/isodoc/postprocessing.rb +156 -0
- data/lib/isodoc/references.rb +129 -0
- data/lib/isodoc/section.rb +136 -0
- data/lib/isodoc/table.rb +99 -0
- data/lib/isodoc/terms.rb +74 -0
- data/lib/isodoc/utils.rb +88 -0
- data/lib/isodoc/version.rb +3 -0
- data/lib/isodoc/xref_gen.rb +204 -0
- metadata +338 -0
data/lib/isodoc/html.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
module IsoDoc
|
2
|
+
class Convert
|
3
|
+
|
4
|
+
# Write the converted document to "<filename>.html".
#
# result   - HTML markup as a String; it is parsed with Nokogiri::HTML,
#            passed through htmlstyle and htmlPreface, serialised with
#            to_xml and then handed to populate_template.
# filename - output path without the ".html" extension.
def toHTML(result, filename)
  styled = htmlstyle(Nokogiri::HTML(result))
  markup = populate_template(htmlPreface(styled).to_xml)
  File.open("#{filename}.html", "w") { |file| file.write(markup) }
end
|
11
|
+
|
12
|
+
# Prepend the HTML cover page and intro page to the document and append the
# MathJax loader script to the body element.
#
# The file named by @htmlcoverpage is inserted before the first child of
# div.WordSection1, and the file named by @htmlintropage before the first
# child of div.WordSection2.
#
# docxml - parsed document that is searched with XPath and modified in place.
#
# Returns docxml.
def htmlPreface(docxml)
  { "WordSection1" => @htmlcoverpage,
    "WordSection2" => @htmlintropage }.each do |section, path|
    page = Nokogiri::HTML(File.read(path, encoding: "UTF-8"))
    target = docxml.at(%{//div[@class="#{section}"]})
    target.children.first.add_previous_sibling page.to_xml(encoding: "US-ASCII")
  end
  body = docxml.at("//*[local-name() = 'body']")
  # cdn.mathjax.org was retired in April 2017; load MathJax from cdnjs, the
  # replacement host named in the MathJax shutdown announcement.
  body << '<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/'\
          '2.7.1/MathJax.js?config=AM_HTMLorMML"></script>'
  docxml
end
|
24
|
+
|
25
|
+
# Read the CSS file named by @htmlstylesheet and return the markup of a
# <style> element whose only child is an XML comment wrapping that CSS.
def htmlstylesheet
  css = File.read(@htmlstylesheet, encoding: "UTF-8")
  doc = Nokogiri::XML("<style/>")
  doc.root.add_child(Nokogiri::XML::Comment.new(doc, "\n#{css}\n"))
  doc.root.to_s
end
|
31
|
+
|
32
|
+
# Insert the stylesheet markup produced by htmlstylesheet into the document
# head: directly after <title> when one exists, otherwise before the first
# child of <head>.
#
# Returns docxml (modified in place).
def htmlstyle(docxml)
  title = docxml.at("//*[local-name() = 'head']/*[local-name() = 'title']")
  head = docxml.at("//*[local-name() = 'head']")
  css = htmlstylesheet
  if title
    title.add_next_sibling css
  else
    head.children.first.add_previous_sibling css
  end
  docxml
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,211 @@
|
|
1
|
+
require "uuidtools"
|
2
|
+
|
3
|
+
module IsoDoc
|
4
|
+
class Convert
|
5
|
+
|
6
|
+
# Reader for the @in_footnote flag, which the footnote parsers in this file
# set to true while they build footnote text and reset to false afterwards.
def in_footnote
  @in_footnote
end
|
9
|
+
|
10
|
+
# Call body.br with clear="all" and class="section".
def section_break(body)
  body.br(clear: "all", class: "section")
end
|
13
|
+
|
14
|
+
# Call body.br with clear="all" and the Word page-break style string.
def page_break(body)
  body.br(
    clear: "all",
    style: "mso-special-character:line-break;page-break-before:always",
  )
end
|
20
|
+
|
21
|
+
# Emit an <a> whose href is the node's "target" attribute. The link text is
# the node's text, or the target itself when that text is empty.
def link_parse(node, out)
  label = node.text
  label = node["target"] if label.empty?
  out.a(href: node["target"]) { |l| l << label }
end
|
26
|
+
|
27
|
+
# Append the callout label to +out+ as " <label>", with the angle brackets
# written as the entities &lt; and &gt;.
#
# The text is appended with <<, so literal "<" and ">" would be taken as
# markup rather than shown as brackets around the label.
def callout_parse(node, out)
  out << " &lt;#{node.text}&gt;"
end
|
30
|
+
|
31
|
+
# Work out the display text for a cross-reference node.
#
# Precedence, lowest to highest:
# 1. the "target" attribute, else the "citeas" attribute;
# 2. the :xref entry of get_anchors for "target", when registered;
# 3. the :xref entry of get_anchors for "bibitemid", when the node has no
#    "citeas" and that id is registered;
# 4. the node's own non-empty text children, joined with a space.
def get_linkend(node)
  anchors = get_anchors
  target = node["target"]
  linkend = target || node["citeas"]
  linkend = anchors[target][:xref] if anchors.key?(target)
  bibitem = node["bibitemid"]
  if node["citeas"].nil? && anchors.key?(bibitem)
    linkend = anchors[bibitem][:xref]
  end
  # Only direct text children count,
  # so not <origin bibitemid="ISO7301" citeas="ISO 7301">
  # <locality type="section">3.1</locality></origin>
  inline = node.children.select { |c| c.text? && !c.text.empty? }
  inline.empty? ? linkend : inline.join(" ")
end
|
45
|
+
|
46
|
+
# Emit an <a> whose href is the node's "target" attribute and whose text is
# the value returned by get_linkend for the node.
def xref_parse(node, out)
  label = get_linkend(node)
  out.a(href: node["target"]) { |l| l << label }
end
|
50
|
+
|
51
|
+
# Emit a link for an external reference node.
#
# The link text is get_linkend(node); when the node has a <locality> child,
# ", <Type> <text>" is appended (type capitalised). The href is the node's
# "bibitemid" attribute. Nodes with type="footnote" get the link wrapped in
# <sup>.
def eref_parse(node, out)
  linkend = get_linkend(node)
  section = node.at(ns("./locality"))
  unless section.nil?
    linkend += ", #{section["type"].capitalize} #{section.text}"
  end
  if node["type"] == "footnote"
    out.sup do |s|
      s.a(href: node["bibitemid"]) { |l| l << linkend }
    end
  else
    out.a(href: node["bibitemid"]) { |l| l << linkend }
  end
end
|
64
|
+
|
65
|
+
# Emit a <span class="stem"> and add the formula to the span's parent.
#
# The formula text depends on the node's "type" attribute:
# "AsciiMath" - the node text wrapped in backticks;
# "MathML"    - the serialised first element child;
# otherwise   - the node text unchanged.
def stem_parse(node, out)
  ooml = case node["type"]
         when "AsciiMath" then "`#{node.text}`"
         when "MathML" then node.first_element_child.to_s
         else node.text
         end
  out.span(class: "stem") { |span| span.parent.add_child ooml }
end
|
75
|
+
|
76
|
+
# Call out.br with clear="all" and class="pagebreak".
#
# node is unused; it is accepted so the method has the same (node, out)
# signature as the other *_parse methods.
def pagebreak_parse(node, out)
  # "all" must be a string literal: a bare `all` is an undefined name and
  # raises NameError as soon as the method runs.
  out.br(clear: "all", class: "pagebreak")
end
|
80
|
+
|
81
|
+
# Fallback for nodes that no parser handles: show the node's XML source in
# bold inside a para element.
#
# "<" and ">" in the serialised node are replaced by &lt; and &gt; so that
# the markup appended with << is shown as text instead of being taken as
# markup.
def error_parse(node, out)
  text = node.to_xml.gsub(/[<>]/, "<" => "&lt;", ">" => "&gt;")
  out.para do |p|
    p.b(role: "strong") { |e| e << text }
  end
end
|
87
|
+
|
88
|
+
# Emit the footnote list: a div styled "mso-element:footnote-list", with
# each entry of @footnotes appended to that div's parent. Does nothing when
# @footnotes is empty.
def footnotes(div)
  return if @footnotes.empty?
  div.div(style: "mso-element:footnote-list") do |list|
    @footnotes.each { |fn| list.parent << fn }
  end
end
|
96
|
+
|
97
|
+
# Build the attribute hash for a footnote reference anchor.
#
# fn          - footnote identifier, interpolated into style, href and name.
# is_footnote - when truthy, :style is "mso-footnote-id:ftn<fn>";
#               otherwise :style is nil.
#
# Returns a Hash with the keys :style, :href, :name, :title and :class.
def footnote_attributes(fn, is_footnote)
  {
    style: (is_footnote ? "mso-footnote-id:ftn#{fn}" : nil),
    href: "#_ftn#{fn}",
    name: "_ftnref#{fn}",
    title: "",
    class: "zzFootnote",
  }
end
|
106
|
+
|
107
|
+
# Fill a footnote reference anchor with a span of class
# "MsoFootnoteReference".
#
# When is_footnote is truthy the span contains only a span styled
# "mso-special-character:footnote". Otherwise it contains an <a> whose href
# is fnid, and fnref is appended to +a+ inside that link's block.
def make_footnote_link(a, fnid, fnref, is_footnote)
  a.span(class: "MsoFootnoteReference") do |s|
    next s.span(style: "mso-special-character:footnote") if is_footnote
    s.a(href: fnid) { a << fnref }
  end
end
|
116
|
+
|
117
|
+
# Fill a footnote target anchor with a span of class
# "MsoFootnoteReference".
#
# When is_footnote is truthy the span contains only a span styled
# "mso-special-character:footnote". Otherwise it contains an <a> whose name
# is fnid, and fnref is appended to +a+ inside that anchor's block.
def make_footnote_target(a, fnid, fnref, is_footnote)
  a.span(class: "MsoFootnoteReference") do |s|
    next s.span(style: "mso-special-character:footnote") if is_footnote
    s.a(name: fnid) { a << fnref }
  end
end
|
126
|
+
|
127
|
+
# Build the markup for a footnote body and return it as a String.
#
# The body is a div with id "ftn<fnid>", and with style
# "mso-element:footnote" only when is_footnote is truthy. It holds the
# target anchor built by footnote_attributes and make_footnote_target,
# followed by insert_tab(a, 1) when is_footnote is falsy, and then the
# parsed children of +node+. The pieces returned by noko are joined with
# newlines.
def make_footnote_text(node, fnid, fnref, is_footnote)
  attrs = { style: (is_footnote ? "mso-element:footnote" : nil),
            id: "ftn#{fnid}" }
  fragments = noko do |xml|
    xml.div(**attr_code(attrs)) do |div|
      div.a(**footnote_attributes(fnid, is_footnote)) do |a|
        make_footnote_target(a, fnid, fnref, is_footnote)
        insert_tab(a, 1) unless is_footnote
      end
      node.children.each { |n| parse(n, div) }
    end
  end
  fragments.join("\n")
end
|
140
|
+
|
141
|
+
# Return the "id" attribute of the last "table" ancestor of +node+, or of
# the last "figure" ancestor when there is no table ancestor. When neither
# exists a random UUID string is returned.
def get_table_ancestor_id(node)
  table = node.ancestors("table")
  # An empty result set is still truthy, so `a || b` would never reach the
  # figure lookup; test for emptiness explicitly.
  table = node.ancestors("figure") if table.empty?
  return UUIDTools::UUID.random_create.to_s if table.empty?
  table.last["id"]
end
|
146
|
+
|
147
|
+
# Emit a footnote reference for a footnote inside a table or figure, and
# emit the footnote text the first time that footnote is seen.
#
# The footnote key is the id from get_table_ancestor_id followed by the
# node's "reference" attribute. Keys already in @seen_footnote produce only
# the reference link. @in_footnote is true while the text is being built.
def table_footnote_parse(node, out)
  fn = node["reference"]
  key = get_table_ancestor_id(node) + fn
  out.a(**footnote_attributes(key, false)) do |a|
    make_footnote_link(a, key, fn, false)
  end
  # do not output footnote text if we have already seen it for this table
  return if @seen_footnote.include?(key)
  @in_footnote = true
  out.aside { |a| a << make_footnote_text(node, key, fn, false) }
  @in_footnote = false
  @seen_footnote << key
end
|
160
|
+
|
161
|
+
# Emit a footnote reference and queue the footnote text in @footnotes.
#
# When @in_table or @in_figure is set, the work is delegated to
# table_footnote_parse. @in_footnote is true while the footnote text is
# being built.
def footnote_parse(node, out)
  return table_footnote_parse(node, out) if @in_table || @in_figure
  ref = node["reference"]
  out.a(**footnote_attributes(ref, true)) do |a|
    make_footnote_link(a, nil, nil, true)
  end
  @in_footnote = true
  @footnotes << make_footnote_text(node, ref, ref, true)
  @in_footnote = false
end
|
171
|
+
|
172
|
+
# Emit the comment list: a div styled "mso-element:comment-list", with each
# entry of @comments appended to that div's parent. Does nothing when
# @comments is empty.
def comments(div)
  return if @comments.empty?
  div.div(style: "mso-element:comment-list") do |list|
    @comments.each { |comment| list.parent << comment }
  end
end
|
180
|
+
|
181
|
+
# Emit the marker for a review comment: two nested spans holding a span
# styled "mso-special-character:comment".
#
# out  - builder the spans are added to.
# fn   - comment number, used in the "mso-comment-reference" style.
# date - comment date, used in the "mso-comment-date" style.
# from - when truthy, an <a> carrying the reference and date styles is
#        emitted before the special-character span.
def make_comment_link(out, fn, date, from)
  out.span(style: "MsoCommentReference") do |s1|
    s1.span(lang: "EN-GB", style: "font-size:9.0pt") do |s2|
      if from
        s2.a(style: "mso-comment-reference:SMC_#{fn};"\
                    "mso-comment-date:#{date}")
      end
      s2.span(style: "mso-special-character:comment") { |s| s << " " }
    end
  end
end
|
192
|
+
|
193
|
+
# Build the markup for a review comment body and return it as a String.
#
# The body is a div styled "mso-element:comment" holding a span that names
# the node's "reviewer" attribute as comment author, and a
# p.MsoCommentText with the comment marker (make_comment_link with from
# set to false) followed by the parsed children of +node+. The pieces
# returned by noko are joined with newlines.
def make_comment_text(node, fn)
  fragments = noko do |xml|
    xml.div(style: "mso-element:comment") do |div|
      div.span(style: %{mso-comment-author:"#{node["reviewer"]}"})
      div.p(class: "MsoCommentText") do |p|
        make_comment_link(p, fn, node["date"], false)
        node.children.each { |n| parse(n, p) }
      end
    end
  end
  fragments.join("\n")
end
|
204
|
+
|
205
|
+
# Handle a review note: emit the comment marker into +out+ and queue the
# comment body in @comments. The comment number is one more than the number
# of comments queued so far.
def review_note_parse(node, out)
  number = @comments.size + 1
  make_comment_link(out, number, node["date"], true)
  @comments << make_comment_text(node, number)
end
|
210
|
+
end
|
211
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
require "fileutils"
require "pp"
|
2
|
+
|
3
|
+
module IsoDoc
|
4
|
+
class Convert
|
5
|
+
|
6
|
+
# Prepare the output locations for a conversion.
#
# Strips the final extension from +filename+, makes sure the companion
# directory "<filename>_files" exists and removes its non-hidden entries.
#
# Returns [filename_without_extension, directory].
def init_file(filename)
  filename = filename.gsub(%r{\.[^/.]+$}, "")
  dir = "#{filename}_files"
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  Dir.mkdir(dir) unless File.exist?(dir)
  if File.directory?(dir)
    # Delete through FileUtils rather than a shell command, so paths with
    # spaces or shell metacharacters are handled safely. Dot entries are
    # skipped, as a shell "*" glob would skip them.
    stale = Dir.entries(dir).reject { |entry| entry.start_with?(".") }
    FileUtils.rm_rf(stale.map { |entry| File.join(dir, entry) })
  end
  [filename, dir]
end
|
13
|
+
|
14
|
+
# Emit the <body> element with its language and link-colour attributes and
# fill it by calling make_body1, make_body2 and make_body3 in that order.
def make_body(xml, docxml)
  xml.body(lang: "EN-US", link: "blue", vlink: "#954F72") do |body|
    make_body1(body, docxml)
    make_body2(body, docxml)
    make_body3(body, docxml)
  end
end
|
22
|
+
|
23
|
+
# Emit div.WordSection1 containing a single placeholder paragraph, followed
# by a section break. docxml is not used.
def make_body1(body, docxml)
  body.div(class: "WordSection1") do |div1|
    # placeholder
    div1.p { |p| p << " " }
  end
  section_break(body)
end
|
30
|
+
|
31
|
+
# Emit div.WordSection2, filled by info(docxml, div), followed by a section
# break.
def make_body2(body, docxml)
  body.div(class: "WordSection2") { |div2| info(docxml, div2) }
  section_break(body)
end
|
37
|
+
|
38
|
+
# Emit div.WordSection3, filled by middle, footnotes and comments in that
# order.
def make_body3(body, docxml)
  body.div(class: "WordSection3") do |div3|
    middle(docxml, div3)
    footnotes(div3)
    comments(div3)
  end
end
|
45
|
+
|
46
|
+
# Run the front-matter handlers on the document, in this order: title,
# subtitle, id, author, version, foreword, introduction. Each is called
# with (isoxml, out).
def info(isoxml, out)
  # intropage(out)
  title(isoxml, out)
  subtitle(isoxml, out)
  id(isoxml, out)
  author(isoxml, out)
  version(isoxml, out)
  foreword(isoxml, out)
  introduction(isoxml, out)
end
|
56
|
+
|
57
|
+
# Emit a p.zzSTDTitle1 containing the :doctitle entry of get_metadata.
def middle_title(out)
  metadata = get_metadata
  out.p(class: "zzSTDTitle1") { |p| p << metadata[:doctitle] }
end
|
61
|
+
|
62
|
+
# Emit the document title, then run the main-matter handlers in this order:
# scope, norm_ref, terms_defs, symbols_abbrevs, clause, annex,
# bibliography. Each handler is called with (isoxml, out).
def middle(isoxml, out)
  middle_title(out)
  scope(isoxml, out)
  norm_ref(isoxml, out)
  terms_defs(isoxml, out)
  symbols_abbrevs(isoxml, out)
  clause(isoxml, out)
  annex(isoxml, out)
  bibliography(isoxml, out)
end
|
72
|
+
|
73
|
+
# Emit the node's text inside a span styled "font-variant:small-caps;".
def smallcap_parse(node, xml)
  xml.span(style: "font-variant:small-caps;") { |s| s << node.text }
end
|
78
|
+
|
79
|
+
# Append the text of a text node to +out+.
#
# While in_sourcecode is truthy, newlines become "<br/>" and spaces become
# "&nbsp;" so that the layout of the listing survives in HTML.
def text_parse(node, out)
  text = node.text
  # Use the non-bang gsub: gsub! returns nil when nothing was replaced, so
  # chaining a second call onto it raises NoMethodError on text that has no
  # newline.
  text = text.gsub("\n", "<br/>").gsub(" ", "&nbsp;") if in_sourcecode
  out << text
end
|
84
|
+
|
85
|
+
# Central dispatcher: convert one input node and emit the result into +out+.
#
# Text nodes go to text_parse. Element nodes are routed by node.name to the
# matching *_parse method; names with no branch fall through to error_parse.
def parse(node, out)
  if node.text?
    text_parse(node, out)
  else
    case node.name
    # Simple inline formatting: only node.text is copied, so any markup
    # nested inside these elements is not carried over.
    when "em" then out.i { |e| e << node.text }
    when "strong" then out.b { |e| e << node.text }
    when "sup" then out.sup { |e| e << node.text }
    when "sub" then out.sub { |e| e << node.text }
    when "tt" then out.tt { |e| e << node.text }
    when "strike" then out.s { |e| e << node.text }
    when "smallcap" then smallcap_parse(node, out)
    when "br" then out.br
    when "hr" then out.hr
    when "pagebreak" then pagebreak_parse(node, out)
    when "callout" then callout_parse(node, out)
    when "stem" then stem_parse(node, out)
    # "clause" and "subsection" share one handler.
    when "clause" then clause_parse(node, out)
    when "subsection" then clause_parse(node, out)
    when "xref" then xref_parse(node, out)
    # "eref" and "origin" share one handler.
    when "eref" then eref_parse(node, out)
    when "origin" then eref_parse(node, out)
    when "link" then link_parse(node, out)
    when "ul" then ul_parse(node, out)
    when "ol" then ol_parse(node, out)
    when "li" then li_parse(node, out)
    when "dl" then dl_parse(node, out)
    when "fn" then footnote_parse(node, out)
    when "p" then para_parse(node, out)
    when "quote" then quote_parse(node, out)
    when "tr" then tr_parse(node, out)
    when "note" then note_parse(node, out)
    when "review" then review_note_parse(node, out)
    when "admonition" then admonition_parse(node, out)
    when "formula" then formula_parse(node, out)
    when "table" then table_parse(node, out)
    when "figure" then figure_parse(node, out)
    # Only the "src" attribute is passed on; the third argument is nil.
    when "image" then image_parse(node["src"], out, nil)
    when "sourcecode" then sourcecode_parse(node, out)
    when "annotation" then annotation_parse(node, out)
    when "term" then termdef_parse(node, out)
    when "preferred" then term_parse(node, out)
    when "admitted" then admitted_term_parse(node, out)
    when "deprecates" then deprecated_term_parse(node, out)
    # "domain" emits nothing here; its text is handed to set_termdomain.
    when "domain" then set_termdomain(node.text)
    when "definition" then definition_parse(node, out)
    when "termsource" then termref_parse(node, out)
    # Emitted as a literal "[ISOSECTION]" marker followed by the node text.
    when "isosection"
      out << "[ISOSECTION] #{node.text}"
    when "modification" then modification_parse(node, out)
    when "termnote" then termnote_parse(node, out)
    when "termexample" then termexample_parse(node, out)
    else
      error_parse(node, out)
    end
  end
end
|
142
|
+
end
|
143
|
+
end
|
data/lib/isodoc/lists.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
module IsoDoc
|
2
|
+
class Convert
|
3
|
+
# Emit a <ul> and parse each child of +node+ into it.
def ul_parse(node, out)
  out.ul { |ul| node.children.each { |n| parse(n, ul) } }
end
|
8
|
+
|
9
|
+
# Maps a numeration name to the value ol_parse passes as the "type"
# attribute of the <ol> it emits.
OL_STYLE = {
  arabic: "1",
  roman: "i",
  alphabet: "a",
  roman_upper: "I",
  alphabet_upper: "A",
}.freeze

# Look up the <ol> type value for a numeration name.
#
# type - numeration name as a String or Symbol; may be nil.
#
# Returns the mapped String, or nil when +type+ is nil or not in OL_STYLE.
def ol_style(type)
  # A nil type must not reach to_sym, which nil does not respond to.
  OL_STYLE[type.to_sym] unless type.nil?
end
|
20
|
+
|
21
|
+
# Emit an <ol> and parse each child of +node+ into it. The list's type
# attribute is ol_style of the node's "type" attribute, passed through
# attr_code.
def ol_parse(node, out)
  # attrs = { numeration: node["type"] }
  list_type = ol_style(node["type"])
  out.ol(**attr_code(type: list_type)) do |ol|
    node.children.each { |n| parse(n, ol) }
  end
end
|
28
|
+
|
29
|
+
# Emit an <li> and parse each child of +node+ into it.
def li_parse(node, out)
  out.li { |li| node.children.each { |n| parse(n, li) } }
end
|
34
|
+
|
35
|
+
# Emit a <dl> for a definition list.
#
# The element children of +node+ are taken two at a time as (dt, dd). A dt
# with no element children becomes a <p> holding its text, given class
# "Note" when is_note is truthy; otherwise the dt's children are parsed
# into the <dt>. The dd's children are parsed into the <dd>.
def dl_parse(node, out)
  out.dl do |v|
    node.elements.each_slice(2) do |dt, dd|
      v.dt do |term|
        if dt.elements.empty?
          term.p(**attr_code(class: is_note ? "Note" : nil)) do |p|
            p << dt.text
          end
        else
          dt.children.each { |n| parse(n, term) }
        end
      end
      v.dd { |listitem| dd.children.each { |n| parse(n, listitem) } }
    end
  end
end
|
53
|
+
end
|
54
|
+
end
|