isodoc 0.5.5 → 0.5.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CODE_OF_CONDUCT.md +46 -0
- data/LICENSE +25 -0
- data/README.adoc +1 -1
- data/Rakefile +6 -0
- data/isodoc.gemspec +1 -0
- data/lib/isodoc.rb +4 -95
- data/lib/isodoc/cleanup.rb +14 -10
- data/lib/isodoc/{notes.rb → comments.rb} +0 -73
- data/lib/isodoc/convert.rb +97 -0
- data/lib/isodoc/footnotes.rb +74 -0
- data/lib/isodoc/html.rb +41 -4
- data/lib/isodoc/i18n-en.yaml +1 -0
- data/lib/isodoc/i18n-fr.yaml +1 -0
- data/lib/isodoc/i18n-zh-Hans.yaml +1 -0
- data/lib/isodoc/i18n.rb +1 -0
- data/lib/isodoc/inline.rb +4 -12
- data/lib/isodoc/iso2wordhtml.rb +26 -13
- data/lib/isodoc/metadata.rb +23 -10
- data/lib/isodoc/references.rb +20 -22
- data/lib/isodoc/section.rb +4 -3
- data/lib/isodoc/table.rb +0 -2
- data/lib/isodoc/terms.rb +2 -13
- data/lib/isodoc/utils.rb +24 -3
- data/lib/isodoc/version.rb +1 -1
- data/lib/isodoc/wordconvert/comments.rb +155 -0
- data/lib/isodoc/wordconvert/convert.rb +31 -0
- data/lib/isodoc/wordconvert/footnotes.rb +80 -0
- data/lib/isodoc/wordconvert/wordconvertmodule.rb +212 -0
- data/lib/isodoc/xref_gen.rb +50 -79
- data/lib/isodoc/xref_sect_gen.rb +82 -0
- data/spec/assets/header.html +7 -0
- data/spec/assets/html.css +2 -0
- data/spec/assets/htmlcover.html +4 -0
- data/spec/assets/htmlintro.html +5 -0
- data/spec/assets/i18n.yaml +2 -0
- data/spec/assets/iso.xml +8 -0
- data/spec/assets/rice_image1.png +0 -0
- data/spec/assets/std.css +2 -0
- data/spec/assets/word.css +2 -0
- data/spec/assets/wordcover.html +3 -0
- data/spec/assets/wordintro.html +4 -0
- data/spec/isodoc/blocks_spec.rb +130 -47
- data/spec/isodoc/cleanup_spec.rb +693 -0
- data/spec/isodoc/footnotes_spec.rb +282 -0
- data/spec/isodoc/i18n_spec.rb +662 -0
- data/spec/isodoc/inline_spec.rb +344 -0
- data/spec/isodoc/lists_spec.rb +81 -18
- data/spec/isodoc/metadata_spec.rb +141 -0
- data/spec/isodoc/postproc_spec.rb +444 -0
- data/spec/isodoc/ref_spec.rb +158 -0
- data/spec/isodoc/section_spec.rb +275 -112
- data/spec/isodoc/table_spec.rb +146 -8
- data/spec/isodoc/terms_spec.rb +118 -0
- data/spec/isodoc/xref_spec.rb +490 -114
- metadata +46 -4
- data/lib/isodoc/postprocessing.rb +0 -176
@@ -0,0 +1,74 @@
|
|
1
|
+
require "uuidtools"
|
2
|
+
|
3
|
+
module IsoDoc
  # Footnote handling for the converter: inline footnote references, the
  # footnote bodies collected for the end of the document, and the variant
  # used for footnotes that sit inside a table or figure.
  class Convert
    # Appends every collected footnote body to the parent of +div+.
    #
    # @param div [#parent] node whose parent receives the footnote bodies
    # @return [nil, Object] nil when no footnotes were collected
    def footnotes(div)
      return if @footnotes.empty?

      @footnotes.each { |fn| div.parent << fn }
    end

    # Emits the in-text link that points at a table footnote.
    #
    # @param out   builder the anchor is written to
    # @param fnid  [String] identifier of the footnote target
    # @param fnref [String] visible footnote label
    def make_table_footnote_link(out, fnid, fnref)
      attrs = { href: "##{fnid}", class: "TableFootnoteRef" }
      out.a(**attrs) do |a|
        a << fnref
      end
    end

    # Emits the anchor that the table footnote link resolves to, followed
    # by a tab (via insert_tab).
    #
    # @param out   builder the anchor is written to
    # @param fnid  [String] identifier given to the anchor
    # @param fnref [String] visible footnote label
    def make_table_footnote_target(out, fnid, fnref)
      attrs = { id: fnid, class: "TableFootnoteRef" }
      out.a(**attrs) do |a|
        a << fnref
        insert_tab(a, 1)
      end
    end

    # Renders the body of a table footnote: a div with id "ftn<fnid>"
    # holding the target anchor and the parsed children of +node+.
    #
    # @return [String] the rendered markup, lines joined with newlines
    def make_table_footnote_text(node, fnid, fnref)
      attrs = { id: "ftn#{fnid}" }
      noko do |xml|
        xml.div(**attr_code(attrs)) do |div|
          make_table_footnote_target(div, fnid, fnref)
          node.children.each { |n| parse(n, div) }
        end
      end.join("\n")
    end

    # Renders the body of an ordinary footnote as an aside with id
    # "ftn<fnid>" containing the parsed children of +node+.
    #
    # @return [String] the rendered markup, lines joined with newlines
    def make_generic_footnote_text(node, fnid)
      noko do |xml|
        xml.aside(id: "ftn#{fnid}") do |div|
          node.children.each { |n| parse(n, div) }
        end
      end.join("\n")
    end

    # Returns the id of the table enclosing +node+, or of the enclosing
    # figure when there is no table, so that footnote ids are scoped to
    # their container. A random UUID is returned when neither container is
    # found or the container carries no id.
    #
    # The lookups are chained with an explicit emptiness test: an empty
    # result set is still truthy, so `tables || figures` would never
    # consult the figure ancestors.
    #
    # @param node [#ancestors] the footnote node
    # @return [String]
    def get_table_ancestor_id(node)
      container = node.ancestors("table")
      container = node.ancestors("figure") if container.empty?
      return UUIDTools::UUID.random_create.to_s if container.empty?

      container.last["id"] || UUIDTools::UUID.random_create.to_s
    end

    # Renders a footnote reference inside a table or figure. The link is
    # always written; the footnote body is written only the first time a
    # given container-id/reference pair is encountered.
    def table_footnote_parse(node, out)
      fn = node["reference"]
      fnid = get_table_ancestor_id(node) + fn
      make_table_footnote_link(out, fnid, fn)
      # do not output footnote text if we have already seen it for this table
      return if @seen_footnote.include?(fnid)

      begin
        @in_footnote = true
        out.aside { |a| a << make_table_footnote_text(node, fnid, fn) }
      ensure
        # always leave footnote mode, even if rendering the body raised
        @in_footnote = false
      end
      @seen_footnote << fnid
    end

    # Renders a footnote reference. Inside a table or figure this defers
    # to table_footnote_parse; otherwise it writes a superscript link and,
    # the first time the reference is seen, queues the footnote body in
    # @footnotes.
    def footnote_parse(node, out)
      return table_footnote_parse(node, out) if @in_table || @in_figure

      fn = node["reference"]
      out.a("epub:type": "footnote", href: "#ftn#{fn}") do |a|
        a.sup { |sup| sup << fn }
      end
      return if @seen_footnote.include?(fn)

      begin
        @in_footnote = true
        @footnotes << make_generic_footnote_text(node, fn)
      ensure
        # always leave footnote mode, even if rendering the body raised
        @in_footnote = false
      end
      @seen_footnote << fn
    end
  end
end
|
data/lib/isodoc/html.rb
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
module IsoDoc
|
2
2
|
class Convert
|
3
|
+
# Runs the generic cleanup pass over the rendered document and hands the
# outcome to toHTML. +dir+ is accepted but not used here.
def postprocess(result, filename, dir)
  xhtml = to_xhtml(result)
  cleaned = cleanup(xhtml)
  toHTML(from_xhtml(cleaned), filename)
end
|
7
|
+
|
3
8
|
def toHTML(result, filename)
|
4
9
|
result = from_xhtml(html_cleanup(to_xhtml(result)))
|
5
10
|
result = populate_template(result, :html)
|
@@ -10,7 +15,7 @@ module IsoDoc
|
|
10
15
|
|
11
16
|
def html_cleanup(x)
|
12
17
|
footnote_backlinks(
|
13
|
-
move_images(html_footnote_filter(html_preface(htmlstyle(x))))
|
18
|
+
html_toc(move_images(html_footnote_filter(html_preface(htmlstyle(x)))))
|
14
19
|
)
|
15
20
|
end
|
16
21
|
|
@@ -32,15 +37,23 @@ module IsoDoc
|
|
32
37
|
end
|
33
38
|
|
34
39
|
def html_preface(docxml)
|
40
|
+
html_cover(docxml) if @htmlcoverpage
|
41
|
+
html_intro(docxml) if @htmlintropage
|
42
|
+
docxml.at("//*[local-name() = 'body']") << mathjax(@openmathdelim,
|
43
|
+
@closemathdelim)
|
44
|
+
docxml
|
45
|
+
end
|
46
|
+
|
47
|
+
def html_cover(docxml)
|
35
48
|
cover = Nokogiri::HTML(File.read(@htmlcoverpage, encoding: "UTF-8"))
|
36
49
|
d = docxml.at('//div[@class="WordSection1"]')
|
37
50
|
d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
|
51
|
+
end
|
52
|
+
|
53
|
+
def html_intro(docxml)
|
38
54
|
cover = Nokogiri::HTML(File.read(@htmlintropage, encoding: "UTF-8"))
|
39
55
|
d = docxml.at('//div[@class="WordSection2"]')
|
40
56
|
d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
|
41
|
-
docxml.at("//*[local-name() = 'body']") << mathjax(@openmathdelim,
|
42
|
-
@closemathdelim)
|
43
|
-
docxml
|
44
57
|
end
|
45
58
|
|
46
59
|
def htmlstylesheet
|
@@ -110,5 +123,29 @@ module IsoDoc
|
|
110
123
|
end
|
111
124
|
docxml
|
112
125
|
end
|
126
|
+
|
127
|
+
# Adds the table-of-contents entry for heading +h+ to the markup built so
# far. A heading without an id is given a random UUID so the entry has a
# link target. Moving from an h1 to a lower heading opens a nested list;
# moving from an h2 back to an h1 closes it.
#
# @param h        heading node (responds to [], []= and name)
# @param ret      [String] markup accumulated so far
# @param prevname [String] element name of the previously visited heading
# @return [String] +ret+ extended with this heading's entry
def html_toc1(h, ret, prevname)
  h["id"] = UUIDTools::UUID.random_create.to_s unless h["id"]
  li = "<li><a href='##{h["id"]}'>#{header_strip(h)}</a></li>"
  if h.name == "h1"
    ret += "</ul>" if prevname == "h2"
  elsif prevname == "h1"
    ret += "<ul>"
  end
  ret + li
end

# Builds the HTML table of contents from the document's h1/h2 headings and
# substitutes it for the element with id "toc-list". Nothing is done when
# no HTML intro page is configured, and the document is left untouched
# when it has no "toc-list" element. Headings with class "toc-contents"
# are not listed.
#
# @param docxml parsed document (responds to xpath and at)
# @return the same +docxml+
def html_toc(docxml)
  return docxml unless @htmlintropage

  ret = ""
  prevname = ""
  docxml.xpath("//h1 | //h2").each do |h|
    ret = html_toc1(h, ret, prevname) unless h["class"] == "toc-contents"
    prevname = h.name
  end
  # a trailing run of h2 entries leaves the nested list open: close it
  ret += "</ul>" if prevname == "h2"
  docxml.at("//*[@id='toc-list']")&.replace("<ul>#{ret}</ul>")
  docxml
end
|
113
150
|
end
|
114
151
|
end
|
data/lib/isodoc/i18n-en.yaml
CHANGED
data/lib/isodoc/i18n-fr.yaml
CHANGED
data/lib/isodoc/i18n.rb
CHANGED
data/lib/isodoc/inline.rb
CHANGED
@@ -3,14 +3,11 @@ require "uuidtools"
|
|
3
3
|
module IsoDoc
|
4
4
|
class Convert
|
5
5
|
def section_break(body)
|
6
|
-
body.br
|
6
|
+
body.br
|
7
7
|
end
|
8
8
|
|
9
|
-
def page_break(
|
10
|
-
|
11
|
-
clear: "all",
|
12
|
-
style: "mso-special-character:line-break;page-break-before:always",
|
13
|
-
}
|
9
|
+
def page_break(out)
|
10
|
+
out.br
|
14
11
|
end
|
15
12
|
|
16
13
|
def link_parse(node, out)
|
@@ -57,7 +54,7 @@ module IsoDoc
|
|
57
54
|
l10n(", #{@whole_of_text}")
|
58
55
|
else
|
59
56
|
eref_localities1(r["type"], r.at(ns("./referenceFrom")),
|
60
|
-
r.at(ns("./referenceTo")))
|
57
|
+
r.at(ns("./referenceTo")), @lang)
|
61
58
|
end
|
62
59
|
end
|
63
60
|
ret
|
@@ -86,11 +83,6 @@ module IsoDoc
|
|
86
83
|
end
|
87
84
|
end
|
88
85
|
|
89
|
-
def pagebreak_parse(_node, out)
|
90
|
-
attrs = { clear: all, class: "pagebreak" }
|
91
|
-
out.br **attrs
|
92
|
-
end
|
93
|
-
|
94
86
|
def error_parse(node, out)
|
95
87
|
text = node.to_xml.gsub(/</, "<").gsub(/>/, ">")
|
96
88
|
out.para do |p|
|
data/lib/isodoc/iso2wordhtml.rb
CHANGED
@@ -6,14 +6,6 @@ module IsoDoc
|
|
6
6
|
@termdomain = termdomain
|
7
7
|
end
|
8
8
|
|
9
|
-
def get_termexample
|
10
|
-
@termexample
|
11
|
-
end
|
12
|
-
|
13
|
-
def set_termexample(value)
|
14
|
-
@termexample = value
|
15
|
-
end
|
16
|
-
|
17
9
|
def in_sourcecode
|
18
10
|
@sourcecode
|
19
11
|
end
|
@@ -32,6 +24,27 @@ module IsoDoc
|
|
32
24
|
[filename, dir]
|
33
25
|
end
|
34
26
|
|
27
|
+
# these are in fact preprocess,
|
28
|
+
# but they are extraneous to main HTML file
|
29
|
+
# Registers the document's anchor names, then writes the <head> of the
# output through define_head.
def html_header(html, docxml, filename, dir)
  anchor_names(docxml)
  define_head(html, filename, dir)
end
|
33
|
+
|
34
|
+
# isodoc.css overrides any CSS injected by Html2Doc, which
|
35
|
+
# is inserted before this CSS.
|
36
|
+
# Writes the document <head>: a <title> holding +filename+ and, when a
# standard stylesheet is configured, a <style> element whose content is
# that stylesheet wrapped in a comment, with every "FILENAME" placeholder
# replaced by +filename+.
#
# The stylesheet is read as UTF-8, matching how the cover and intro pages
# are read. The stylesheet test is an ordinary conditional rather than a
# `return`, so the head block always runs to completion instead of being
# exited non-locally.
#
# @param html     builder the head is written to
# @param filename [String] document name used for the title and placeholder
# @param _dir     unused
def define_head(html, filename, _dir)
  html.head do |head|
    head.title { |t| t << filename }
    if @standardstylesheet
      head.style do |style|
        stylesheet = File.read(@standardstylesheet, encoding: "UTF-8").
          gsub("FILENAME", filename)
        style.comment "\n#{stylesheet}\n"
      end
    end
  end
end
|
47
|
+
|
35
48
|
def make_body(xml, docxml)
|
36
49
|
body_attr = { lang: "EN-US", link: "blue", vlink: "#954F72" }
|
37
50
|
xml.body **body_attr do |body|
|
@@ -51,6 +64,8 @@ module IsoDoc
|
|
51
64
|
def make_body2(body, docxml)
|
52
65
|
body.div **{ class: "WordSection2" } do |div2|
|
53
66
|
info docxml, div2
|
67
|
+
foreword docxml, div2
|
68
|
+
introduction docxml, div2
|
54
69
|
div2.p { |p| p << " " } # placeholder
|
55
70
|
end
|
56
71
|
section_break(body)
|
@@ -65,7 +80,6 @@ module IsoDoc
|
|
65
80
|
end
|
66
81
|
|
67
82
|
def info(isoxml, out)
|
68
|
-
# intropage(out)
|
69
83
|
title isoxml, out
|
70
84
|
subtitle isoxml, out
|
71
85
|
id isoxml, out
|
@@ -73,8 +87,7 @@ module IsoDoc
|
|
73
87
|
bibdate isoxml, out
|
74
88
|
relations isoxml, out
|
75
89
|
version isoxml, out
|
76
|
-
|
77
|
-
introduction isoxml, out
|
90
|
+
get_metadata
|
78
91
|
end
|
79
92
|
|
80
93
|
def middle_title(out)
|
@@ -125,7 +138,7 @@ module IsoDoc
|
|
125
138
|
when "br" then out.br
|
126
139
|
when "hr" then out.hr
|
127
140
|
when "bookmark" then bookmark_parse(node, out)
|
128
|
-
when "pagebreak" then
|
141
|
+
when "pagebreak" then page_break(out)
|
129
142
|
when "callout" then callout_parse(node, out)
|
130
143
|
when "stem" then stem_parse(node, out)
|
131
144
|
when "clause" then clause_parse(node, out)
|
@@ -161,7 +174,7 @@ module IsoDoc
|
|
161
174
|
when "termsource" then termref_parse(node, out)
|
162
175
|
when "modification" then modification_parse(node, out)
|
163
176
|
when "termnote" then termnote_parse(node, out)
|
164
|
-
when "termexample" then
|
177
|
+
when "termexample" then example_parse(node, out)
|
165
178
|
when "terms" then terms_parse(node, out)
|
166
179
|
when "symbols-abbrevs" then symbols_parse(node, out)
|
167
180
|
when "references" then bibliography_parse(node, out)
|
data/lib/isodoc/metadata.rb
CHANGED
@@ -65,17 +65,35 @@ module IsoDoc
|
|
65
65
|
set_metadata(:secretariat, sec.text) if sec
|
66
66
|
end
|
67
67
|
|
68
|
+
# Formats a date element as "from" or "from–to", using its ./from and
# ./to children. A missing ./from contributes an empty string instead of
# raising, in line with the nil-safe `from&.text` used by format_ref.
#
# @param date date element (responds to at)
# @return [String]
def date_range(date)
  from = date.at(ns("./from"))
  to = date.at(ns("./to"))
  ret = from&.text.to_s
  ret += "–#{to.text}" if to
  ret
end
|
75
|
+
|
68
76
|
def bibdate(isoxml, _out)
|
69
77
|
isoxml.xpath(ns("//bibdata/date")).each do |d|
|
70
|
-
set_metadata("#{d['type']}date".to_sym, d
|
78
|
+
set_metadata("#{d['type']}date".to_sym, date_range(d))
|
71
79
|
end
|
72
80
|
end
|
73
81
|
|
82
|
+
# True when the organization's abbreviation is "ISO" or its name is
# "International Organization for Standardization". A nil +org+ yields
# false.
def iso?(org)
  abbreviation = org&.at(ns("./abbreviation"))&.text
  return true if abbreviation == "ISO"

  full_name = org&.at(ns("./name"))&.text
  full_name == "International Organization for Standardization"
end
|
88
|
+
|
74
89
|
def agency(xml)
|
75
90
|
agency = ""
|
76
91
|
xml.xpath(ns("//bibdata/contributor[xmlns:role/@type = 'publisher']/"\
|
77
|
-
"organization
|
78
|
-
|
92
|
+
"organization")).each do |org|
|
93
|
+
name = org&.at(ns("./name"))&.text
|
94
|
+
abbrev = org&.at(ns("./abbreviation"))&.text
|
95
|
+
agency1 = abbrev || name
|
96
|
+
agency = iso?(org) ? "ISO/#{agency}" : "#{agency}#{agency1}/"
|
79
97
|
end
|
80
98
|
set_metadata(:agency, agency.sub(%r{/$}, ""))
|
81
99
|
end
|
@@ -105,8 +123,8 @@ module IsoDoc
|
|
105
123
|
def draftinfo(draft, revdate)
|
106
124
|
draftinfo = ""
|
107
125
|
if draft
|
108
|
-
draftinfo = " (#{@draft_lbl} #{draft
|
109
|
-
draftinfo += ", #{revdate
|
126
|
+
draftinfo = " (#{@draft_lbl} #{draft}"
|
127
|
+
draftinfo += ", #{revdate}" if revdate
|
110
128
|
draftinfo += ")"
|
111
129
|
end
|
112
130
|
l10n(draftinfo)
|
@@ -114,13 +132,8 @@ module IsoDoc
|
|
114
132
|
|
115
133
|
def version(isoxml, _out)
|
116
134
|
set_metadata(:docyear, isoxml&.at(ns("//copyright/from"))&.text)
|
117
|
-
# draft = isoxml.at(ns("//version/draft"))
|
118
|
-
# set_metadata(:draft, draft.nil? ? nil : draft.text)
|
119
135
|
set_metadata(:draft, isoxml&.at(ns("//version/draft"))&.text)
|
120
|
-
# revdate = isoxml.at(ns("//version/revision-date"))
|
121
|
-
#set_metadata(:revdate, revdate.nil? ? nil : revdate.text)
|
122
136
|
set_metadata(:revdate, isoxml&.at(ns("//version/revision-date"))&.text)
|
123
|
-
#set_metadata(:draftinfo, draftinfo(draft, revdate))
|
124
137
|
set_metadata(:draftinfo,
|
125
138
|
draftinfo(get_metadata[:draft], get_metadata[:revdate]))
|
126
139
|
end
|
data/lib/isodoc/references.rb
CHANGED
@@ -8,7 +8,7 @@ module IsoDoc
|
|
8
8
|
isocode = b.at(ns("./docidentifier")).text
|
9
9
|
isodate = b.at(ns("./date[@type = 'published']"))
|
10
10
|
reference = docid_l10n(isocode)
|
11
|
-
reference += ": #{isodate
|
11
|
+
reference += ": #{date_range(isodate)}" if isodate
|
12
12
|
reference
|
13
13
|
end
|
14
14
|
|
@@ -48,15 +48,6 @@ module IsoDoc
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
def ref_entry(list, b, ordinal, _bibliography)
|
52
|
-
ref = b.at(ns("./ref"))
|
53
|
-
para = b.at(ns("./p"))
|
54
|
-
list.p **attr_code("id": ref["id"], class: "Biblio") do |r|
|
55
|
-
ref_entry_code(r, ordinal, ref.text.gsub(/[\[\]]/, ""))
|
56
|
-
para.children.each { |n| parse(n, r) }
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
51
|
# TODO generate formatted ref if not present
|
61
52
|
def noniso_bibitem(list, b, ordinal, bibliography)
|
62
53
|
list.p **attr_code("id": b["id"], class: "Biblio") do |r|
|
@@ -66,13 +57,15 @@ module IsoDoc
|
|
66
57
|
else
|
67
58
|
r << "#{iso_bibitem_ref_code(b)}, "
|
68
59
|
end
|
69
|
-
b.at(ns("./formattedref"))
|
60
|
+
b.at(ns("./formattedref"))&.children&.each { |n| parse(n, r) }
|
70
61
|
end
|
71
62
|
end
|
72
63
|
|
73
64
|
ISO_PUBLISHER_XPATH =
|
74
65
|
"./contributor[xmlns:role/@type = 'publisher']/"\
|
75
|
-
"organization[
|
66
|
+
"organization[abbreviation = 'ISO' or xmlns:abbreviation = 'IEC' or "\
|
67
|
+
"xmlns:name = 'International Organization for Standardization' or "\
|
68
|
+
"xmlns:name = 'International Electrotechnical Commission']".freeze
|
76
69
|
|
77
70
|
def split_bibitems(f)
|
78
71
|
iso_bibitem = []
|
@@ -142,8 +135,13 @@ module IsoDoc
|
|
142
135
|
end
|
143
136
|
end
|
144
137
|
|
145
|
-
def format_ref(ref, isopub)
|
146
|
-
|
138
|
+
def format_ref(ref, isopub, date)
|
139
|
+
if isopub
|
140
|
+
return ref unless date
|
141
|
+
from = date.at(ns("./from"))
|
142
|
+
return ref if from&.text == "--"
|
143
|
+
return ref + ": #{date_range(date)}"
|
144
|
+
end
|
147
145
|
return "[#{ref}]" if /^\d+$/.match?(ref) && !/^\[.*\]$/.match?(ref)
|
148
146
|
ref
|
149
147
|
end
|
@@ -151,17 +149,17 @@ module IsoDoc
|
|
151
149
|
def reference_names(ref)
|
152
150
|
isopub = ref.at(ns(ISO_PUBLISHER_XPATH))
|
153
151
|
docid = ref.at(ns("./docidentifier"))
|
154
|
-
return ref_names(ref) unless docid
|
152
|
+
# return ref_names(ref) unless docid
|
155
153
|
date = ref.at(ns("./date[@type = 'published']"))
|
156
|
-
reference = format_ref(docid_l10n(docid.text), isopub)
|
157
|
-
reference += ": #{date
|
154
|
+
reference = format_ref(docid_l10n(docid.text), isopub, date)
|
155
|
+
# reference += ": #{date_range(date)}" if date && isopub && from.text != "--"
|
158
156
|
@anchors[ref["id"]] = { xref: reference }
|
159
157
|
end
|
160
158
|
|
161
|
-
def ref_names(ref)
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
end
|
159
|
+
# def ref_names(ref)
|
160
|
+
# linkend = ref.text
|
161
|
+
# linkend.gsub!(/[\[\]]/, "") unless /^\[\d+\]$/.match? linkend
|
162
|
+
# @anchors[ref["id"]] = { xref: linkend }
|
163
|
+
# end
|
166
164
|
end
|
167
165
|
end
|