isodoc 0.5.5 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CODE_OF_CONDUCT.md +46 -0
- data/LICENSE +25 -0
- data/README.adoc +1 -1
- data/Rakefile +6 -0
- data/isodoc.gemspec +1 -0
- data/lib/isodoc.rb +4 -95
- data/lib/isodoc/cleanup.rb +14 -10
- data/lib/isodoc/{notes.rb → comments.rb} +0 -73
- data/lib/isodoc/convert.rb +97 -0
- data/lib/isodoc/footnotes.rb +74 -0
- data/lib/isodoc/html.rb +41 -4
- data/lib/isodoc/i18n-en.yaml +1 -0
- data/lib/isodoc/i18n-fr.yaml +1 -0
- data/lib/isodoc/i18n-zh-Hans.yaml +1 -0
- data/lib/isodoc/i18n.rb +1 -0
- data/lib/isodoc/inline.rb +4 -12
- data/lib/isodoc/iso2wordhtml.rb +26 -13
- data/lib/isodoc/metadata.rb +23 -10
- data/lib/isodoc/references.rb +20 -22
- data/lib/isodoc/section.rb +4 -3
- data/lib/isodoc/table.rb +0 -2
- data/lib/isodoc/terms.rb +2 -13
- data/lib/isodoc/utils.rb +24 -3
- data/lib/isodoc/version.rb +1 -1
- data/lib/isodoc/wordconvert/comments.rb +155 -0
- data/lib/isodoc/wordconvert/convert.rb +31 -0
- data/lib/isodoc/wordconvert/footnotes.rb +80 -0
- data/lib/isodoc/wordconvert/wordconvertmodule.rb +212 -0
- data/lib/isodoc/xref_gen.rb +50 -79
- data/lib/isodoc/xref_sect_gen.rb +82 -0
- data/spec/assets/header.html +7 -0
- data/spec/assets/html.css +2 -0
- data/spec/assets/htmlcover.html +4 -0
- data/spec/assets/htmlintro.html +5 -0
- data/spec/assets/i18n.yaml +2 -0
- data/spec/assets/iso.xml +8 -0
- data/spec/assets/rice_image1.png +0 -0
- data/spec/assets/std.css +2 -0
- data/spec/assets/word.css +2 -0
- data/spec/assets/wordcover.html +3 -0
- data/spec/assets/wordintro.html +4 -0
- data/spec/isodoc/blocks_spec.rb +130 -47
- data/spec/isodoc/cleanup_spec.rb +693 -0
- data/spec/isodoc/footnotes_spec.rb +282 -0
- data/spec/isodoc/i18n_spec.rb +662 -0
- data/spec/isodoc/inline_spec.rb +344 -0
- data/spec/isodoc/lists_spec.rb +81 -18
- data/spec/isodoc/metadata_spec.rb +141 -0
- data/spec/isodoc/postproc_spec.rb +444 -0
- data/spec/isodoc/ref_spec.rb +158 -0
- data/spec/isodoc/section_spec.rb +275 -112
- data/spec/isodoc/table_spec.rb +146 -8
- data/spec/isodoc/terms_spec.rb +118 -0
- data/spec/isodoc/xref_spec.rb +490 -114
- metadata +46 -4
- data/lib/isodoc/postprocessing.rb +0 -176
@@ -0,0 +1,74 @@
|
|
1
|
+
require "uuidtools"
|
2
|
+
|
3
|
+
module IsoDoc
|
4
|
+
class Convert
|
5
|
+
def footnotes(div)
|
6
|
+
return if @footnotes.empty?
|
7
|
+
@footnotes.each { |fn| div.parent << fn }
|
8
|
+
end
|
9
|
+
|
10
|
+
def make_table_footnote_link(out, fnid, fnref)
|
11
|
+
attrs = { href: "##{fnid}", class: "TableFootnoteRef" }
|
12
|
+
out.a **attrs do |a|
|
13
|
+
a << fnref
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def make_table_footnote_target(out, fnid, fnref)
|
18
|
+
attrs = { id: fnid, class: "TableFootnoteRef" }
|
19
|
+
out.a **attrs do |a|
|
20
|
+
a << fnref
|
21
|
+
insert_tab(a, 1)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def make_table_footnote_text(node, fnid, fnref)
|
26
|
+
attrs = { id: "ftn#{fnid}" }
|
27
|
+
noko do |xml|
|
28
|
+
xml.div **attr_code(attrs) do |div|
|
29
|
+
make_table_footnote_target(div, fnid, fnref)
|
30
|
+
node.children.each { |n| parse(n, div) }
|
31
|
+
end
|
32
|
+
end.join("\n")
|
33
|
+
end
|
34
|
+
|
35
|
+
def make_generic_footnote_text(node, fnid)
|
36
|
+
noko do |xml|
|
37
|
+
xml.aside **{ id: "ftn#{fnid}" } do |div|
|
38
|
+
node.children.each { |n| parse(n, div) }
|
39
|
+
end
|
40
|
+
end.join("\n")
|
41
|
+
end
|
42
|
+
|
43
|
+
def get_table_ancestor_id(node)
|
44
|
+
table = node.ancestors("table") || node.ancestors("figure")
|
45
|
+
return UUIDTools::UUID.random_create.to_s if table.empty?
|
46
|
+
table.last["id"]
|
47
|
+
end
|
48
|
+
|
49
|
+
def table_footnote_parse(node, out)
|
50
|
+
fn = node["reference"]
|
51
|
+
tid = get_table_ancestor_id(node)
|
52
|
+
make_table_footnote_link(out, tid + fn, fn)
|
53
|
+
# do not output footnote text if we have already seen it for this table
|
54
|
+
return if @seen_footnote.include?(tid + fn)
|
55
|
+
@in_footnote = true
|
56
|
+
out.aside { |a| a << make_table_footnote_text(node, tid + fn, fn) }
|
57
|
+
@in_footnote = false
|
58
|
+
@seen_footnote << (tid + fn)
|
59
|
+
end
|
60
|
+
|
61
|
+
def footnote_parse(node, out)
|
62
|
+
return table_footnote_parse(node, out) if @in_table || @in_figure
|
63
|
+
fn = node["reference"]
|
64
|
+
out.a **{ "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
|
65
|
+
a.sup { |sup| sup << fn }
|
66
|
+
end
|
67
|
+
return if @seen_footnote.include?(fn)
|
68
|
+
@in_footnote = true
|
69
|
+
@footnotes << make_generic_footnote_text(node, fn)
|
70
|
+
@in_footnote = false
|
71
|
+
@seen_footnote << fn
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
data/lib/isodoc/html.rb
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
module IsoDoc
|
2
2
|
class Convert
|
3
|
+
def postprocess(result, filename, dir)
|
4
|
+
result = from_xhtml(cleanup(to_xhtml(result)))
|
5
|
+
toHTML(result, filename)
|
6
|
+
end
|
7
|
+
|
3
8
|
def toHTML(result, filename)
|
4
9
|
result = from_xhtml(html_cleanup(to_xhtml(result)))
|
5
10
|
result = populate_template(result, :html)
|
@@ -10,7 +15,7 @@ module IsoDoc
|
|
10
15
|
|
11
16
|
def html_cleanup(x)
|
12
17
|
footnote_backlinks(
|
13
|
-
move_images(html_footnote_filter(html_preface(htmlstyle(x))))
|
18
|
+
html_toc(move_images(html_footnote_filter(html_preface(htmlstyle(x)))))
|
14
19
|
)
|
15
20
|
end
|
16
21
|
|
@@ -32,15 +37,23 @@ module IsoDoc
|
|
32
37
|
end
|
33
38
|
|
34
39
|
def html_preface(docxml)
|
40
|
+
html_cover(docxml) if @htmlcoverpage
|
41
|
+
html_intro(docxml) if @htmlintropage
|
42
|
+
docxml.at("//*[local-name() = 'body']") << mathjax(@openmathdelim,
|
43
|
+
@closemathdelim)
|
44
|
+
docxml
|
45
|
+
end
|
46
|
+
|
47
|
+
def html_cover(docxml)
|
35
48
|
cover = Nokogiri::HTML(File.read(@htmlcoverpage, encoding: "UTF-8"))
|
36
49
|
d = docxml.at('//div[@class="WordSection1"]')
|
37
50
|
d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
|
51
|
+
end
|
52
|
+
|
53
|
+
def html_intro(docxml)
|
38
54
|
cover = Nokogiri::HTML(File.read(@htmlintropage, encoding: "UTF-8"))
|
39
55
|
d = docxml.at('//div[@class="WordSection2"]')
|
40
56
|
d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
|
41
|
-
docxml.at("//*[local-name() = 'body']") << mathjax(@openmathdelim,
|
42
|
-
@closemathdelim)
|
43
|
-
docxml
|
44
57
|
end
|
45
58
|
|
46
59
|
def htmlstylesheet
|
@@ -110,5 +123,29 @@ module IsoDoc
|
|
110
123
|
end
|
111
124
|
docxml
|
112
125
|
end
|
126
|
+
|
127
|
+
def html_toc1(h, ret, prevname)
|
128
|
+
h["id"] = UUIDTools::UUID.random_create.to_s unless h["id"]
|
129
|
+
li = "<li><a href='##{h["id"]}'>#{header_strip(h)}</a></li>"
|
130
|
+
if h.name == "h1"
|
131
|
+
ret += "</ul>" if prevname == "h2"
|
132
|
+
else
|
133
|
+
ret += "<ul>" if prevname == "h1"
|
134
|
+
end
|
135
|
+
ret + li
|
136
|
+
end
|
137
|
+
|
138
|
+
def html_toc(docxml)
|
139
|
+
return docxml unless @htmlintropage
|
140
|
+
ret = ""
|
141
|
+
prevname = ""
|
142
|
+
docxml.xpath("//h1 | //h2").each do |h|
|
143
|
+
ret = html_toc1(h, ret, prevname) unless h["class"] == "toc-contents"
|
144
|
+
prevname = h.name
|
145
|
+
end
|
146
|
+
ret += "<ul>" if prevname == "h2"
|
147
|
+
docxml.at("//*[@id='toc-list']").replace("<ul>#{ret}</ret>")
|
148
|
+
docxml
|
149
|
+
end
|
113
150
|
end
|
114
151
|
end
|
data/lib/isodoc/i18n-en.yaml
CHANGED
data/lib/isodoc/i18n-fr.yaml
CHANGED
data/lib/isodoc/i18n.rb
CHANGED
data/lib/isodoc/inline.rb
CHANGED
@@ -3,14 +3,11 @@ require "uuidtools"
|
|
3
3
|
module IsoDoc
|
4
4
|
class Convert
|
5
5
|
def section_break(body)
|
6
|
-
body.br
|
6
|
+
body.br
|
7
7
|
end
|
8
8
|
|
9
|
-
def page_break(
|
10
|
-
|
11
|
-
clear: "all",
|
12
|
-
style: "mso-special-character:line-break;page-break-before:always",
|
13
|
-
}
|
9
|
+
def page_break(out)
|
10
|
+
out.br
|
14
11
|
end
|
15
12
|
|
16
13
|
def link_parse(node, out)
|
@@ -57,7 +54,7 @@ module IsoDoc
|
|
57
54
|
l10n(", #{@whole_of_text}")
|
58
55
|
else
|
59
56
|
eref_localities1(r["type"], r.at(ns("./referenceFrom")),
|
60
|
-
r.at(ns("./referenceTo")))
|
57
|
+
r.at(ns("./referenceTo")), @lang)
|
61
58
|
end
|
62
59
|
end
|
63
60
|
ret
|
@@ -86,11 +83,6 @@ module IsoDoc
|
|
86
83
|
end
|
87
84
|
end
|
88
85
|
|
89
|
-
def pagebreak_parse(_node, out)
|
90
|
-
attrs = { clear: all, class: "pagebreak" }
|
91
|
-
out.br **attrs
|
92
|
-
end
|
93
|
-
|
94
86
|
def error_parse(node, out)
|
95
87
|
text = node.to_xml.gsub(/</, "&lt;").gsub(/>/, "&gt;")
|
96
88
|
out.para do |p|
|
data/lib/isodoc/iso2wordhtml.rb
CHANGED
@@ -6,14 +6,6 @@ module IsoDoc
|
|
6
6
|
@termdomain = termdomain
|
7
7
|
end
|
8
8
|
|
9
|
-
def get_termexample
|
10
|
-
@termexample
|
11
|
-
end
|
12
|
-
|
13
|
-
def set_termexample(value)
|
14
|
-
@termexample = value
|
15
|
-
end
|
16
|
-
|
17
9
|
def in_sourcecode
|
18
10
|
@sourcecode
|
19
11
|
end
|
@@ -32,6 +24,27 @@ module IsoDoc
|
|
32
24
|
[filename, dir]
|
33
25
|
end
|
34
26
|
|
27
|
+
# these are in fact preprocess,
|
28
|
+
# but they are extraneous to main HTML file
|
29
|
+
def html_header(html, docxml, filename, dir)
|
30
|
+
anchor_names docxml
|
31
|
+
define_head html, filename, dir
|
32
|
+
end
|
33
|
+
|
34
|
+
# isodoc.css overrides any CSS injected by Html2Doc, which
|
35
|
+
# is inserted before this CSS.
|
36
|
+
def define_head(html, filename, _dir)
|
37
|
+
html.head do |head|
|
38
|
+
head.title { |t| t << filename }
|
39
|
+
return unless @standardstylesheet
|
40
|
+
head.style do |style|
|
41
|
+
stylesheet = File.read(@standardstylesheet).
|
42
|
+
gsub("FILENAME", filename)
|
43
|
+
style.comment "\n#{stylesheet}\n"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
35
48
|
def make_body(xml, docxml)
|
36
49
|
body_attr = { lang: "EN-US", link: "blue", vlink: "#954F72" }
|
37
50
|
xml.body **body_attr do |body|
|
@@ -51,6 +64,8 @@ module IsoDoc
|
|
51
64
|
def make_body2(body, docxml)
|
52
65
|
body.div **{ class: "WordSection2" } do |div2|
|
53
66
|
info docxml, div2
|
67
|
+
foreword docxml, div2
|
68
|
+
introduction docxml, div2
|
54
69
|
div2.p { |p| p << "&nbsp;" } # placeholder
|
55
70
|
end
|
56
71
|
section_break(body)
|
@@ -65,7 +80,6 @@ module IsoDoc
|
|
65
80
|
end
|
66
81
|
|
67
82
|
def info(isoxml, out)
|
68
|
-
# intropage(out)
|
69
83
|
title isoxml, out
|
70
84
|
subtitle isoxml, out
|
71
85
|
id isoxml, out
|
@@ -73,8 +87,7 @@ module IsoDoc
|
|
73
87
|
bibdate isoxml, out
|
74
88
|
relations isoxml, out
|
75
89
|
version isoxml, out
|
76
|
-
|
77
|
-
introduction isoxml, out
|
90
|
+
get_metadata
|
78
91
|
end
|
79
92
|
|
80
93
|
def middle_title(out)
|
@@ -125,7 +138,7 @@ module IsoDoc
|
|
125
138
|
when "br" then out.br
|
126
139
|
when "hr" then out.hr
|
127
140
|
when "bookmark" then bookmark_parse(node, out)
|
128
|
-
when "pagebreak" then
|
141
|
+
when "pagebreak" then page_break(out)
|
129
142
|
when "callout" then callout_parse(node, out)
|
130
143
|
when "stem" then stem_parse(node, out)
|
131
144
|
when "clause" then clause_parse(node, out)
|
@@ -161,7 +174,7 @@ module IsoDoc
|
|
161
174
|
when "termsource" then termref_parse(node, out)
|
162
175
|
when "modification" then modification_parse(node, out)
|
163
176
|
when "termnote" then termnote_parse(node, out)
|
164
|
-
when "termexample" then
|
177
|
+
when "termexample" then example_parse(node, out)
|
165
178
|
when "terms" then terms_parse(node, out)
|
166
179
|
when "symbols-abbrevs" then symbols_parse(node, out)
|
167
180
|
when "references" then bibliography_parse(node, out)
|
data/lib/isodoc/metadata.rb
CHANGED
@@ -65,17 +65,35 @@ module IsoDoc
|
|
65
65
|
set_metadata(:secretariat, sec.text) if sec
|
66
66
|
end
|
67
67
|
|
68
|
+
def date_range(date)
|
69
|
+
from = date.at(ns("./from"))
|
70
|
+
to = date.at(ns("./to"))
|
71
|
+
ret = from.text
|
72
|
+
ret += "–#{to.text}" if to
|
73
|
+
ret
|
74
|
+
end
|
75
|
+
|
68
76
|
def bibdate(isoxml, _out)
|
69
77
|
isoxml.xpath(ns("//bibdata/date")).each do |d|
|
70
|
-
set_metadata("#{d['type']}date".to_sym, d
|
78
|
+
set_metadata("#{d['type']}date".to_sym, date_range(d))
|
71
79
|
end
|
72
80
|
end
|
73
81
|
|
82
|
+
def iso?(org)
|
83
|
+
name = org&.at(ns("./name"))&.text
|
84
|
+
abbrev = org&.at(ns("./abbreviation"))&.text
|
85
|
+
(abbrev == "ISO" ||
|
86
|
+
name == "International Organization for Standardization" )
|
87
|
+
end
|
88
|
+
|
74
89
|
def agency(xml)
|
75
90
|
agency = ""
|
76
91
|
xml.xpath(ns("//bibdata/contributor[xmlns:role/@type = 'publisher']/"\
|
77
|
-
"organization
|
78
|
-
|
92
|
+
"organization")).each do |org|
|
93
|
+
name = org&.at(ns("./name"))&.text
|
94
|
+
abbrev = org&.at(ns("./abbreviation"))&.text
|
95
|
+
agency1 = abbrev || name
|
96
|
+
agency = iso?(org) ? "ISO/#{agency}" : "#{agency}#{agency1}/"
|
79
97
|
end
|
80
98
|
set_metadata(:agency, agency.sub(%r{/$}, ""))
|
81
99
|
end
|
@@ -105,8 +123,8 @@ module IsoDoc
|
|
105
123
|
def draftinfo(draft, revdate)
|
106
124
|
draftinfo = ""
|
107
125
|
if draft
|
108
|
-
draftinfo = " (#{@draft_lbl} #{draft
|
109
|
-
draftinfo += ", #{revdate
|
126
|
+
draftinfo = " (#{@draft_lbl} #{draft}"
|
127
|
+
draftinfo += ", #{revdate}" if revdate
|
110
128
|
draftinfo += ")"
|
111
129
|
end
|
112
130
|
l10n(draftinfo)
|
@@ -114,13 +132,8 @@ module IsoDoc
|
|
114
132
|
|
115
133
|
def version(isoxml, _out)
|
116
134
|
set_metadata(:docyear, isoxml&.at(ns("//copyright/from"))&.text)
|
117
|
-
# draft = isoxml.at(ns("//version/draft"))
|
118
|
-
# set_metadata(:draft, draft.nil? ? nil : draft.text)
|
119
135
|
set_metadata(:draft, isoxml&.at(ns("//version/draft"))&.text)
|
120
|
-
# revdate = isoxml.at(ns("//version/revision-date"))
|
121
|
-
#set_metadata(:revdate, revdate.nil? ? nil : revdate.text)
|
122
136
|
set_metadata(:revdate, isoxml&.at(ns("//version/revision-date"))&.text)
|
123
|
-
#set_metadata(:draftinfo, draftinfo(draft, revdate))
|
124
137
|
set_metadata(:draftinfo,
|
125
138
|
draftinfo(get_metadata[:draft], get_metadata[:revdate]))
|
126
139
|
end
|
data/lib/isodoc/references.rb
CHANGED
@@ -8,7 +8,7 @@ module IsoDoc
|
|
8
8
|
isocode = b.at(ns("./docidentifier")).text
|
9
9
|
isodate = b.at(ns("./date[@type = 'published']"))
|
10
10
|
reference = docid_l10n(isocode)
|
11
|
-
reference += ": #{isodate
|
11
|
+
reference += ": #{date_range(isodate)}" if isodate
|
12
12
|
reference
|
13
13
|
end
|
14
14
|
|
@@ -48,15 +48,6 @@ module IsoDoc
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
def ref_entry(list, b, ordinal, _bibliography)
|
52
|
-
ref = b.at(ns("./ref"))
|
53
|
-
para = b.at(ns("./p"))
|
54
|
-
list.p **attr_code("id": ref["id"], class: "Biblio") do |r|
|
55
|
-
ref_entry_code(r, ordinal, ref.text.gsub(/[\[\]]/, ""))
|
56
|
-
para.children.each { |n| parse(n, r) }
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
51
|
# TODO generate formatted ref if not present
|
61
52
|
def noniso_bibitem(list, b, ordinal, bibliography)
|
62
53
|
list.p **attr_code("id": b["id"], class: "Biblio") do |r|
|
@@ -66,13 +57,15 @@ module IsoDoc
|
|
66
57
|
else
|
67
58
|
r << "#{iso_bibitem_ref_code(b)}, "
|
68
59
|
end
|
69
|
-
b.at(ns("./formattedref"))
|
60
|
+
b.at(ns("./formattedref"))&.children&.each { |n| parse(n, r) }
|
70
61
|
end
|
71
62
|
end
|
72
63
|
|
73
64
|
ISO_PUBLISHER_XPATH =
|
74
65
|
"./contributor[xmlns:role/@type = 'publisher']/"\
|
75
|
-
"organization[
|
66
|
+
"organization[abbreviation = 'ISO' or xmlns:abbreviation = 'IEC' or "\
|
67
|
+
"xmlns:name = 'International Organization for Standardization' or "\
|
68
|
+
"xmlns:name = 'International Electrotechnical Commission']".freeze
|
76
69
|
|
77
70
|
def split_bibitems(f)
|
78
71
|
iso_bibitem = []
|
@@ -142,8 +135,13 @@ module IsoDoc
|
|
142
135
|
end
|
143
136
|
end
|
144
137
|
|
145
|
-
def format_ref(ref, isopub)
|
146
|
-
|
138
|
+
def format_ref(ref, isopub, date)
|
139
|
+
if isopub
|
140
|
+
return ref unless date
|
141
|
+
from = date.at(ns("./from"))
|
142
|
+
return ref if from&.text == "--"
|
143
|
+
return ref + ": #{date_range(date)}"
|
144
|
+
end
|
147
145
|
return "[#{ref}]" if /^\d+$/.match?(ref) && !/^\[.*\]$/.match?(ref)
|
148
146
|
ref
|
149
147
|
end
|
@@ -151,17 +149,17 @@ module IsoDoc
|
|
151
149
|
def reference_names(ref)
|
152
150
|
isopub = ref.at(ns(ISO_PUBLISHER_XPATH))
|
153
151
|
docid = ref.at(ns("./docidentifier"))
|
154
|
-
return ref_names(ref) unless docid
|
152
|
+
# return ref_names(ref) unless docid
|
155
153
|
date = ref.at(ns("./date[@type = 'published']"))
|
156
|
-
reference = format_ref(docid_l10n(docid.text), isopub)
|
157
|
-
reference += ": #{date
|
154
|
+
reference = format_ref(docid_l10n(docid.text), isopub, date)
|
155
|
+
# reference += ": #{date_range(date)}" if date && isopub && from.text != "--"
|
158
156
|
@anchors[ref["id"]] = { xref: reference }
|
159
157
|
end
|
160
158
|
|
161
|
-
def ref_names(ref)
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
end
|
159
|
+
# def ref_names(ref)
|
160
|
+
# linkend = ref.text
|
161
|
+
# linkend.gsub!(/[\[\]]/, "") unless /^\[\d+\]$/.match? linkend
|
162
|
+
# @anchors[ref["id"]] = { xref: linkend }
|
163
|
+
# end
|
166
164
|
end
|
167
165
|
end
|