isodoc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
+ require "htmlentities"
2
+
3
+ module IsoDoc
4
+ class Convert
5
+
6
# Returns the metadata object held in @meta, i.e. the same object that
# set_metadata writes into.
def get_metadata
  @meta
end
9
+
10
# Stores +value+ under +key+ in @meta and returns +value+.
def set_metadata(key, value)
  @meta[key] = value
end
13
+
14
# Records the committee identifiers of the document in the metadata.
#
# :tc, :sc and :wg are taken from the @number attribute of the matching
# element and :secretariat from the element text. Every key is first set
# to the placeholder "XXXX", so a key whose node is absent keeps it.
#
# Returns the secretariat text when a secretariat node exists, else nil.
def author(isoxml, _out)
  queries = {
    tc: "//technical-committee/@number",
    sc: "//subcommittee/@number",
    wg: "//workgroup/@number",
    secretariat: "//secretariat",
  }
  nodes = queries.transform_values { |xpath| isoxml.at(ns(xpath)) }
  nodes.each_key { |key| set_metadata(key, "XXXX") }
  # The last entry is :secretariat, so +last+ yields its write (or nil).
  nodes.map { |key, node| set_metadata(key, node.text) if node }.last
end
31
+
32
# Builds the document number and records it as :docnumber.
#
# The number is the project-number text, followed by "-<part>" when the
# @part attribute exists. When a status/stage element exists, :stage and
# :stageabbr are recorded as well, and for stages below 60 the stage
# abbreviation is prepended to the number.
#
# Raises NoMethodError when the document has no project-number element.
def id(isoxml, _out)
  number_node = isoxml.at(ns("//project-number"))
  part_node = isoxml.at(ns("//project-number/@part"))
  stage_node = isoxml.at(ns("//status/stage"))
  dn = number_node.text
  dn = "#{dn}-#{part_node.text}" if part_node
  unless stage_node.nil?
    stage = stage_node.text
    set_metadata(:stage, stage)
    abbr = stage_abbreviation(stage)
    set_metadata(:stageabbr, abbr)
    dn = abbr + " " + dn if stage.to_i < 60
  end
  set_metadata(:docnumber, dn)
end
47
+
48
# Formats the parenthesised draft note for the document.
#
# Returns "" when +draft+ is nil; otherwise " (draft <draft>)" or, when
# +revdate+ is given, " (draft <draft>, <revdate>)". Both arguments only
# need to respond to +text+.
def draftinfo(draft, revdate)
  return "" unless draft

  revision = revdate ? ", #{revdate.text}" : ""
  " (draft #{draft.text}#{revision})"
end
57
+
58
# Records the version-related metadata: :docyear (copyright/from),
# :draft and :revdate (nil when the element is absent) and the formatted
# :draftinfo string.
#
# The draft note is computed once; no local variable shadows the
# draftinfo helper.
#
# Raises NoMethodError when the document has no copyright/from element.
def version(isoxml, _out)
  yr = isoxml.at(ns("//copyright/from"))
  set_metadata(:docyear, yr.text)
  draft = isoxml.at(ns("//version/draft"))
  set_metadata(:draft, draft&.text)
  revdate = isoxml.at(ns("//version/revision-date"))
  set_metadata(:revdate, revdate&.text)
  set_metadata(:draftinfo, draftinfo(draft, revdate))
end
68
+
69
+
70
# Assembles a full title from its parts.
#
# +main+ is required; +intro+ and +part+ may be nil. Each text is
# entity-encoded (hexadecimal) and the pieces are joined as
# "intro&nbsp;&mdash; main&nbsp;&mdash; Part&nbsp;<partnumber>: part".
# +partnumber+ is interpolated as given, without encoding.
def compose_title(main, intro, part, partnumber)
  coder = HTMLEntities.new
  title = coder.encode(main.text, :hexadecimal)
  if intro
    title = "#{coder.encode(intro.text, :hexadecimal)}&nbsp;&mdash; #{title}"
  end
  if part
    title = "#{title}&nbsp;&mdash; Part&nbsp;#{partnumber}: " \
            "#{coder.encode(part.text, :hexadecimal)}"
  end
  title
end
80
+
81
# Composes the English title from its intro/main/part elements and the
# project part number, and records it as :doctitle.
def title(isoxml, _out)
  lang_root = "//title[@language='en']"
  intro, main, part = %w[title-intro title-main title-part].map { |element|
    isoxml.at(ns("#{lang_root}/#{element}"))
  }
  partnumber = isoxml.at(ns("//id/project-number/@part"))
  set_metadata(:doctitle, compose_title(main, intro, part, partnumber))
end
89
+
90
# Composes the French title from its intro/main/part elements and the
# project part number, and records it as :docsubtitle.
def subtitle(isoxml, _out)
  lang_root = "//title[@language='fr']"
  intro, main, part = %w[title-intro title-main title-part].map { |element|
    isoxml.at(ns("#{lang_root}/#{element}"))
  }
  partnumber = isoxml.at(ns("//id/project-number/@part"))
  set_metadata(:docsubtitle, compose_title(main, intro, part, partnumber))
end
98
+ end
99
+ end
@@ -0,0 +1,156 @@
1
+ require "html2doc"
2
+ require "htmlentities"
3
+ require "nokogiri"
4
+ require "pp"
5
+
6
+ module IsoDoc
7
+ class Convert
8
+
9
# Runs the output stage: writes the header file, applies the shared
# cleanup to +result+, then produces the Word and HTML outputs from the
# cleaned markup. Returns whatever toHTML returns.
def postprocess(result, filename, dir)
  generate_header(filename, dir)
  cleaned = cleanup(to_xhtml(result))
  html = from_xhtml(cleaned)
  toWord(html, filename, dir)
  toHTML(html, filename)
end
15
+
16
# Produces the Word output: applies the Word-specific cleanup, fills in
# the template placeholders and hands the markup to Html2Doc together
# with the Word stylesheet and the generated "header.html".
# NOTE(review): the trailing pair of backticks is presumably the
# delimiter pair Html2Doc expects for inline maths - confirm against
# the Html2Doc API.
def toWord(result, filename, dir)
  word_markup = from_xhtml(wordCleanup(to_xhtml(result)))
  populated = populate_template(word_markup)
  Html2Doc.process(populated, filename, @wordstylesheet, "header.html",
                   dir, ['`', '`'])
end
22
+
23
# Applies the Word-only transformations (preface insertion, then annex
# heading cleanup) to +docxml+ and returns the same document object.
def wordCleanup(docxml)
  docxml.tap do |doc|
    wordPreface(doc)
    wordAnnexCleanup(doc)
  end
end
28
+
29
# Force Annex h2 to be p.h2Annex, so it is not picked up by ToC.
#
# Every h2 that has an ancestor with class "Section3" is renamed to a
# paragraph with class "h2Annex". Returns the matched node collection.
def wordAnnexCleanup(docxml)
  docxml.xpath("//h2[ancestor::*[@class = 'Section3']]").each do |h2|
    h2.name = "p"
    h2["class"] = "h2Annex"
  end
end
36
+
37
# Inserts the Word cover page at the start of div.WordSection1 and the
# intro page (with its WORDTOC placeholder replaced by the generated
# table of contents) at the start of div.WordSection2.
#
# The table of contents is substituted through the block form of +sub+:
# it contains heading text, and a replacement *string* would have any
# backslash sequence in a heading (\1, \0, \&, ...) read as a
# back-reference.
#
# Raises NoMethodError when either section div is missing or empty.
def wordPreface(docxml)
  cover = to_xhtml_fragment(File.read(@wordcoverpage, encoding: "UTF-8"))
  section1 = docxml.at('//div[@class="WordSection1"]')
  section1.children.first.add_previous_sibling cover.to_xml(encoding: 'US-ASCII')
  # Built after the cover page is in place, as before.
  toc = makeWordToC(docxml)
  intro_markup = File.read(@wordintropage, encoding: "UTF-8").
    sub(/WORDTOC/) { toc }
  intro = to_xhtml_fragment(intro_markup)
  section2 = docxml.at('//div[@class="WordSection2"]')
  section2.children.first.add_previous_sibling intro.to_xml(encoding: 'US-ASCII')
end
47
+
48
# Replaces the template placeholders in +docxml+ (a String) with the
# collected metadata and rewrites the term-source markers
# ([TERMREF], [/TERMREF], [ISOSECTION], [MODIFICATION]).
#
# Metadata values are substituted through the block form of +gsub+, so
# they are inserted literally: a replacement *string* would treat
# backslash sequences in a title (\1, \0, \&, ...) as back-references.
# Values are passed through +to_s+, so a key that was never set
# (e.g. :stageabbr for a document without a stage) is substituted as an
# empty string instead of raising TypeError.
#
# Returns the populated String.
def populate_template(docxml)
  meta = get_metadata
  docxml.
    gsub(/DOCYEAR/) { meta[:docyear].to_s }.
    gsub(/DOCNUMBER/) { meta[:docnumber].to_s }.
    gsub(/TCNUM/) { meta[:tc].to_s }.
    gsub(/SCNUM/) { meta[:sc].to_s }.
    gsub(/WGNUM/) { meta[:wg].to_s }.
    gsub(/DOCTITLE/) { meta[:doctitle].to_s }.
    gsub(/DOCSUBTITLE/) { meta[:docsubtitle].to_s }.
    gsub(/SECRETARIAT/) { meta[:secretariat].to_s }.
    gsub(/[ ]?DRAFTINFO/) { meta[:draftinfo].to_s }.
    gsub(/\[TERMREF\]\s*/, "[SOURCE: ").
    gsub(/\s*\[\/TERMREF\]\s*/, "]").
    gsub(/\s*\[ISOSECTION\]/, ", ").
    gsub(/\s*\[MODIFICATION\]/, ", modified &mdash; ").
    gsub(%r{WD/CD/DIS/FDIS}) { meta[:stageabbr].to_s }
end
66
+
67
# Renders the header template (@header) and writes the result to
# "header.html" in the current working directory. +dir+ is accepted but
# not used here.
#
# FILENAME, DOCYEAR, DRAFTINFO (with one optional leading space) and
# DOCNUMBER are replaced. Values are inserted through the block form of
# +gsub+, so backslashes in them - e.g. a Windows-style filename such as
# "C:\1out" - are kept literally instead of being read as
# back-references. Missing metadata is substituted as an empty string.
def generate_header(filename, dir)
  meta = get_metadata
  header = File.read(@header, encoding: "UTF-8").
    gsub(/FILENAME/) { filename }.
    gsub(/DOCYEAR/) { meta[:docyear].to_s }.
    gsub(/[ ]?DRAFTINFO/) { meta[:draftinfo].to_s }.
    gsub(/DOCNUMBER/) { meta[:docnumber].to_s }
  File.open("header.html", "w") { |f| f.write(header) }
end
77
+
78
# Strictly speaking these steps are preprocessing, but they sit outside
# the main HTML body: compute the anchor names for +docxml+, then emit
# the <head> of the output document.
def html_header(html, docxml, filename, dir)
  anchor_names(docxml)
  define_head(html, filename, dir)
end
84
+
85
# isodoc.css overrides any CSS injected by Html2Doc, which
# is inserted before this CSS.
#
# Emits <head> with a <title> holding +filename+ and a <style> element
# whose content is the standard stylesheet wrapped in a comment, with
# every FILENAME placeholder replaced by +filename+. +dir+ is accepted
# but not used here.
#
# The stylesheet is read as UTF-8, like the other templates, and the
# filename is inserted through the block form of +gsub+ so backslashes
# in it are not read as back-references.
def define_head(html, filename, dir)
  html.head do |head|
    head.title { |t| t << filename }
    head.style do |style|
      stylesheet = File.read(@standardstylesheet, encoding: "UTF-8").
        gsub("FILENAME") { filename }
      style.comment "\n#{stylesheet}\n"
    end
  end
end
97
+
98
# Reads the Word cover page file (as UTF-8) and adds its content as a
# child of the parent of +div+.
def titlepage(_docxml, div)
  cover_markup = File.read(@wordcoverpage, encoding: "UTF-8")
  div.parent.add_child(cover_markup)
end
102
+
103
# Builds the markup of a single Word table-of-contents entry.
#
# +toclevel+ selects the paragraph class (MsoToc<level>) and +heading+
# is inserted as-is as the link text. The link target and the PAGEREF
# field share one randomly drawn _Toc bookmark number.
def wordTocEntry(toclevel, heading)
  anchor = Random.rand(1_000_000_000)
  <<~ENTRY
    <p class="MsoToc#{toclevel}"><span class="MsoHyperlink"><span
    lang="EN-GB" style='mso-no-proof:yes'>
    <a href="#_Toc#{anchor}">#{heading}<span lang="EN-GB"
    class="MsoTocTextSpan">
    <span style='mso-tab-count:1 dotted'>. </span>
    </span><span lang="EN-GB" class="MsoTocTextSpan">
    <span style='mso-element:field-begin'></span></span>
    <span lang="EN-GB"
    class="MsoTocTextSpan"> PAGEREF _Toc#{anchor} \\h </span>
    <span lang="EN-GB" class="MsoTocTextSpan"><span
    style='mso-element:field-separator'></span></span><span
    lang="EN-GB" class="MsoTocTextSpan">1</span>
    <span lang="EN-GB"
    class="MsoTocTextSpan"></span><span
    lang="EN-GB" class="MsoTocTextSpan"><span
    style='mso-element:field-end'></span></span></a></span></span></p>

  ENTRY
end
125
+
126
# Opening part of the Word TOC field (field-begin, the TOC switches and
# the field separator). makeWordToC injects it right after the first
# level-1 entry's opening <p>. Frozen so the shared constant cannot be
# mutated in place.
WORD_TOC_PREFACE = <<~TOC.freeze
  <span lang="EN-GB"><span
  style='mso-element:field-begin'></span><span
  style='mso-spacerun:yes'>&#xA0;</span>TOC
  \\o &quot;1-2&quot; \\h \\z \\u <span
  style='mso-element:field-separator'></span></span>
TOC
133
+
134
# Closing paragraph of the Word TOC field (field-end); makeWordToC
# appends it after the last entry. Frozen so the shared constant cannot
# be mutated in place.
WORD_TOC_SUFFIX = <<~TOC.freeze
  <p class="MsoToc1"><span lang="EN-GB"><span
  style='mso-element:field-end'></span></span><span
  lang="EN-GB"><o:p>&nbsp;</o:p></span></p>
TOC
139
+
140
# Reduces a heading to its inner markup: every "<br/>" becomes a space,
# then the first opening <h1>/<h2> tag and the first closing one are
# removed. +h+ is converted with +to_s+ first.
def header_strip(h)
  single_line = h.to_s.gsub(%r{<br/>}, " ")
  without_open_tag = single_line.sub(/<h[12][^>]*>/, "")
  without_open_tag.sub(%r{</h[12]>}, "")
end
144
+
145
# Builds the Word table of contents for +docxml+.
#
# One entry is produced per h1 (level 1) and per h2 outside a "Section3"
# ancestor (level 2), in document order. The TOC field preface is then
# inserted after the opening <p> of the first level-1 entry and the
# field suffix is appended.
def makeWordToC(docxml)
  headings = docxml.xpath("//h1 | //h2[not(ancestor::*[@class = 'Section3'])]")
  entries = headings.map do |h|
    level = h.name == "h1" ? 1 : 2
    wordTocEntry(level, header_strip(h))
  end
  toc = entries.join
  toc.sub(/(<p class="MsoToc1">)/,
          %{\\1#{WORD_TOC_PREFACE}}) + WORD_TOC_SUFFIX
end
154
+
155
+ end
156
+ end
@@ -0,0 +1,129 @@
1
+ module IsoDoc
2
+ class Convert
3
# Returns the citation code of an ISO bibliographic item:
# "ISO <docidentifier>", followed by ": <publishdate>" when the item has
# a publishdate element.
def iso_bibitem_ref_code(b)
  code = b.at(ns("./docidentifier"))
  date = b.at(ns("./publishdate"))
  base = "ISO #{code.text}"
  date ? "#{base}: #{date.text}" : base
end
10
+
11
# Handles the "ISO DATE:" note of a bibliographic item, if it has one.
#
# The "ISO DATE: " prefix is removed from the first matching note, the
# matching notes are wrapped in <p>, and the first one is rendered into
# +ref+ through footnote_parse. Returns nil when no such note exists.
def date_note_process(b, ref)
  notes = b.xpath(ns("./note[text()][contains(.,'ISO DATE:')]"))
  return if notes.empty?

  first_note = notes.first
  first_note.content = first_note.content.gsub(/ISO DATE: /, "")
  notes.wrap("<p></p>")
  footnote_parse(notes.first, ref)
end
20
+
21
# Renders one ISO bibliographic item as a paragraph of +list+.
#
# In bibliography mode (+biblio+ truthy) the paragraph gets class
# "Biblio", starts with "[ordinal]" and a tab, and a ", " separates the
# reference code from the title. The title (the name element) is
# always emitted in italics.
def iso_bibitem_entry(list, b, ordinal, biblio)
  css_class = biblio ? "Biblio" : nil
  list.p(**attr_code(id: b["id"], class: css_class)) do |ref|
    if biblio
      ref << "[#{ordinal}]"
      insert_tab(ref, 1)
    end
    ref << iso_bibitem_ref_code(b)
    date_note_process(b, ref)
    ref << ", " if biblio
    ref.i { |i| i << " #{b.at(ns('./name')).text}" }
  end
end
34
+
35
# Writes the leading label of a reference entry into +r+.
#
# When +t+ consists of digits it is itself the label: "[t]" plus a tab.
# Otherwise the label is "[ordinal]" plus a tab, followed by "t,".
def ref_entry_code(r, ordinal, t)
  numeric = /^\d+$/.match?(t)
  r << (numeric ? "[#{t}]" : "[#{ordinal}]")
  tab_result = insert_tab(r, 1)
  # Keep the original result: the tab call for numeric labels, the
  # final append otherwise.
  return tab_result if numeric

  r << "#{t},"
end
45
+
46
# Renders a free-form reference (ref + p children of +b+) as a "Biblio"
# paragraph of +list+: the label derived from the ref text (square
# brackets removed), then the parsed content of the p element.
# +bibliography+ is accepted but not used here.
def ref_entry(list, b, ordinal, bibliography)
  anchor = b.at(ns("./ref"))
  body = b.at(ns("./p"))
  list.p(**attr_code(id: anchor["id"], class: "Biblio")) do |r|
    label = anchor.text.gsub(/[\[\]]/, "")
    ref_entry_code(r, ordinal, label)
    body.children.each { |child| parse(child, r) }
  end
end
54
+
55
# Renders a non-ISO bibliographic item as a "Biblio" paragraph of
# +list+: the label derived from the docidentifier text (square
# brackets removed), then the parsed content of the formatted element.
# +bibliography+ is accepted but not used here.
def noniso_bibitem(list, b, ordinal, bibliography)
  identifier = b.at(ns("./docidentifier"))
  formatted = b.at(ns("./formatted"))
  list.p(**attr_code(id: b["id"], class: "Biblio")) do |r|
    label = identifier.text.gsub(/[\[\]]/, "")
    ref_entry_code(r, ordinal, label)
    formatted.children.each { |child| parse(child, r) }
  end
end
63
+
64
# Splits the bibitem children of +f+ into ISO and non-ISO items.
#
# An item counts as ISO when it has a publisher affiliation named
# "ISO". Returns { iso: [...], noniso: [...] }, each list in document
# order.
def split_bibitems(f)
  iso, non_iso = f.xpath(ns("./bibitem")).partition do |item|
    item.at(ns("./publisher/affiliation[name = 'ISO']"))
  end
  { iso: iso, noniso: non_iso }
end
76
+
77
# Renders every bibitem of +f+ into +div+: ISO items first, then the
# non-ISO ones. Ordinals start at 1 and continue across both groups.
def biblio_list(f, div, bibliography)
  groups = split_bibitems(f)
  iso = groups[:iso]
  iso.each.with_index(1) do |item, ordinal|
    iso_bibitem_entry(div, item, ordinal, bibliography)
  end
  groups[:noniso].each.with_index(iso.size + 1) do |item, ordinal|
    noniso_bibitem(div, item, ordinal, bibliography)
  end
end
86
+
87
# Boilerplate paragraph that opens the normative references clause when
# the clause lists at least one reference. Frozen so the shared constant
# cannot be mutated in place.
NORM_WITH_REFS_PREF = <<~BOILERPLATE.freeze
  The following documents are referred to in the text in such a way
  that some or all of their content constitutes requirements of this
  document. For dated references, only the edition cited applies.
  For undated references, the latest edition of the referenced
  document (including any amendments) applies.
BOILERPLATE
94
+
95
# Boilerplate paragraph used for a normative references clause that
# lists no references. Frozen so the shared constant cannot be mutated
# in place.
NORM_EMPTY_PREF =
  "There are no normative references in this document.".freeze
97
+
98
# Emits the introductory paragraph of the normative references clause:
# the "with references" boilerplate when +f+ has any reference or
# bibitem child element, the "no references" text otherwise.
def norm_ref_preface(f, div)
  has_refs = f.elements.any? { |e| %w[reference bibitem].include?(e.name) }
  div.p(has_refs ? NORM_WITH_REFS_PREF : NORM_EMPTY_PREF)
end
105
+
106
# Renders the normative references clause (fixed number "2.") into
# +out+: heading, boilerplate preface, then the reference list.
# Returns nil without output when the document has no references
# section titled "Normative References".
def norm_ref(isoxml, out)
  refs = isoxml.at(ns("//sections/references[title = 'Normative References']"))
  return if refs.nil?

  out.div do |div|
    clause_name("2.", "Normative References", div, false)
    norm_ref_preface(refs, div)
    biblio_list(refs, div, false)
  end
end
115
+
116
# Renders the bibliography into +out+ after a page break: an h1 with
# class "Section3", every child element other than reference/title/
# bibitem parsed as content, then the reference list in bibliography
# mode. Returns nil without output when the document has no references
# section titled "Bibliography".
def bibliography(isoxml, out)
  refs = isoxml.at(ns("//sections/references[title = 'Bibliography']"))
  return if refs.nil?

  page_break(out)
  out.div do |div|
    div.h1("Bibliography", class: "Section3")
    listed = %w[reference title bibitem]
    refs.elements.each { |e| parse(e, div) unless listed.include?(e.name) }
    biblio_list(refs, div, true)
  end
end
128
+ end
129
+ end
@@ -0,0 +1,136 @@
1
+ module IsoDoc
2
+ class Convert
3
# Renders a (sub)clause +node+ as a div inside +out+.
#
# The title child becomes either a heading h<level> ("label. title") or,
# when the node carries an inline-header attribute, a bold
# span.zzMoveToFollowing ("label. title "). Label and level come from
# the anchor registered for the node id. All other children are handed
# to parse.
#
# The inline-header span is built on the clause's own div; its block
# parameter no longer shadows the div's.
def clause_parse(node, out)
  out.div(**attr_code(id: node["id"])) do |div|
    node.children.each do |child|
      if child.name == "title"
        anchor = get_anchors()[node["id"]]
        if node["inline-header"]
          div.span(class: "zzMoveToFollowing") do |span|
            span.b { |b| b << "#{anchor[:label]}. #{child.text} " }
          end
        else
          div.send "h#{anchor[:level]}" do |h|
            h << "#{anchor[:label]}. #{child.text}"
          end
        end
      else
        parse(child, div)
      end
    end
  end
end
24
+
25
# Emits the title of a clause into +div+.
#
# With +inline_header+ truthy the title is a bold
# span.zzMoveToFollowing reading "num title "; otherwise it is an h1
# holding the number, a tab and the title.
#
# The inline form separates number and title with a space (it used to
# run them together, e.g. "5.Scope "), matching the "label. title "
# layout clause_parse produces for inline headers.
def clause_name(num, title, div, inline_header)
  if inline_header
    div.span(class: "zzMoveToFollowing") do |s|
      s.b { |b| b << "#{num} #{title} " }
    end
  else
    div.h1 do |h1|
      h1 << num
      insert_tab(h1, 1)
      h1 << title
    end
  end
end
41
+
42
# Renders every top-level clause (direct child of sections) into +out+,
# except the one titled "Scope".
#
# Each clause becomes a div; its title is emitted through clause_name
# with the label registered for the clause id, and every other child
# element is handed to parse.
#
# A clause without a title element is rendered like any other (the
# title lookup is nil-safe) instead of raising NoMethodError.
def clause(isoxml, out)
  isoxml.xpath(ns("//clause[parent::sections]")).each do |c|
    next if c.at(ns("./title"))&.text == "Scope"

    out.div(**attr_code(id: c["id"])) do |s|
      c.elements.each do |c1|
        if c1.name == "title"
          clause_name("#{get_anchors()[c['id']][:label]}.",
                      c1.text, s, c["inline-header"])
        else
          parse(c1, s)
        end
      end
    end
  end
end
57
+
58
# Emits the annex heading into +div+: an h1 with class "Annex" holding
# the label registered for the annex id, two line breaks and the title
# text in bold.
def annex_name(annex, name, div)
  div.h1(class: "Annex") do |heading|
    label = get_anchors()[annex["id"]][:label]
    heading << "#{label}<br/><br/>"
    heading << "<b>#{name.text}</b>"
  end
end
64
+
65
# Renders every annex of the document into +out+. Each annex starts
# with a page break and becomes a div with class "Section3"; its title
# goes through annex_name and every other child element through parse.
def annex(isoxml, out)
  isoxml.xpath(ns("//annex")).each do |a|
    page_break(out)
    out.div(**attr_code(id: a["id"], class: "Section3")) do |div|
      a.elements.each do |child|
        if child.name == "title"
          annex_name(a, child, div)
        else
          parse(child, div)
        end
      end
    end
  end
end
80
+
81
# Renders the clause titled "Scope" into +out+ under the fixed heading
# "1." / "Scope"; every child element except the title is parsed.
# Returns nil without output when the document has no such clause.
def scope(isoxml, out)
  f = isoxml.at(ns("//clause[title = 'Scope']"))
  return if f.nil?

  out.div do |div|
    clause_name("1.", "Scope", div, false)
    f.elements.reject { |e| e.name == "title" }.each { |e| parse(e, div) }
  end
end
90
+
91
# Renders the terms element into +out+ under the fixed heading
# "3." / "Terms and Definitions"; every child element except the title
# is parsed. Returns nil without output when the document has no terms.
def terms_defs(isoxml, out)
  f = isoxml.at(ns("//terms"))
  return if f.nil?

  out.div do |div|
    clause_name("3.", "Terms and Definitions", div, false)
    f.elements.reject { |e| e.name == "title" }.each { |e| parse(e, div) }
  end
end
100
+
101
# Renders the symbols-abbrevs element into +out+ under the fixed
# heading "4." / "Symbols and Abbreviations"; every child element
# except the title is parsed. Returns nil without output when the
# document has no such element.
def symbols_abbrevs(isoxml, out)
  f = isoxml.at(ns("//symbols-abbrevs"))
  return if f.nil?

  out.div do |div|
    clause_name("4.", "Symbols and Abbreviations", div, false)
    f.elements.reject { |e| e.name == "title" }.each { |e| parse(e, div) }
  end
end
110
+
111
# Renders the content block titled "Introduction" into +out+ after a
# page break: a div with class "Section3" opened by an h1 of class
# "IntroTitle". The title element is skipped, a patent-notice is
# replaced by its own child elements, and everything else is parsed
# as-is. Returns nil without output when there is no introduction.
def introduction(isoxml, out)
  f = isoxml.at(ns("//content[title = 'Introduction']"))
  return if f.nil?

  page_break(out)
  out.div(class: "Section3") do |div|
    div.h1("Introduction", **attr_code(class: "IntroTitle"))
    f.elements.each do |e|
      next if e.name == "title"

      targets = e.name == "patent-notice" ? e.elements : [e]
      targets.each { |target| parse(target, div) }
    end
  end
end
126
+
127
# Renders the content block titled "Foreword" into +out+ after a page
# break: an h1 of class "ForewordTitle" followed by every child element
# except the title. Returns nil without output when there is no
# foreword.
def foreword(isoxml, out)
  f = isoxml.at(ns("//content[title = 'Foreword']"))
  return if f.nil?

  page_break(out)
  out.div do |s|
    s.h1(class: "ForewordTitle") { |h1| h1 << "Foreword" }
    f.elements.reject { |e| e.name == "title" }.each { |e| parse(e, s) }
  end
end
135
+ end
136
+ end