isodoc 0.5.9 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/isodoc.gemspec +1 -0
  3. data/lib/isodoc.rb +2 -0
  4. data/lib/isodoc/convert.rb +17 -1
  5. data/lib/isodoc/footnotes.rb +8 -3
  6. data/lib/isodoc/html.rb +65 -27
  7. data/lib/isodoc/i18n-en.yaml +1 -1
  8. data/lib/isodoc/iso/convert.rb +46 -0
  9. data/lib/isodoc/iso/html/header.html +184 -0
  10. data/lib/isodoc/iso/html/html_iso_intro.html +34 -0
  11. data/lib/isodoc/iso/html/html_iso_titlepage.html +32 -0
  12. data/lib/isodoc/iso/html/htmlstyle.scss +46 -0
  13. data/lib/isodoc/iso/html/isodoc.scss +679 -0
  14. data/lib/isodoc/iso/html/scripts.html +174 -0
  15. data/lib/isodoc/iso/html/style-human.scss +1277 -0
  16. data/lib/isodoc/iso/html/style-iso.scss +1257 -0
  17. data/lib/isodoc/iso/html/word_iso_intro.html +72 -0
  18. data/lib/isodoc/iso/html/word_iso_titlepage.html +58 -0
  19. data/lib/isodoc/iso/html/wordstyle.scss +1135 -0
  20. data/lib/isodoc/iso/wordconvert.rb +34 -0
  21. data/lib/isodoc/iso2wordhtml.rb +17 -23
  22. data/lib/isodoc/section.rb +1 -1
  23. data/lib/isodoc/version.rb +1 -1
  24. data/lib/isodoc/wordconvert/comments.rb +0 -15
  25. data/lib/isodoc/wordconvert/convert.rb +1 -0
  26. data/lib/isodoc/wordconvert/footnotes.rb +0 -12
  27. data/lib/isodoc/wordconvert/postprocess.rb +110 -0
  28. data/lib/isodoc/wordconvert/wordconvertmodule.rb +18 -130
  29. data/lib/isodoc/xref_sect_gen.rb +7 -4
  30. data/spec/assets/scripts.html +3 -1
  31. data/spec/isodoc/blocks_spec.rb +97 -211
  32. data/spec/isodoc/cleanup_spec.rb +27 -0
  33. data/spec/isodoc/footnotes_spec.rb +40 -53
  34. data/spec/isodoc/i18n_spec.rb +4 -56
  35. data/spec/isodoc/inline_spec.rb +7 -98
  36. data/spec/isodoc/iso_spec.rb +89 -0
  37. data/spec/isodoc/lists_spec.rb +5 -57
  38. data/spec/isodoc/postproc_spec.rb +43 -79
  39. data/spec/isodoc/ref_spec.rb +4 -17
  40. data/spec/isodoc/section_spec.rb +11 -24
  41. data/spec/isodoc/table_spec.rb +61 -76
  42. data/spec/isodoc/terms_spec.rb +1 -13
  43. data/spec/isodoc/xref_spec.rb +21 -164
  44. data/spec/spec_helper.rb +21 -1
  45. metadata +31 -2
@@ -0,0 +1,34 @@
1
+ module IsoDoc
2
+ module Iso
3
+ class WordConvert < IsoDoc::Convert
4
+ include IsoDoc::WordConvertModule
5
+
6
+ def default_fonts(options)
7
+ b = options[:bodyfont] ||
8
+ (options[:script] == "Hans" ? '"SimSun",serif' :
9
+ '"Cambria",serif')
10
+ h = options[:headerfont] ||
11
+ (options[:script] == "Hans" ? '"SimHei",sans-serif' :
12
+ '"Cambria",serif')
13
+ m = options[:monospacefont] || '"Courier New",monospace'
14
+ "$bodyfont: #{b};\n$headerfont: #{h};\n$monospacefont: #{m};\n"
15
+ end
16
+
17
+ def html_doc_path(file)
18
+ File.join(File.dirname(__FILE__), File.join("html", file))
19
+ end
20
+
21
+ def initialize(options)
22
+ super
23
+ @wordstylesheet = generate_css(html_doc_path("wordstyle.scss"), false, default_fonts(options))
24
+ @standardstylesheet = generate_css(html_doc_path("isodoc.scss"), false, default_fonts(options))
25
+ @header = html_doc_path("header.html")
26
+ @wordcoverpage = html_doc_path("word_iso_titlepage.html")
27
+ @wordintropage = html_doc_path("word_iso_intro.html")
28
+ @ulstyle = "l3"
29
+ @olstyle = "l2"
30
+ end
31
+
32
+ end
33
+ end
34
+ end
@@ -24,23 +24,17 @@ module IsoDoc
24
24
  [filename, dir]
25
25
  end
26
26
 
27
- # these are in fact preprocess,
28
- # but they are extraneous to main HTML file
29
- def html_header(html, docxml, filename, dir)
30
- anchor_names docxml
31
- define_head html, filename, dir
32
- end
33
-
34
27
  # isodoc.css overrides any CSS injected by Html2Doc, which
35
28
  # is inserted before this CSS.
36
29
  def define_head(html, filename, _dir)
37
30
  html.head do |head|
38
31
  head.title { |t| t << filename }
39
- return unless @standardstylesheet
40
- head.style do |style|
41
- stylesheet = File.read(@standardstylesheet).
42
- gsub("FILENAME", filename)
43
- style.comment "\n#{stylesheet}\n"
32
+ if @standardstylesheet
33
+ head.style do |style|
34
+ stylesheet = File.read(@standardstylesheet).
35
+ gsub("FILENAME", filename)
36
+ style.comment "\n#{stylesheet}\n"
37
+ end
44
38
  end
45
39
  end
46
40
  end
@@ -64,8 +58,6 @@ module IsoDoc
64
58
  def make_body2(body, docxml)
65
59
  body.div **{ class: "WordSection2" } do |div2|
66
60
  info docxml, div2
67
- foreword docxml, div2
68
- introduction docxml, div2
69
61
  div2.p { |p| p << "&nbsp;" } # placeholder
70
62
  end
71
63
  section_break(body)
@@ -73,6 +65,8 @@ module IsoDoc
73
65
 
74
66
  def make_body3(body, docxml)
75
67
  body.div **{ class: "WordSection3" } do |div3|
68
+ foreword docxml, div3
69
+ introduction docxml, div3
76
70
  middle docxml, div3
77
71
  footnotes div3
78
72
  comments div3
@@ -110,13 +104,13 @@ module IsoDoc
110
104
 
111
105
  def smallcap_parse(node, xml)
112
106
  xml.span **{ style: "font-variant:small-caps;" } do |s|
113
- s << node.text
107
+ s << node.inner_html
114
108
  end
115
109
  end
116
110
 
117
111
  def text_parse(node, out)
118
112
  return if node.nil? || node.text.nil?
119
- text = node.text
113
+ text = node.to_s
120
114
  text = text.gsub("\n", "<br/>").gsub(" ", "&nbsp;") if in_sourcecode
121
115
  out << text
122
116
  end
@@ -130,12 +124,12 @@ module IsoDoc
130
124
  text_parse(node, out)
131
125
  else
132
126
  case node.name
133
- when "em" then out.i { |e| e << node.text }
134
- when "strong" then out.b { |e| e << node.text }
135
- when "sup" then out.sup { |e| e << node.text }
136
- when "sub" then out.sub { |e| e << node.text }
137
- when "tt" then out.tt { |e| e << node.text }
138
- when "strike" then out.s { |e| e << node.text }
127
+ when "em" then out.i { |e| e << node.inner_html }
128
+ when "strong" then out.b { |e| e << node.inner_html }
129
+ when "sup" then out.sup { |e| e << node.inner_html }
130
+ when "sub" then out.sub { |e| e << node.inner_html }
131
+ when "tt" then out.tt { |e| e << node.inner_html }
132
+ when "strike" then out.s { |e| e << node.inner_html }
139
133
  when "smallcap" then smallcap_parse(node, out)
140
134
  when "br" then out.br
141
135
  when "hr" then out.hr
@@ -144,7 +138,7 @@ module IsoDoc
144
138
  when "callout" then callout_parse(node, out)
145
139
  when "stem" then stem_parse(node, out)
146
140
  when "clause" then clause_parse(node, out)
147
- # when "subclause" then clause_parse(node, out)
141
+ # when "subclause" then clause_parse(node, out)
148
142
  when "appendix" then clause_parse(node, out)
149
143
  when "xref" then xref_parse(node, out)
150
144
  when "eref" then eref_parse(node, out)
@@ -113,7 +113,7 @@ module IsoDoc
113
113
  end
114
114
 
115
115
  def term_defs_boilerplate_cont(src, term)
116
- sources = sentence_join(src.map { |s| @anchors[s["target"]][:xref] })
116
+ sources = sentence_join(src.map { |s| @anchors[s["bibitemid"]][:xref] })
117
117
  if src.empty?
118
118
  @internal_terms_boilerplate
119
119
  elsif term.nil?
@@ -1,3 +1,3 @@
1
1
  module IsoDoc
2
- VERSION = "0.5.9".freeze
2
+ VERSION = "0.6.0".freeze
3
3
  end
@@ -1,11 +1,3 @@
1
- #require "uuidtools"
2
-
3
- #module IsoDoc
4
- #class WordConvert
5
- #module WordConvertModule
6
- #def self.included base
7
- #base.class_eval do
8
-
9
1
  def in_comment
10
2
  @in_comment
11
3
  end
@@ -67,7 +59,6 @@ def comment_cleanup(docxml)
67
59
  embed_comment_in_comment_list(docxml)
68
60
  end
69
61
 
70
- #COMMENT_IN_COMMENT_LIST =
71
62
  COMMENT_IN_COMMENT_LIST1 =
72
63
  '//div[@style="mso-element:comment-list"]//'\
73
64
  'span[@style="MsoCommentReference"]'.freeze
@@ -135,13 +126,11 @@ def get_comments_from_text(docxml, link_order)
135
126
  # comments
136
127
  end
137
128
 
138
- #COMMENT_TARGET_XREFS =
139
129
  COMMENT_TARGET_XREFS1 =
140
130
  "//span[@style='mso-special-character:comment']/@target".freeze
141
131
 
142
132
  def reorder_comments_by_comment_link(docxml)
143
133
  link_order = {}
144
- #docxml.xpath(COMMENT_TARGET_XREFS).each_with_index do |target, i|
145
134
  docxml.xpath(COMMENT_TARGET_XREFS1).each_with_index do |target, i|
146
135
  link_order[target.value] = i
147
136
  end
@@ -149,7 +138,3 @@ def reorder_comments_by_comment_link(docxml)
149
138
  list = docxml.at("//*[@style='mso-element:comment-list']") || return
150
139
  list.children = comments.map { |c| c[:text] }.join("\n")
151
140
  end
152
- #end
153
- #end
154
- #end
155
- #end
@@ -20,6 +20,7 @@ module IsoDoc
20
20
  eval File.open(File.join(File.dirname(__FILE__),"wordconvertmodule.rb")).read
21
21
  eval File.open(File.join(File.dirname(__FILE__),"comments.rb")).read
22
22
  eval File.open(File.join(File.dirname(__FILE__),"footnotes.rb")).read
23
+ eval File.open(File.join(File.dirname(__FILE__),"postprocess.rb")).read
23
24
  end
24
25
  end
25
26
  end
@@ -1,11 +1,3 @@
1
- #require "uuidtools"
2
-
3
- #module IsoDoc
4
- #class WordConvert
5
- #module WordConvertModule
6
- #def self.included base
7
- #base.class_eval do
8
-
9
1
  def footnotes(div)
10
2
  return if @footnotes.empty?
11
3
  @footnotes.each { |fn| div.parent << fn }
@@ -74,7 +66,3 @@ def footnote_parse(node, out)
74
66
  @in_footnote = false
75
67
  @seen_footnote << fn
76
68
  end
77
- #end
78
- #end
79
- #end
80
- #end
@@ -0,0 +1,110 @@
1
+ def postprocess(result, filename, dir)
2
+ generate_header(filename, dir)
3
+ result = from_xhtml(cleanup(to_xhtml(result)))
4
+ toWord(result, filename, dir)
5
+ @files_to_delete.each { |f| system "rm #{f}" }
6
+ end
7
+
8
+ def toWord(result, filename, dir)
9
+ result = populate_template(result, :word)
10
+ result = from_xhtml(word_cleanup(to_xhtml(result)))
11
+ Html2Doc.process(result, filename: filename, stylesheet: @wordstylesheet,
12
+ header_file: "header.html", dir: dir,
13
+ asciimathdelims: [@openmathdelim, @closemathdelim],
14
+ liststyles: { ul: @ulstyle, ol: @olstyle })
15
+ end
16
+
17
+ def word_cleanup(docxml)
18
+ word_preface(docxml)
19
+ word_annex_cleanup(docxml)
20
+ docxml
21
+ end
22
+
23
+ # force Annex h2 to be p.h2Annex, so it is not picked up by ToC
24
+ def word_annex_cleanup(docxml)
25
+ docxml.xpath("//h2[ancestor::*[@class = 'Section3']]").each do |h2|
26
+ h2.name = "p"
27
+ h2["class"] = "h2Annex"
28
+ end
29
+ end
30
+
31
+ def word_preface(docxml)
32
+ word_cover(docxml) if @wordcoverpage
33
+ word_intro(docxml) if @wordintropage
34
+ end
35
+
36
+ def word_cover(docxml)
37
+ cover = File.read(@wordcoverpage, encoding: "UTF-8")
38
+ cover = populate_template(cover, :word)
39
+ coverxml = to_xhtml_fragment(cover)
40
+ docxml.at('//div[@class="WordSection1"]').children.first.previous =
41
+ coverxml.to_xml(encoding: "US-ASCII")
42
+ end
43
+
44
+ def word_intro(docxml)
45
+ intro = File.read(@wordintropage, encoding: "UTF-8").
46
+ sub(/WORDTOC/, make_WordToC(docxml))
47
+ intro = populate_template(intro, :word)
48
+ introxml = to_xhtml_fragment(intro)
49
+ docxml.at('//div[@class="WordSection2"]').children.first.previous =
50
+ introxml.to_xml(encoding: "US-ASCII")
51
+ end
52
+
53
+ def generate_header(filename, _dir)
54
+ return unless @header
55
+ template = Liquid::Template.parse(File.read(@header, encoding: "UTF-8"))
56
+ meta = get_metadata
57
+ meta[:filename] = filename
58
+ params = meta.map { |k, v| [k.to_s, v] }.to_h
59
+ File.open("header.html", "w") do |f|
60
+ f.write(template.render(params))
61
+ end
62
+ @files_to_delete << "header.html"
63
+ end
64
+
65
+ def word_toc_entry(toclevel, heading)
66
+ bookmark = Random.rand(1000000000)
67
+ <<~TOC
68
+ <p class="MsoToc#{toclevel}"><span class="MsoHyperlink"><span
69
+ lang="EN-GB" style='mso-no-proof:yes'>
70
+ <a href="#_Toc#{bookmark}">#{heading}<span lang="EN-GB"
71
+ class="MsoTocTextSpan">
72
+ <span style='mso-tab-count:1 dotted'>. </span>
73
+ </span><span lang="EN-GB" class="MsoTocTextSpan">
74
+ <span style='mso-element:field-begin'></span></span>
75
+ <span lang="EN-GB"
76
+ class="MsoTocTextSpan"> PAGEREF _Toc#{bookmark} \\h </span>
77
+ <span lang="EN-GB" class="MsoTocTextSpan"><span
78
+ style='mso-element:field-separator'></span></span><span
79
+ lang="EN-GB" class="MsoTocTextSpan">1</span>
80
+ <span lang="EN-GB"
81
+ class="MsoTocTextSpan"></span><span
82
+ lang="EN-GB" class="MsoTocTextSpan"><span
83
+ style='mso-element:field-end'></span></span></a></span></span></p>
84
+
85
+ TOC
86
+ end
87
+
88
+ WORD_TOC_PREFACE1 = <<~TOC.freeze
89
+ <span lang="EN-GB"><span
90
+ style='mso-element:field-begin'></span><span
91
+ style='mso-spacerun:yes'>&#xA0;</span>TOC
92
+ \\o &quot;1-2&quot; \\h \\z \\u <span
93
+ style='mso-element:field-separator'></span></span>
94
+ TOC
95
+
96
+ WORD_TOC_SUFFIX1 = <<~TOC.freeze
97
+ <p class="MsoToc1"><span lang="EN-GB"><span
98
+ style='mso-element:field-end'></span></span><span
99
+ lang="EN-GB"><o:p>&nbsp;</o:p></span></p>
100
+ TOC
101
+
102
+ def make_WordToC(docxml)
103
+ toc = ""
104
+ docxml.xpath("//h1 | //h2[not(ancestor::*[@class = 'Section3'])]").
105
+ each do |h|
106
+ toc += word_toc_entry(h.name == "h1" ? 1 : 2, header_strip(h))
107
+ end
108
+ toc.sub(/(<p class="MsoToc1">)/,
109
+ %{\\1#{WORD_TOC_PREFACE1}}) + WORD_TOC_SUFFIX1
110
+ end
@@ -1,12 +1,21 @@
1
+ def make_body2(body, docxml)
2
+ body.div **{ class: "WordSection2" } do |div2|
3
+ info docxml, div2
4
+ foreword docxml, div2
5
+ introduction docxml, div2
6
+ div2.p { |p| p << "&nbsp;" } # placeholder
7
+ end
8
+ section_break(body)
9
+ end
1
10
 
2
- #require "html2doc"
3
- #require "liquid"
11
+ def make_body3(body, docxml)
12
+ body.div **{ class: "WordSection3" } do |div3|
13
+ middle docxml, div3
14
+ footnotes div3
15
+ comments div3
16
+ end
17
+ end
4
18
 
5
- #module IsoDoc
6
- #class WordConvert < Convert
7
- #module WordConvertModule
8
- #def self.included base
9
- #base.class_eval do
10
19
  def insert_tab(out, n)
11
20
  out.span **attr_code(style: "mso-tab-count:#{n}") do |span|
12
21
  [1..n].each { span << "&#xA0; " }
@@ -32,7 +41,6 @@ def remove_bottom_border(td)
32
41
  gsub(/mso-border-bottom-alt:[^;]+;/, "mso-border-bottom-alt:0pt;")
33
42
  end
34
43
 
35
- #SW1 = IsoDoc::SW
36
44
  SW1 = "solid windowtext".freeze
37
45
 
38
46
  def new_fullcolspan_row(t, tfoot)
@@ -65,10 +73,8 @@ def section_break(body)
65
73
  end
66
74
 
67
75
  def page_break(out)
68
- out.br **{
69
- clear: "all",
70
- style: "mso-special-character:line-break;page-break-before:always",
71
- }
76
+ out.br **{ clear: "all",
77
+ style: "mso-special-character:line-break;page-break-before:always" }
72
78
  end
73
79
 
74
80
  WORD_DT_ATTRS = {class: @note ? "Note" : nil, align: "left",
@@ -131,121 +137,3 @@ def figure_aside_process(f, aside, key)
131
137
  a.parent = dd
132
138
  end
133
139
  end
134
-
135
- def postprocess(result, filename, dir)
136
- generate_header(filename, dir)
137
- result = from_xhtml(cleanup(to_xhtml(result)))
138
- toWord(result, filename, dir)
139
- @files_to_delete.each { |f| system "rm #{f}" }
140
- end
141
-
142
- def toWord(result, filename, dir)
143
- result = populate_template(result, :word)
144
- result = from_xhtml(word_cleanup(to_xhtml(result)))
145
- Html2Doc.process(result, filename: filename, stylesheet: @wordstylesheet,
146
- header_file: "header.html", dir: dir,
147
- asciimathdelims: [@openmathdelim, @closemathdelim],
148
- liststyles: { ul: @ulstyle, ol: @olstyle })
149
- end
150
-
151
- def word_cleanup(docxml)
152
- word_preface(docxml)
153
- word_annex_cleanup(docxml)
154
- docxml
155
- end
156
-
157
- # force Annex h2 to be p.h2Annex, so it is not picked up by ToC
158
- def word_annex_cleanup(docxml)
159
- docxml.xpath("//h2[ancestor::*[@class = 'Section3']]").each do |h2|
160
- h2.name = "p"
161
- h2["class"] = "h2Annex"
162
- end
163
- end
164
-
165
- def word_preface(docxml)
166
- word_cover(docxml) if @wordcoverpage
167
- word_intro(docxml) if @wordintropage
168
- end
169
-
170
- def word_cover(docxml)
171
- cover = File.read(@wordcoverpage, encoding: "UTF-8")
172
- cover = populate_template(cover, :word)
173
- coverxml = to_xhtml_fragment(cover)
174
- docxml.at('//div[@class="WordSection1"]').children.first.previous =
175
- coverxml.to_xml(encoding: "US-ASCII")
176
- end
177
-
178
- def word_intro(docxml)
179
- intro = File.read(@wordintropage, encoding: "UTF-8").
180
- sub(/WORDTOC/, make_WordToC(docxml))
181
- intro = populate_template(intro, :word)
182
- introxml = to_xhtml_fragment(intro)
183
- docxml.at('//div[@class="WordSection2"]').children.first.previous =
184
- introxml.to_xml(encoding: "US-ASCII")
185
- end
186
-
187
- def generate_header(filename, _dir)
188
- return unless @header
189
- template = Liquid::Template.parse(File.read(@header, encoding: "UTF-8"))
190
- meta = get_metadata
191
- meta[:filename] = filename
192
- params = meta.map { |k, v| [k.to_s, v] }.to_h
193
- File.open("header.html", "w") do |f|
194
- f.write(template.render(params))
195
- end
196
- @files_to_delete << "header.html"
197
- end
198
-
199
- def word_toc_entry(toclevel, heading)
200
- bookmark = Random.rand(1000000000)
201
- <<~TOC
202
- <p class="MsoToc#{toclevel}"><span class="MsoHyperlink"><span
203
- lang="EN-GB" style='mso-no-proof:yes'>
204
- <a href="#_Toc#{bookmark}">#{heading}<span lang="EN-GB"
205
- class="MsoTocTextSpan">
206
- <span style='mso-tab-count:1 dotted'>. </span>
207
- </span><span lang="EN-GB" class="MsoTocTextSpan">
208
- <span style='mso-element:field-begin'></span></span>
209
- <span lang="EN-GB"
210
- class="MsoTocTextSpan"> PAGEREF _Toc#{bookmark} \\h </span>
211
- <span lang="EN-GB" class="MsoTocTextSpan"><span
212
- style='mso-element:field-separator'></span></span><span
213
- lang="EN-GB" class="MsoTocTextSpan">1</span>
214
- <span lang="EN-GB"
215
- class="MsoTocTextSpan"></span><span
216
- lang="EN-GB" class="MsoTocTextSpan"><span
217
- style='mso-element:field-end'></span></span></a></span></span></p>
218
-
219
- TOC
220
- end
221
-
222
- #WORD_TOC_PREFACE = <<~TOC.freeze
223
- WORD_TOC_PREFACE1 = <<~TOC.freeze
224
- <span lang="EN-GB"><span
225
- style='mso-element:field-begin'></span><span
226
- style='mso-spacerun:yes'>&#xA0;</span>TOC
227
- \\o &quot;1-2&quot; \\h \\z \\u <span
228
- style='mso-element:field-separator'></span></span>
229
- TOC
230
-
231
- #WORD_TOC_SUFFIX = <<~TOC.freeze
232
- WORD_TOC_SUFFIX1 = <<~TOC.freeze
233
- <p class="MsoToc1"><span lang="EN-GB"><span
234
- style='mso-element:field-end'></span></span><span
235
- lang="EN-GB"><o:p>&nbsp;</o:p></span></p>
236
- TOC
237
-
238
- def make_WordToC(docxml)
239
- toc = ""
240
- docxml.xpath("//h1 | //h2[not(ancestor::*[@class = 'Section3'])]").
241
- each do |h|
242
- toc += word_toc_entry(h.name == "h1" ? 1 : 2, header_strip(h))
243
- end
244
- toc.sub(/(<p class="MsoToc1">)/,
245
- #%{\\1#{WORD_TOC_PREFACE}}) + WORD_TOC_SUFFIX
246
- %{\\1#{WORD_TOC_PREFACE1}}) + WORD_TOC_SUFFIX1
247
- end
248
- #end
249
- #end
250
- #end
251
- #end