isodoc 0.4.5 → 0.5.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 92ad102a0e0bc53916aaec5d7a4f23d71828f3d4
4
- data.tar.gz: 7dbbd1969e0e1714c9074ac455a335939f634335
3
+ metadata.gz: 32b644d32aaf9d2170de736e389cec4a7b0b1a4d
4
+ data.tar.gz: afc5f22b859a533e4244758b80a3f4acc9658924
5
5
  SHA512:
6
- metadata.gz: 1b570dec93a170716366cf0d433d20dc614026ebe2224b02e659216fccf006b1680a1126e763a953f8d61c8154a0c9b133dfd989b05e25e675b5b3aa1f10d980
7
- data.tar.gz: b9499b701f2eaca4a27d71ff85ff27926bd58e43a2b255dd429180eaf8b8c72704f8ba3768b2eb894f41128136f887308e6f2a29e89c000b7f02cf2dcb391e51
6
+ metadata.gz: 22698380e197c7f94fa0e690dfa181bc2312aa130aeab631fa6cd8ca515c1cd6c312b3f37b56823eef43d079c24ee1c2b539173c4dcd5f13c156dec975fdfce8
7
+ data.tar.gz: 6b95287839fe12f19e777cfdf62bc06b317247e79b1f6cf084b4ab6b938b561d763aebf9f6ec7bcd0ce280a3cea19ded061cd2fbb8e086c5591221ea9c768a4e
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'rspec' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require "pathname"
10
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path(
11
+ "../../Gemfile", Pathname.new(__FILE__).realpath
12
+ )
13
+
14
+ require "rubygems"
15
+ require "bundler/setup"
16
+
17
+ load Gem.bin_path("rspec-core", "rspec")
18
+
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
32
32
  spec.add_dependency "htmlentities", "~> 4.3.4"
33
33
  spec.add_dependency "image_size"
34
34
  spec.add_dependency "mime-types"
35
- spec.add_dependency "nokogiri", "= 1.8.1"
35
+ spec.add_dependency "nokogiri"
36
36
  spec.add_dependency "ruby-xslt"
37
37
  spec.add_dependency "thread_safe"
38
38
  spec.add_dependency "uuidtools"
@@ -23,6 +23,7 @@ require_relative "isodoc/inline"
23
23
  require_relative "isodoc/notes"
24
24
  require_relative "isodoc/xref_gen"
25
25
  require_relative "isodoc/html"
26
+ require_relative "isodoc/i18n"
26
27
  require "pp"
27
28
 
28
29
  module IsoDoc
@@ -36,6 +37,9 @@ module IsoDoc
36
37
  # wordcoverpage: Cover page for Word
37
38
  # htmlintropage: Introductory page for HTML
38
39
  # wordintropage: Introductory page for Word
40
+ # i18nyaml: YAML file for internationalisation of text
41
+ # ulstyle: list style in Word CSS for unordered lists
42
+ # olstyle: list style in Word CSS for ordered lists
39
43
  def initialize(options)
40
44
  @htmlstylesheet = options[:htmlstylesheet]
41
45
  @wordstylesheet = options[:wordstylesheet]
@@ -45,6 +49,9 @@ module IsoDoc
45
49
  @wordcoverpage = options[:wordcoverpage]
46
50
  @htmlintropage = options[:htmlintropage]
47
51
  @wordintropage = options[:wordintropage]
52
+ @i18nyaml = options[:i18nyaml]
53
+ @ulstyle = options[:ulstyle]
54
+ @olstyle = options[:olstyle]
48
55
  @termdomain = ""
49
56
  @termexample = false
50
57
  @note = false
@@ -59,19 +66,41 @@ module IsoDoc
59
66
  @in_table = false
60
67
  @in_figure = false
61
68
  @seen_footnote = Set.new
69
+ @c = HTMLEntities.new
70
+ @openmathdelim = "`"
71
+ @closemathdelim = "`"
72
+ @lang = "en"
73
+ @script = "Latn"
62
74
  end
63
75
 
64
- def convert(filename)
65
- docxml = Nokogiri::XML(File.read(filename))
66
- filename, dir = init_file(filename)
67
- docxml.root.default_namespace = ""
68
- result = noko do |xml|
76
+ def convert1(docxml, filename, dir)
77
+ noko do |xml|
69
78
  xml.html do |html|
70
79
  html.parent.add_namespace("epub", "http://www.idpf.org/2007/ops")
71
80
  html_header(html, docxml, filename, dir)
72
81
  make_body(html, docxml)
73
82
  end
74
83
  end.join("\n")
84
+ end
85
+
86
+ def convert_init(file, filename, debug)
87
+ docxml = Nokogiri::XML(file)
88
+ filename, dir = init_file(filename, debug)
89
+ docxml.root.default_namespace = ""
90
+ i18n_init(docxml&.at(ns("//bibdata/language"))&.text || "en",
91
+ docxml&.at(ns("//bibdata/script"))&.text || "Latn")
92
+ [docxml, filename, dir]
93
+ end
94
+
95
+ def convert(filename, debug = false)
96
+ convert_file(File.read(filename), filename, debug)
97
+ end
98
+
99
+ def convert_file(file, filename, debug)
100
+ @openmathdelim, @closemathdelim = extract_delims(file)
101
+ docxml, filename, dir = convert_init(file, filename, debug)
102
+ result = convert1(docxml, filename, dir)
103
+ return result if debug
75
104
  postprocess(result, filename, dir)
76
105
  end
77
106
  end
@@ -1,30 +1,11 @@
1
1
  module IsoDoc
2
2
  class Convert
3
- #attr_accessor :termdomain, :termexample, :sourcecode, :note
4
- def set_termdomain(termdomain)
5
- @termdomain = termdomain
6
- end
7
-
8
- def get_termexample
9
- @termexample
10
- end
11
-
12
- def set_termexample(value)
13
- @termexample = value
14
- end
15
-
16
- def in_sourcecode
17
- @sourcecode
18
- end
19
-
20
- def is_note
21
- @note
22
- end
3
+ @annotation = false
23
4
 
24
5
  def note_label(node)
25
- n = get_anchors()[node["id"]]
26
- return "NOTE" if n.nil?
27
- n[:label]
6
+ n = get_anchors[node["id"]]
7
+ return @note_lbl if n.nil? || n[:label].empty?
8
+ l10n("#{@note_lbl} #{n[:label]}")
28
9
  end
29
10
 
30
11
  def note_p_parse(node, div)
@@ -36,17 +17,21 @@ module IsoDoc
36
17
  node.element_children[1..-1].each { |n| parse(n, div) }
37
18
  end
38
19
 
20
+ def note_parse1(node, div)
21
+ div.p **{ class: "Note" } do |p|
22
+ p << note_label(node)
23
+ insert_tab(p, 1)
24
+ end
25
+ node.children.each { |n| parse(n, div) }
26
+ end
27
+
39
28
  def note_parse(node, out)
40
29
  @note = true
41
30
  out.div **{ id: node["id"], class: "Note" } do |div|
42
31
  if node.first_element_child.name == "p"
43
32
  note_p_parse(node, div)
44
33
  else
45
- div.p **{ class: "Note" } do |p|
46
- p << note_label(node)
47
- insert_tab(p, 1)
48
- end
49
- node.children.each { |n| parse(n, div) }
34
+ note_parse1(node, div)
50
35
  end
51
36
  end
52
37
  @note = false
@@ -55,15 +40,15 @@ module IsoDoc
55
40
  def figure_name_parse(node, div, name)
56
41
  div.p **{ class: "FigureTitle", align: "center" } do |p|
57
42
  p.b do |b|
58
- b << "#{get_anchors()[node['id']][:label]}&nbsp;&mdash; "
59
- b << name.text
43
+ b << l10n("#{@figure_lbl} #{get_anchors[node['id']][:label]}")
44
+ b << "&nbsp;&mdash; #{name.text}" if name
60
45
  end
61
46
  end
62
47
  end
63
48
 
64
49
  def figure_key(out)
65
- out.p do |p|
66
- p.b { |b| b << "Key" }
50
+ out.p do |p|
51
+ p.b { |b| b << @key_lbl }
67
52
  end
68
53
  end
69
54
 
@@ -80,17 +65,40 @@ module IsoDoc
80
65
  @in_figure = false
81
66
  end
82
67
 
83
- def example_parse(node, out)
84
- name = node.at(ns("./name"))
85
- out.div **attr_code(id: node["id"], class: "figure") do |div|
68
+ def example_label(node)
69
+ n = get_anchors[node["id"]]
70
+ return @example_lbl if n.nil? || n[:label].empty?
71
+ l10n("#{@example_lbl} #{n[:label]}")
72
+ end
73
+
74
+ EXAMPLE_TBL_ATTR =
75
+ { width: "110pt", valign: "top",
76
+ style: "width:82.8pt;padding:.75pt .75pt .75pt .75pt" }.freeze
77
+
78
+ # used if we are boxing examples
79
+ def example_div_parse(node, out)
80
+ out.div **attr_code(id: node["id"], class: "example") do |div|
81
+ out.p { |p| p << example_label(node) }
86
82
  node.children.each do |n|
87
- parse(n, div) unless n.name == "name"
83
+ parse(n, div)
88
84
  end
89
- figure_name_parse(node, div, name) if name
90
85
  end
91
86
  end
92
87
 
93
- def sourcecode_name_parse(node, div, name)
88
+ def example_parse(node, out)
89
+ out.table **attr_code(id: node["id"], class: "example") do |t|
90
+ t.tr do |tr|
91
+ tr.td **EXAMPLE_TBL_ATTR do |td|
92
+ td << example_label(node)
93
+ end
94
+ tr.td **{ valign: "top" } do |td|
95
+ node.children.each { |n| parse(n, td) }
96
+ end
97
+ end
98
+ end
99
+ end
100
+
101
+ def sourcecode_name_parse(_node, div, name)
94
102
  div.p **{ class: "FigureTitle", align: "center" } do |p|
95
103
  p.b do |b|
96
104
  b << name.text
@@ -111,9 +119,13 @@ module IsoDoc
111
119
  end
112
120
 
113
121
  def annotation_parse(node, out)
114
- out.p **{ class: "Sourcecode" } do |li|
115
- node.children.each { |n| parse(n, li) }
122
+ @sourcecode = false
123
+ @annotation = true
124
+ out.span **{ class: "zzMoveToFollowing" } do |s|
125
+ s << "&lt;#{node.at(ns("//callout[@target='#{node['id']}']")).text}&gt; "
116
126
  end
127
+ node.children.each { |n| parse(n, out) }
128
+ @annotation = false
117
129
  end
118
130
 
119
131
  def admonition_parse(node, out)
@@ -127,29 +139,29 @@ module IsoDoc
127
139
  end
128
140
 
129
141
  def formula_where(dl, out)
130
- out.p { |p| p << "where" }
131
- parse(dl, out)
142
+ return unless dl
143
+ out.p { |p| p << @where_lbl }
144
+ parse(dl, out)
132
145
  end
133
146
 
134
147
  def formula_parse(node, out)
135
- dl = node.at(ns("./dl"))
136
148
  out.div **attr_code(id: node["id"], class: "formula") do |div|
137
149
  parse(node.at(ns("./stem")), out)
138
150
  insert_tab(div, 1)
139
- div << "(#{get_anchors()[node['id']][:label]})"
151
+ div << "(#{get_anchors[node['id']][:label]})"
140
152
  end
141
- formula_where(dl, out) if dl
153
+ formula_where(node.at(ns("./dl")), out)
142
154
  end
143
155
 
144
156
  def para_attrs(node)
145
157
  classtype = nil
146
158
  classtype = "Note" if @note
147
- # classtype = "MsoFootnoteText" if in_footnote
148
159
  classtype = "MsoCommentText" if in_comment
160
+ classtype = "Sourcecode" if @annotation
149
161
  attrs = { class: classtype, id: node["id"] }
150
162
  unless node["align"].nil?
151
163
  attrs[:align] = node["align"] unless node["align"] == "justify"
152
- attrs[:style] = "text-align:#{node["align"]}"
164
+ attrs[:style] = "text-align:#{node['align']}"
153
165
  end
154
166
  attrs
155
167
  end
@@ -169,7 +181,7 @@ module IsoDoc
169
181
  source = node.at(ns("./source"))
170
182
  out.p **{ class: "QuoteAttribution" } do |p|
171
183
  p << "&mdash; #{author.text}, " if author
172
- eref_parse(source, p)
184
+ eref_parse(source, p) if source
173
185
  end
174
186
  end
175
187
 
@@ -177,8 +189,8 @@ module IsoDoc
177
189
  attrs = para_attrs(node)
178
190
  attrs[:class] = "Quote"
179
191
  out.div **attr_code(attrs) do |p|
180
- node.children.each do
181
- |n| parse(n, p) unless ["author", "source"].include? n.name
192
+ node.children.each do |n|
193
+ parse(n, p) unless ["author", "source"].include? n.name
182
194
  end
183
195
  quote_attribution(node, out)
184
196
  end
@@ -11,8 +11,8 @@ module IsoDoc
11
11
  inline_header_cleanup(docxml)
12
12
  figure_cleanup(docxml)
13
13
  table_cleanup(docxml)
14
+ symbols_cleanup(docxml)
14
15
  admonition_cleanup(docxml)
15
- docxml
16
16
  end
17
17
 
18
18
  def admonition_cleanup(docxml)
@@ -21,12 +21,13 @@ module IsoDoc
21
21
  n = title.next_element
22
22
  n&.children&.first&.add_previous_sibling(title.text + "&mdash;")
23
23
  end
24
+ docxml
24
25
  end
25
26
 
26
27
  def figure_get_or_make_dl(t)
27
28
  dl = t.at(".//dl")
28
29
  if dl.nil?
29
- t.add_child("<p><b>Key</b></p><dl></dl>")
30
+ t.add_child("<p><b>#{@key_lbl}</b></p><dl></dl>")
30
31
  dl = t.at(".//dl")
31
32
  end
32
33
  dl
@@ -62,13 +63,12 @@ module IsoDoc
62
63
 
63
64
  def inline_header_cleanup(docxml)
64
65
  docxml.xpath('//span[@class="zzMoveToFollowing"]').each do |x|
66
+ x.delete("class")
65
67
  n = x.next_element
66
68
  if n.nil?
67
- html = Nokogiri::XML.fragment("<p></p>")
68
- html.parent = x.parent
69
- x.parent = html
69
+ x.name = "p"
70
70
  else
71
- n.children.first.add_previous_sibling(x.remove)
71
+ n.children.first.previous = x.remove
72
72
  end
73
73
  end
74
74
  end
@@ -86,10 +86,8 @@ module IsoDoc
86
86
  n&.children&.first&.add_previous_sibling(fn.remove)
87
87
  end
88
88
 
89
- TABLE_WITH_FOOTNOTES = "//table[descendant::aside]".freeze
90
-
91
89
  def table_footnote_cleanup(docxml)
92
- docxml.xpath(TABLE_WITH_FOOTNOTES).each do |t|
90
+ docxml.xpath("//table[descendant::aside]").each do |t|
93
91
  t.xpath(".//aside").each do |a|
94
92
  merge_fnref_into_fn_text(a)
95
93
  a.name = "div"
@@ -139,12 +137,38 @@ module IsoDoc
139
137
  "[ancestor::*[@class = 'Note']]").each do |p|
140
138
  p["class"] = "Note"
141
139
  end
142
-
143
140
  end
144
141
 
145
142
  def table_cleanup(docxml)
146
143
  table_footnote_cleanup(docxml)
147
144
  table_note_cleanup(docxml)
148
145
  end
146
+
147
+ # We assume AsciiMath. Indices sort after letter but before any following
148
+ # letter (x, x_m, x_1, xa); we use colon to force that sort order.
149
+ # Numbers sort *after* letters; we use thorn to force that sort order.
150
+ def symbol_key(x)
151
+ HTMLEntities.new.decode(x.text).gsub(/_/, ":").gsub(/`/, "").
152
+ gsub(/[0-9]+/, "þ\\1")
153
+ end
154
+
155
+ def extract_symbols_list(dl)
156
+ dl_out = []
157
+ dl.xpath("./dt | ./dd").each do |dtd|
158
+ if dtd.name == "dt"
159
+ dl_out << { dt: dtd.remove, key: symbol_key(dtd) }
160
+ else
161
+ dl_out.last[:dd] = dtd.remove
162
+ end
163
+ end
164
+ dl_out
165
+ end
166
+
167
+ def symbols_cleanup(docxml)
168
+ dl = docxml.at("//div[@class = 'Symbols']/dl") || return
169
+ dl_out = extract_symbols_list(dl)
170
+ dl_out.sort! { |a, b| a[:key] <=> b[:key] }
171
+ dl.replace(dl_out.map { |d| d[:dt].to_s + d[:dd].to_s }.join("\n"))
172
+ end
149
173
  end
150
174
  end
@@ -1,8 +1,6 @@
1
1
  module IsoDoc
2
2
  class Convert
3
-
4
3
  def toHTML(result, filename)
5
- # result = html_cleanup(Nokogiri::HTML(result)).to_xml
6
4
  result = from_xhtml(html_cleanup(to_xhtml(result)))
7
5
  result = populate_template(result, :html)
8
6
  File.open("#{filename}.html", "w") do |f|
@@ -11,19 +9,37 @@ module IsoDoc
11
9
  end
12
10
 
13
11
  def html_cleanup(x)
14
- footnote_backlinks(move_images(html_footnote_filter(htmlPreface(htmlstyle(x)))))
12
+ footnote_backlinks(
13
+ move_images(html_footnote_filter(html_preface(htmlstyle(x))))
14
+ )
15
15
  end
16
16
 
17
- def htmlPreface(docxml)
17
+ MATHJAX_ADDR =
18
+ "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js".freeze
19
+ MATHJAX = <<~"MATHJAX".freeze
20
+ <script type="text/x-mathjax-config">
21
+ MathJax.Hub.Config({
22
+ asciimath2jax: {
23
+ delimiters: [['OPEN', 'CLOSE']]
24
+ }
25
+ });
26
+ </script>
27
+ <script src="#{MATHJAX_ADDR}?config=AM_HTMLorMML"></script>
28
+ MATHJAX
29
+
30
+ def mathjax(open, close)
31
+ MATHJAX.gsub("OPEN", open).gsub("CLOSE", close)
32
+ end
33
+
34
+ def html_preface(docxml)
18
35
  cover = Nokogiri::HTML(File.read(@htmlcoverpage, encoding: "UTF-8"))
19
36
  d = docxml.at('//div[@class="WordSection1"]')
20
- d.children.first.add_previous_sibling cover.to_xml(encoding: 'US-ASCII')
37
+ d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
21
38
  cover = Nokogiri::HTML(File.read(@htmlintropage, encoding: "UTF-8"))
22
39
  d = docxml.at('//div[@class="WordSection2"]')
23
- d.children.first.add_previous_sibling cover.to_xml(encoding: 'US-ASCII')
24
- body = docxml.at("//*[local-name() = 'body']")
25
- body << '<script src="https://cdn.mathjax.org/mathjax/latest/'\
26
- 'MathJax.js?config=AM_HTMLorMML"></script>'
40
+ d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
41
+ docxml.at("//*[local-name() = 'body']") << mathjax(@openmathdelim,
42
+ @closemathdelim)
27
43
  docxml
28
44
  end
29
45
 
@@ -45,8 +61,7 @@ module IsoDoc
45
61
  docxml
46
62
  end
47
63
 
48
- def update_footnote_filter(docxml, x, i, seen)
49
- fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || return
64
+ def update_footnote_filter(fn, x, i, seen)
50
65
  if seen[fn.text]
51
66
  x.at("./sup").content = seen[fn.text][:num].to_s
52
67
  fn.remove unless x["href"] == seen[fn.text][:href]
@@ -63,7 +78,8 @@ module IsoDoc
63
78
  seen = {}
64
79
  i = 1
65
80
  docxml.xpath('//a[@epub:type = "footnote"]').each do |x|
66
- i, seen = update_footnote_filter(docxml, x, i, seen)
81
+ fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
82
+ i, seen = update_footnote_filter(fn, x, i, seen)
67
83
  end
68
84
  docxml
69
85
  end
@@ -73,22 +89,21 @@ module IsoDoc
73
89
  docxml.xpath('//a[@epub:type = "footnote"]').each_with_index do |x, i|
74
90
  next if seen[x["href"]]
75
91
  seen[x["href"]] = true
76
- sup = x.at("./sup").text
77
92
  fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
78
93
  x["id"] || x["id"] = "_footnote#{i + 1}"
79
- fn.elements.first.children.first.
80
- add_previous_sibling("<a href='##{x['id']}'>#{sup}) </a>")
94
+ fn.elements.first.children.first.previous =
95
+ "<a href='##{x['id']}'>#{x.at('./sup').text}) </a>"
81
96
  end
82
97
  docxml
83
98
  end
84
99
 
100
+ # presupposes that the image source is local
85
101
  def move_images(docxml)
86
102
  system "rm -r _images; mkdir _images"
87
103
  docxml.xpath("//*[local-name() = 'img']").each do |i|
88
104
  matched = /\.(?<suffix>\S+)$/.match i["src"]
89
105
  uuid = UUIDTools::UUID.random_create.to_s
90
106
  new_full_filename = File.join("_images", "#{uuid}.#{matched[:suffix]}")
91
- # presupposes that the image source is local
92
107
  system "cp #{i['src']} #{new_full_filename}"
93
108
  i["src"] = new_full_filename
94
109
  i["width"], i["height"] = Html2Doc.image_resize(i, 800, 1200)