isodoc 0.4.5 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 92ad102a0e0bc53916aaec5d7a4f23d71828f3d4
4
- data.tar.gz: 7dbbd1969e0e1714c9074ac455a335939f634335
3
+ metadata.gz: 32b644d32aaf9d2170de736e389cec4a7b0b1a4d
4
+ data.tar.gz: afc5f22b859a533e4244758b80a3f4acc9658924
5
5
  SHA512:
6
- metadata.gz: 1b570dec93a170716366cf0d433d20dc614026ebe2224b02e659216fccf006b1680a1126e763a953f8d61c8154a0c9b133dfd989b05e25e675b5b3aa1f10d980
7
- data.tar.gz: b9499b701f2eaca4a27d71ff85ff27926bd58e43a2b255dd429180eaf8b8c72704f8ba3768b2eb894f41128136f887308e6f2a29e89c000b7f02cf2dcb391e51
6
+ metadata.gz: 22698380e197c7f94fa0e690dfa181bc2312aa130aeab631fa6cd8ca515c1cd6c312b3f37b56823eef43d079c24ee1c2b539173c4dcd5f13c156dec975fdfce8
7
+ data.tar.gz: 6b95287839fe12f19e777cfdf62bc06b317247e79b1f6cf084b4ab6b938b561d763aebf9f6ec7bcd0ce280a3cea19ded061cd2fbb8e086c5591221ea9c768a4e
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'rspec' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require "pathname"
10
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path(
11
+ "../../Gemfile", Pathname.new(__FILE__).realpath
12
+ )
13
+
14
+ require "rubygems"
15
+ require "bundler/setup"
16
+
17
+ load Gem.bin_path("rspec-core", "rspec")
18
+
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
32
32
  spec.add_dependency "htmlentities", "~> 4.3.4"
33
33
  spec.add_dependency "image_size"
34
34
  spec.add_dependency "mime-types"
35
- spec.add_dependency "nokogiri", "= 1.8.1"
35
+ spec.add_dependency "nokogiri"
36
36
  spec.add_dependency "ruby-xslt"
37
37
  spec.add_dependency "thread_safe"
38
38
  spec.add_dependency "uuidtools"
@@ -23,6 +23,7 @@ require_relative "isodoc/inline"
23
23
  require_relative "isodoc/notes"
24
24
  require_relative "isodoc/xref_gen"
25
25
  require_relative "isodoc/html"
26
+ require_relative "isodoc/i18n"
26
27
  require "pp"
27
28
 
28
29
  module IsoDoc
@@ -36,6 +37,9 @@ module IsoDoc
36
37
  # wordcoverpage: Cover page for Word
37
38
  # htmlintropage: Introductory page for HTML
38
39
  # wordintropage: Introductory page for Word
40
+ # i18nyaml: YAML file for internationalisation of text
41
+ # ulstyle: list style in Word CSS for unordered lists
42
+ # olstyle: list style in Word CSS for ordered lists
39
43
  def initialize(options)
40
44
  @htmlstylesheet = options[:htmlstylesheet]
41
45
  @wordstylesheet = options[:wordstylesheet]
@@ -45,6 +49,9 @@ module IsoDoc
45
49
  @wordcoverpage = options[:wordcoverpage]
46
50
  @htmlintropage = options[:htmlintropage]
47
51
  @wordintropage = options[:wordintropage]
52
+ @i18nyaml = options[:i18nyaml]
53
+ @ulstyle = options[:ulstyle]
54
+ @olstyle = options[:olstyle]
48
55
  @termdomain = ""
49
56
  @termexample = false
50
57
  @note = false
@@ -59,19 +66,41 @@ module IsoDoc
59
66
  @in_table = false
60
67
  @in_figure = false
61
68
  @seen_footnote = Set.new
69
+ @c = HTMLEntities.new
70
+ @openmathdelim = "`"
71
+ @closemathdelim = "`"
72
+ @lang = "en"
73
+ @script = "Latn"
62
74
  end
63
75
 
64
- def convert(filename)
65
- docxml = Nokogiri::XML(File.read(filename))
66
- filename, dir = init_file(filename)
67
- docxml.root.default_namespace = ""
68
- result = noko do |xml|
76
+ def convert1(docxml, filename, dir)
77
+ noko do |xml|
69
78
  xml.html do |html|
70
79
  html.parent.add_namespace("epub", "http://www.idpf.org/2007/ops")
71
80
  html_header(html, docxml, filename, dir)
72
81
  make_body(html, docxml)
73
82
  end
74
83
  end.join("\n")
84
+ end
85
+
86
+ def convert_init(file, filename, debug)
87
+ docxml = Nokogiri::XML(file)
88
+ filename, dir = init_file(filename, debug)
89
+ docxml.root.default_namespace = ""
90
+ i18n_init(docxml&.at(ns("//bibdata/language"))&.text || "en",
91
+ docxml&.at(ns("//bibdata/script"))&.text || "Latn")
92
+ [docxml, filename, dir]
93
+ end
94
+
95
+ def convert(filename, debug = false)
96
+ convert_file(File.read(filename), filename, debug)
97
+ end
98
+
99
+ def convert_file(file, filename, debug)
100
+ @openmathdelim, @closemathdelim = extract_delims(file)
101
+ docxml, filename, dir = convert_init(file, filename, debug)
102
+ result = convert1(docxml, filename, dir)
103
+ return result if debug
75
104
  postprocess(result, filename, dir)
76
105
  end
77
106
  end
@@ -1,30 +1,11 @@
1
1
  module IsoDoc
2
2
  class Convert
3
- #attr_accessor :termdomain, :termexample, :sourcecode, :note
4
- def set_termdomain(termdomain)
5
- @termdomain = termdomain
6
- end
7
-
8
- def get_termexample
9
- @termexample
10
- end
11
-
12
- def set_termexample(value)
13
- @termexample = value
14
- end
15
-
16
- def in_sourcecode
17
- @sourcecode
18
- end
19
-
20
- def is_note
21
- @note
22
- end
3
+ @annotation = false
23
4
 
24
5
  def note_label(node)
25
- n = get_anchors()[node["id"]]
26
- return "NOTE" if n.nil?
27
- n[:label]
6
+ n = get_anchors[node["id"]]
7
+ return @note_lbl if n.nil? || n[:label].empty?
8
+ l10n("#{@note_lbl} #{n[:label]}")
28
9
  end
29
10
 
30
11
  def note_p_parse(node, div)
@@ -36,17 +17,21 @@ module IsoDoc
36
17
  node.element_children[1..-1].each { |n| parse(n, div) }
37
18
  end
38
19
 
20
+ def note_parse1(node, div)
21
+ div.p **{ class: "Note" } do |p|
22
+ p << note_label(node)
23
+ insert_tab(p, 1)
24
+ end
25
+ node.children.each { |n| parse(n, div) }
26
+ end
27
+
39
28
  def note_parse(node, out)
40
29
  @note = true
41
30
  out.div **{ id: node["id"], class: "Note" } do |div|
42
31
  if node.first_element_child.name == "p"
43
32
  note_p_parse(node, div)
44
33
  else
45
- div.p **{ class: "Note" } do |p|
46
- p << note_label(node)
47
- insert_tab(p, 1)
48
- end
49
- node.children.each { |n| parse(n, div) }
34
+ note_parse1(node, div)
50
35
  end
51
36
  end
52
37
  @note = false
@@ -55,15 +40,15 @@ module IsoDoc
55
40
  def figure_name_parse(node, div, name)
56
41
  div.p **{ class: "FigureTitle", align: "center" } do |p|
57
42
  p.b do |b|
58
- b << "#{get_anchors()[node['id']][:label]}&nbsp;&mdash; "
59
- b << name.text
43
+ b << l10n("#{@figure_lbl} #{get_anchors[node['id']][:label]}")
44
+ b << "&nbsp;&mdash; #{name.text}" if name
60
45
  end
61
46
  end
62
47
  end
63
48
 
64
49
  def figure_key(out)
65
- out.p do |p|
66
- p.b { |b| b << "Key" }
50
+ out.p do |p|
51
+ p.b { |b| b << @key_lbl }
67
52
  end
68
53
  end
69
54
 
@@ -80,17 +65,40 @@ module IsoDoc
80
65
  @in_figure = false
81
66
  end
82
67
 
83
- def example_parse(node, out)
84
- name = node.at(ns("./name"))
85
- out.div **attr_code(id: node["id"], class: "figure") do |div|
68
+ def example_label(node)
69
+ n = get_anchors[node["id"]]
70
+ return @example_lbl if n.nil? || n[:label].empty?
71
+ l10n("#{@example_lbl} #{n[:label]}")
72
+ end
73
+
74
+ EXAMPLE_TBL_ATTR =
75
+ { width: "110pt", valign: "top",
76
+ style: "width:82.8pt;padding:.75pt .75pt .75pt .75pt" }.freeze
77
+
78
+ # used if we are boxing examples
79
+ def example_div_parse(node, out)
80
+ out.div **attr_code(id: node["id"], class: "example") do |div|
81
+ out.p { |p| p << example_label(node) }
86
82
  node.children.each do |n|
87
- parse(n, div) unless n.name == "name"
83
+ parse(n, div)
88
84
  end
89
- figure_name_parse(node, div, name) if name
90
85
  end
91
86
  end
92
87
 
93
- def sourcecode_name_parse(node, div, name)
88
+ def example_parse(node, out)
89
+ out.table **attr_code(id: node["id"], class: "example") do |t|
90
+ t.tr do |tr|
91
+ tr.td **EXAMPLE_TBL_ATTR do |td|
92
+ td << example_label(node)
93
+ end
94
+ tr.td **{ valign: "top" } do |td|
95
+ node.children.each { |n| parse(n, td) }
96
+ end
97
+ end
98
+ end
99
+ end
100
+
101
+ def sourcecode_name_parse(_node, div, name)
94
102
  div.p **{ class: "FigureTitle", align: "center" } do |p|
95
103
  p.b do |b|
96
104
  b << name.text
@@ -111,9 +119,13 @@ module IsoDoc
111
119
  end
112
120
 
113
121
  def annotation_parse(node, out)
114
- out.p **{ class: "Sourcecode" } do |li|
115
- node.children.each { |n| parse(n, li) }
122
+ @sourcecode = false
123
+ @annotation = true
124
+ out.span **{ class: "zzMoveToFollowing" } do |s|
125
+ s << "&lt;#{node.at(ns("//callout[@target='#{node['id']}']")).text}&gt; "
116
126
  end
127
+ node.children.each { |n| parse(n, out) }
128
+ @annotation = false
117
129
  end
118
130
 
119
131
  def admonition_parse(node, out)
@@ -127,29 +139,29 @@ module IsoDoc
127
139
  end
128
140
 
129
141
  def formula_where(dl, out)
130
- out.p { |p| p << "where" }
131
- parse(dl, out)
142
+ return unless dl
143
+ out.p { |p| p << @where_lbl }
144
+ parse(dl, out)
132
145
  end
133
146
 
134
147
  def formula_parse(node, out)
135
- dl = node.at(ns("./dl"))
136
148
  out.div **attr_code(id: node["id"], class: "formula") do |div|
137
149
  parse(node.at(ns("./stem")), out)
138
150
  insert_tab(div, 1)
139
- div << "(#{get_anchors()[node['id']][:label]})"
151
+ div << "(#{get_anchors[node['id']][:label]})"
140
152
  end
141
- formula_where(dl, out) if dl
153
+ formula_where(node.at(ns("./dl")), out)
142
154
  end
143
155
 
144
156
  def para_attrs(node)
145
157
  classtype = nil
146
158
  classtype = "Note" if @note
147
- # classtype = "MsoFootnoteText" if in_footnote
148
159
  classtype = "MsoCommentText" if in_comment
160
+ classtype = "Sourcecode" if @annotation
149
161
  attrs = { class: classtype, id: node["id"] }
150
162
  unless node["align"].nil?
151
163
  attrs[:align] = node["align"] unless node["align"] == "justify"
152
- attrs[:style] = "text-align:#{node["align"]}"
164
+ attrs[:style] = "text-align:#{node['align']}"
153
165
  end
154
166
  attrs
155
167
  end
@@ -169,7 +181,7 @@ module IsoDoc
169
181
  source = node.at(ns("./source"))
170
182
  out.p **{ class: "QuoteAttribution" } do |p|
171
183
  p << "&mdash; #{author.text}, " if author
172
- eref_parse(source, p)
184
+ eref_parse(source, p) if source
173
185
  end
174
186
  end
175
187
 
@@ -177,8 +189,8 @@ module IsoDoc
177
189
  attrs = para_attrs(node)
178
190
  attrs[:class] = "Quote"
179
191
  out.div **attr_code(attrs) do |p|
180
- node.children.each do
181
- |n| parse(n, p) unless ["author", "source"].include? n.name
192
+ node.children.each do |n|
193
+ parse(n, p) unless ["author", "source"].include? n.name
182
194
  end
183
195
  quote_attribution(node, out)
184
196
  end
@@ -11,8 +11,8 @@ module IsoDoc
11
11
  inline_header_cleanup(docxml)
12
12
  figure_cleanup(docxml)
13
13
  table_cleanup(docxml)
14
+ symbols_cleanup(docxml)
14
15
  admonition_cleanup(docxml)
15
- docxml
16
16
  end
17
17
 
18
18
  def admonition_cleanup(docxml)
@@ -21,12 +21,13 @@ module IsoDoc
21
21
  n = title.next_element
22
22
  n&.children&.first&.add_previous_sibling(title.text + "&mdash;")
23
23
  end
24
+ docxml
24
25
  end
25
26
 
26
27
  def figure_get_or_make_dl(t)
27
28
  dl = t.at(".//dl")
28
29
  if dl.nil?
29
- t.add_child("<p><b>Key</b></p><dl></dl>")
30
+ t.add_child("<p><b>#{@key_lbl}</b></p><dl></dl>")
30
31
  dl = t.at(".//dl")
31
32
  end
32
33
  dl
@@ -62,13 +63,12 @@ module IsoDoc
62
63
 
63
64
  def inline_header_cleanup(docxml)
64
65
  docxml.xpath('//span[@class="zzMoveToFollowing"]').each do |x|
66
+ x.delete("class")
65
67
  n = x.next_element
66
68
  if n.nil?
67
- html = Nokogiri::XML.fragment("<p></p>")
68
- html.parent = x.parent
69
- x.parent = html
69
+ x.name = "p"
70
70
  else
71
- n.children.first.add_previous_sibling(x.remove)
71
+ n.children.first.previous = x.remove
72
72
  end
73
73
  end
74
74
  end
@@ -86,10 +86,8 @@ module IsoDoc
86
86
  n&.children&.first&.add_previous_sibling(fn.remove)
87
87
  end
88
88
 
89
- TABLE_WITH_FOOTNOTES = "//table[descendant::aside]".freeze
90
-
91
89
  def table_footnote_cleanup(docxml)
92
- docxml.xpath(TABLE_WITH_FOOTNOTES).each do |t|
90
+ docxml.xpath("//table[descendant::aside]").each do |t|
93
91
  t.xpath(".//aside").each do |a|
94
92
  merge_fnref_into_fn_text(a)
95
93
  a.name = "div"
@@ -139,12 +137,38 @@ module IsoDoc
139
137
  "[ancestor::*[@class = 'Note']]").each do |p|
140
138
  p["class"] = "Note"
141
139
  end
142
-
143
140
  end
144
141
 
145
142
  def table_cleanup(docxml)
146
143
  table_footnote_cleanup(docxml)
147
144
  table_note_cleanup(docxml)
148
145
  end
146
+
147
# Builds the collation key used to sort one term of the Symbols list.
#
# The term is assumed to be AsciiMath. Starting from the entity-decoded text
# of the node, the key is obtained by:
# * turning "_" into ":", so that a subscripted symbol sorts after its bare
#   letter but before any longer name beginning with it (x, x_m, x_1, xa);
# * dropping the AsciiMath backtick delimiters;
# * prefixing every run of digits with thorn, so that numbers sort *after*
#   letters. The digits themselves are kept ("\\0" is the whole match); the
#   previous "\\1" referred to a capture group that does not exist, which
#   silently deleted them and gave x_1 and x_2 the same key.
#
# x:: node responding to #text (the term of a symbol definition)
# Returns the sort key as a String.
def symbol_key(x)
  HTMLEntities.new.decode(x.text).tr("_", ":").delete("`").
    gsub(/[0-9]+/, "þ\\0")
end
154
+
155
# Detaches the dt/dd children of a definition list and pairs them up.
#
# dl:: the definition-list node; each direct dt and dd child is removed from
#      it as it is collected.
# Returns an Array of Hashes in document order. Each Hash holds :dt (the
# removed term node), :key (the term's sort key, from symbol_key) and, once a
# definition has been seen for that term, :dd (the removed definition node).
# When several dd follow one dt, the last one wins.
def extract_symbols_list(dl)
  dl.xpath("./dt | ./dd").each_with_object([]) do |item, entries|
    if item.name == "dt"
      entries << { dt: item.remove, key: symbol_key(item) }
    else
      # A dd that precedes every dt used to raise NoMethodError (there was no
      # last entry to attach it to). Keep it under a term-less entry whose
      # empty key sorts first, so its content is not lost.
      entries << { dt: nil, key: "" } if entries.empty?
      entries.last[:dd] = item.remove
    end
  end
end
166
+
167
# Sorts the entries of the Symbols definition list by their sort key.
#
# docxml:: the document; the first dl found directly under a div of class
#          "Symbols" is reordered in place. Nothing happens if there is none.
# Returns docxml.
def symbols_cleanup(docxml)
  dl = docxml.at("//div[@class = 'Symbols']/dl")
  return docxml unless dl

  entries = extract_symbols_list(dl)
  # Array#sort is not stable; the original position is the tie-breaker so
  # that terms with equal keys keep their document order.
  ordered = entries.each_with_index.sort_by { |entry, pos| [entry[:key], pos] }
  # Rewrite the content of the list rather than replacing the list node:
  # replacing dl with the markup left the dt/dd pairs without a dl parent.
  dl.children =
    ordered.map { |entry, _pos| entry[:dt].to_s + entry[:dd].to_s }.join("\n")
  docxml
end
149
173
  end
150
174
  end
@@ -1,8 +1,6 @@
1
1
  module IsoDoc
2
2
  class Convert
3
-
4
3
  def toHTML(result, filename)
5
- # result = html_cleanup(Nokogiri::HTML(result)).to_xml
6
4
  result = from_xhtml(html_cleanup(to_xhtml(result)))
7
5
  result = populate_template(result, :html)
8
6
  File.open("#{filename}.html", "w") do |f|
@@ -11,19 +9,37 @@ module IsoDoc
11
9
  end
12
10
 
13
11
  def html_cleanup(x)
14
- footnote_backlinks(move_images(html_footnote_filter(htmlPreface(htmlstyle(x)))))
12
+ footnote_backlinks(
13
+ move_images(html_footnote_filter(html_preface(htmlstyle(x))))
14
+ )
15
15
  end
16
16
 
17
- def htmlPreface(docxml)
17
+ MATHJAX_ADDR =
18
+ "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js".freeze
19
+ MATHJAX = <<~"MATHJAX".freeze
20
+ <script type="text/x-mathjax-config">
21
+ MathJax.Hub.Config({
22
+ asciimath2jax: {
23
+ delimiters: [['OPEN', 'CLOSE']]
24
+ }
25
+ });
26
+ </script>
27
+ <script src="#{MATHJAX_ADDR}?config=AM_HTMLorMML"></script>
28
+ MATHJAX
29
+
30
# Fills the MATHJAX template with the document's AsciiMath delimiters.
#
# open::  String substituted for the OPEN placeholder
# close:: String substituted for the CLOSE placeholder
# Returns the filled-in markup as a new String; MATHJAX itself is untouched.
#
# NOTE(review): the values are inserted verbatim; nothing escapes them for
# the script text the placeholders sit in.
def mathjax(open, close)
  # Hash-form gsub replaces both placeholders in a single pass and uses the
  # values literally. With chained string replacements a delimiter such as
  # "\\0" was expanded as a back-reference, and an opening delimiter that
  # contained the text "CLOSE" was substituted a second time.
  MATHJAX.gsub(/OPEN|CLOSE/, "OPEN" => open, "CLOSE" => close)
end
33
+
34
+ def html_preface(docxml)
18
35
  cover = Nokogiri::HTML(File.read(@htmlcoverpage, encoding: "UTF-8"))
19
36
  d = docxml.at('//div[@class="WordSection1"]')
20
- d.children.first.add_previous_sibling cover.to_xml(encoding: 'US-ASCII')
37
+ d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
21
38
  cover = Nokogiri::HTML(File.read(@htmlintropage, encoding: "UTF-8"))
22
39
  d = docxml.at('//div[@class="WordSection2"]')
23
- d.children.first.add_previous_sibling cover.to_xml(encoding: 'US-ASCII')
24
- body = docxml.at("//*[local-name() = 'body']")
25
- body << '<script src="https://cdn.mathjax.org/mathjax/latest/'\
26
- 'MathJax.js?config=AM_HTMLorMML"></script>'
40
+ d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
41
+ docxml.at("//*[local-name() = 'body']") << mathjax(@openmathdelim,
42
+ @closemathdelim)
27
43
  docxml
28
44
  end
29
45
 
@@ -45,8 +61,7 @@ module IsoDoc
45
61
  docxml
46
62
  end
47
63
 
48
- def update_footnote_filter(docxml, x, i, seen)
49
- fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || return
64
+ def update_footnote_filter(fn, x, i, seen)
50
65
  if seen[fn.text]
51
66
  x.at("./sup").content = seen[fn.text][:num].to_s
52
67
  fn.remove unless x["href"] == seen[fn.text][:href]
@@ -63,7 +78,8 @@ module IsoDoc
63
78
  seen = {}
64
79
  i = 1
65
80
  docxml.xpath('//a[@epub:type = "footnote"]').each do |x|
66
- i, seen = update_footnote_filter(docxml, x, i, seen)
81
+ fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
82
+ i, seen = update_footnote_filter(fn, x, i, seen)
67
83
  end
68
84
  docxml
69
85
  end
@@ -73,22 +89,21 @@ module IsoDoc
73
89
  docxml.xpath('//a[@epub:type = "footnote"]').each_with_index do |x, i|
74
90
  next if seen[x["href"]]
75
91
  seen[x["href"]] = true
76
- sup = x.at("./sup").text
77
92
  fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
78
93
  x["id"] || x["id"] = "_footnote#{i + 1}"
79
- fn.elements.first.children.first.
80
- add_previous_sibling("<a href='##{x['id']}'>#{sup}) </a>")
94
+ fn.elements.first.children.first.previous =
95
+ "<a href='##{x['id']}'>#{x.at('./sup').text}) </a>"
81
96
  end
82
97
  docxml
83
98
  end
84
99
 
100
+ # presupposes that the image source is local
85
101
  def move_images(docxml)
86
102
  system "rm -r _images; mkdir _images"
87
103
  docxml.xpath("//*[local-name() = 'img']").each do |i|
88
104
  matched = /\.(?<suffix>\S+)$/.match i["src"]
89
105
  uuid = UUIDTools::UUID.random_create.to_s
90
106
  new_full_filename = File.join("_images", "#{uuid}.#{matched[:suffix]}")
91
- # presupposes that the image source is local
92
107
  system "cp #{i['src']} #{new_full_filename}"
93
108
  i["src"] = new_full_filename
94
109
  i["width"], i["height"] = Html2Doc.image_resize(i, 800, 1200)