dbtoepub 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/dbtoepub +76 -0
  3. data/lib/docbook.rb +227 -0
  4. metadata +46 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d62171d3ad82dff798473353b3b55322c6ce7afb
4
+ data.tar.gz: 7bb725b3ea583d42bfcdede5957546dc4591eeec
5
+ SHA512:
6
+ metadata.gz: f28ea47865935fcd124d13b4e54d50eedbe9a53480626792dec14803afcb073a273cfd386b6a277cbd1138117b686c0936b4441068aca88f96be68eefede5535
7
+ data.tar.gz: 555e7668a6f87edae840b9ce5868dfff8b351a760eb9b06bb211486dfb7a1995f8250767356377a780015460f76acdb9ebcf7fb167a6cd65034ab58c55f1479d
data/bin/dbtoepub ADDED
@@ -0,0 +1,76 @@
1
+ #!/usr/bin/env ruby
2
+ # This program converts DocBook documents into .epub files.
3
+ #
4
+ # Usage: dbtoepub [OPTIONS] [DocBook Files]
5
+ #
6
+ # .epub is defined by the IDPF at www.idpf.org and is made up of 3 standards:
7
+ # - Open Publication Structure (OPS)
8
+ # - Open Packaging Format (OPF)
9
+ # - Open Container Format (OCF)
10
+ #
11
+ # Specific options:
12
+ # -c, --css [FILE] Use FILE for CSS on generated XHTML.
13
+ # -d, --debug Show debugging output.
14
+ # -f, --font [OTF FILE] Embed OTF FILE in .epub.
15
+ # -h, --help Display usage info.
16
+ # -s, --stylesheet [XSL FILE] Use XSL FILE as a customization
17
+ # layer (imports epub/docbook.xsl).
18
+ # -v, --verbose Make output verbose.
19
+
20
+ lib = File.expand_path(File.join(File.dirname(__FILE__), 'lib'))
21
+ $LOAD_PATH.unshift(lib) if File.exist?(lib)
22
+
23
+ require 'fileutils'
24
+ require 'optparse'
25
+ require 'tmpdir'
26
+
27
+ require 'docbook'
28
+
29
# Command-line driver: parses the options, then renders every DocBook file
# given on the command line into an .epub via DocBook::Epub.

verbose = false
css_file = nil
otf_files = []
customization_layer = nil
output_file = nil

# Set up the OptionParser
opts = OptionParser.new
opts.banner = "Usage: #{File.basename($0)} [OPTIONS] [DocBook Files]

#{File.basename($0)} converts DocBook <book> and <article>s into .epub files.

.epub is defined by the IDPF at www.idpf.org and is made up of 3 standards:
- Open Publication Structure (OPS)
- Open Packaging Format (OPF)
- Open Container Format (OCF)

Specific options:"
opts.on("-c", "--css [FILE]", "Use FILE for CSS on generated XHTML.") { |f| css_file = f }
# DocBook::Epub prints its debugging output when $DEBUG is set, so the flag
# has to set the global rather than a local variable.
opts.on("-d", "--debug", "Show debugging output.") { $DEBUG = true; verbose = true }
# The argument is declared optional, so ignore a bare "-f" instead of
# recording a nil font.
opts.on("-f", "--font [OTF FILE]", "Embed OTF FILE in .epub.") { |f| otf_files << f if f }
opts.on("-h", "--help", "Display usage info.") { puts opts.to_s; exit 0 }
opts.on("-o", "--output [OUTPUT FILE]", "Output ePub file as OUTPUT FILE.") { |f| output_file = f }
opts.on("-s", "--stylesheet [XSL FILE]", "Use XSL FILE as a customization layer (imports epub/docbook.xsl).") { |f| customization_layer = f }
opts.on("-v", "--verbose", "Make output verbose.") { verbose = true }

begin
  db_files = opts.parse(ARGV)
rescue OptionParser::ParseError => e
  # Report bad options as a usage error rather than a stack trace.
  STDERR.puts e.message
  STDERR.puts opts.to_s
  exit 1
end

if db_files.empty?
  puts opts.to_s
  exit 0
end

# A single output file cannot hold several books: every run would be zipped
# into the same archive.
if output_file && db_files.size > 1
  STDERR.puts "--output can only be used with a single DocBook file"
  exit 1
end

db_files.each do |docbook_file|
  # Dir.mktmpdir picks a unique directory and removes it when the block ends,
  # even if rendering raises.
  Dir.mktmpdir(".epubtmp") do |dir|
    e = DocBook::Epub.new(docbook_file, dir, css_file, customization_layer, otf_files)

    epub_file = output_file || (File.basename(docbook_file, ".xml") + ".epub")
    puts "Rendering DocBook file #{docbook_file} to #{epub_file}" if verbose
    # Forward the verbosity so xsltproc and zip honour -v / -d.
    e.render_to_file(epub_file, verbose)
  end
end
data/lib/docbook.rb ADDED
@@ -0,0 +1,227 @@
1
+ require 'fileutils'
2
+ require 'rexml/parsers/pullparser'
3
+
4
module DocBook

  # Renders one DocBook XML file into an .epub archive.
  #
  # The work is delegated to external programs: xmllint collapses entities and
  # XIncludes, XSLT_PROCESSOR applies the stylesheet inside +output_dir+, and
  # ZIPPER bundles the result. Referenced images, the optional CSS file,
  # embedded fonts and callout graphics are copied next to the generated
  # files before zipping.
  class Epub
    CHECKER = "epubcheck"
    STYLESHEET = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "docbook.xsl"))
    CALLOUT_PATH = File.join('images', 'callouts')
    CALLOUT_FULL_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', '..', CALLOUT_PATH))
    CALLOUT_LIMIT = 15
    CALLOUT_EXT = ".png"
    XSLT_PROCESSOR = "xsltproc"
    OUTPUT_DIR = ".epubtmp#{Time.now.to_f.to_s}"
    MIMETYPE = "application/epub+zip"
    META_DIR = "META-INF"
    OEBPS_DIR = "OEBPS"
    ZIPPER = "zip"

    attr_reader :output_dir

    # docbook_file::        path of the DocBook source; must exist.
    # output_dir::          working directory the intermediate files are written to.
    # css_file::            optional CSS file copied into the book.
    # customization_layer:: optional XSL file used instead of STYLESHEET.
    # embedded_fonts::      paths of font files copied into the book.
    #
    # Raises ArgumentError when +docbook_file+ does not exist.
    def initialize(docbook_file, output_dir=OUTPUT_DIR, css_file=nil, customization_layer=nil, embedded_fonts=[])
      @docbook_file = docbook_file
      @output_dir = output_dir
      @meta_dir = File.join(@output_dir, META_DIR)
      @oebps_dir = File.join(@output_dir, OEBPS_DIR)
      @css_file = css_file ? File.expand_path(css_file) : css_file
      @embedded_fonts = embedded_fonts
      @to_delete = []
      @stylesheet = customization_layer ? File.expand_path(customization_layer) : STYLESHEET

      unless File.exist?(@docbook_file)
        raise ArgumentError, "File #{@docbook_file} does not exist"
      end
    end

    # Renders the document and bundles it into +output_file+.
    #
    # Intermediate files are removed afterwards, also when rendering or
    # bundling raises. Raises RuntimeError when an external tool fails.
    def render_to_file(output_file, verbose=false)
      render_to_epub(output_file, verbose)
      bundle_epub(output_file, verbose)
    ensure
      cleanup_files(@to_delete)
      @to_delete.clear
    end

    # Runs CHECKER on +file+. Returns false when the checker exits
    # successfully; otherwise returns the checker's combined stdout/stderr
    # output (a truthy String), or a message when CHECKER cannot be started.
    def self.invalid?(file)
      # Argument-vector form: the file name never passes through a shell.
      output = IO.popen([CHECKER, file.to_s], :err => [:child, :out]) { |io| io.read }
      return false if $?.success?

      STDERR.puts output if $DEBUG
      output
    rescue Errno::ENOENT => e
      message = "Could not run #{CHECKER}: #{e.message}"
      STDERR.puts message if $DEBUG
      message
    end

    private

    # Collapses the source document and runs XSLT_PROCESSOR on it from inside
    # @output_dir. Raises RuntimeError when the processor fails.
    def render_to_epub(output_file, verbose)
      @collapsed_docbook_file = collapse_docbook

      params = [
        ["chunk.quietly", verbose ? '0' : '1'],
        ["callout.graphics.path", "#{CALLOUT_PATH}/"],
        ["callout.graphics.number.limit", CALLOUT_LIMIT.to_s],
        ["callout.graphics.extension", CALLOUT_EXT],
        ["base.dir", "#{OEBPS_DIR}/"],
        ["epub.metainf.dir", "#{META_DIR}/"],
        ["epub.oebps.dir", "#{OEBPS_DIR}/"],
      ]
      unless @embedded_fonts.empty?
        params << ["epub.embedded.fonts", @embedded_fonts.map { |f| File.basename(f) }.join(',')]
      end
      params << ["html.stylesheet", File.basename(@css_file)] if @css_file

      args = params.flat_map { |name, value| ["--stringparam", name, value] }
      args << @stylesheet << @collapsed_docbook_file

      # Only used for diagnostics; the command itself runs without a shell.
      db2epub_cmd = ([XSLT_PROCESSOR] + args).join(" ")
      STDERR.puts db2epub_cmd if $DEBUG
      success = system(XSLT_PROCESSOR, *args, :chdir => @output_dir)
      raise "Could not render as .epub to #{output_file} (#{db2epub_cmd})" unless success

      @to_delete << Dir["#{@meta_dir}/*"]
      @to_delete << Dir["#{@oebps_dir}/*"]
    end

    # Copies the auxiliary files into place and zips mimetype, META-INF and
    # OEBPS into +output_file+. Raises RuntimeError when ZIPPER fails.
    def bundle_epub(output_file, verbose)
      mimetype_filename = write_mimetype
      copy_images
      copy_csses
      copy_fonts
      copy_callouts

      # zip -X -r ../book.epub mimetype META-INF OEBPS
      args = []
      args << "-q" unless verbose
      # The target path is made absolute before the command changes directory.
      args.push("-X", "-r", File.expand_path(output_file),
                mimetype_filename, File.basename(@meta_dir), File.basename(@oebps_dir))
      puts(([ZIPPER] + args).join(" ")) if $DEBUG
      success = system(ZIPPER, *args, :chdir => @output_dir)
      raise "Could not bundle into .epub file to #{output_file}" unless success
    end

    # Input must be collapsed because REXML couldn't find figures in files that
    # were XIncluded or added by ENTITY
    # http://sourceforge.net/tracker/?func=detail&aid=2750442&group_id=21935&atid=373747
    #
    # Returns the path of the collapsed copy, written next to the source file.
    def collapse_docbook
      collapsed_file = File.join(File.expand_path(File.dirname(@docbook_file)),
                                 '.collapsed.' + File.basename(@docbook_file))
      # Registered up front so a half-written file is cleaned up on failure.
      @to_delete << collapsed_file

      unless system("xmllint", "--loaddtd", "--noent", "-o", collapsed_file, @docbook_file)
        raise "Could not collapse named entities in #{@docbook_file}"
      end

      unless system("xmllint", "--xinclude", "-o", collapsed_file, collapsed_file)
        raise "Could not collapse XIncludes in #{@docbook_file}"
      end

      collapsed_file
    end

    # Copies the stock callout graphics into the book when the document uses
    # callouts. Returns the source paths of the copied graphics.
    def copy_callouts
      return [] unless has_callouts?

      Dir.glob("#{CALLOUT_FULL_PATH}/*#{CALLOUT_EXT}").each do |img|
        img_new_filename = File.join(@oebps_dir, CALLOUT_PATH, File.basename(img))

        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        FileUtils.cp(img, img_new_filename)
        @to_delete << img_new_filename
      end
    end

    # Copies the embedded fonts into the OEBPS directory and registers the
    # copies for cleanup. Returns the source paths of the fonts.
    def copy_fonts
      @embedded_fonts.each do |font_file|
        font_new_filename = File.join(@oebps_dir, File.basename(font_file))
        FileUtils.cp(font_file, font_new_filename)
        @to_delete << font_new_filename
      end
    end

    # Copies the CSS file, if any, into the OEBPS directory and registers the
    # copy for cleanup. Returns the source paths that were copied.
    def copy_csses
      return [] unless @css_file

      css_new_filename = File.join(@oebps_dir, File.basename(@css_file))
      FileUtils.cp(@css_file, css_new_filename)
      @to_delete << css_new_filename
      [@css_file]
    end

    # Copies every referenced image, resolved relative to the source document,
    # to the same relative path under the OEBPS directory. Returns the full
    # source paths of the copied images.
    def copy_images
      source_dir = File.expand_path(File.dirname(@docbook_file))
      new_images = []
      get_image_refs.each do |img|
        # TODO: It'd be cooler if we had a filetype lookup rather than just
        # extension
        next unless img =~ /\.(svg|png|gif|jpe?g|xml)/i

        img_new_filename = File.join(@oebps_dir, img)
        img_full = File.join(source_dir, img)

        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        puts(img_full + ": " + img_new_filename) if $DEBUG
        FileUtils.cp(img_full, img_new_filename)
        @to_delete << img_new_filename
        new_images << img_full
      end
      new_images
    end

    # Writes the "mimetype" file into @output_dir and returns its basename.
    def write_mimetype
      mimetype_filename = File.join(@output_dir, "mimetype")
      File.open(mimetype_filename, "w") { |f| f.print MIMETYPE }
      @to_delete << mimetype_filename
      File.basename(mimetype_filename)
    end

    # Recursively removes every path in +file_list+ (nested arrays allowed);
    # paths that no longer exist are ignored.
    def cleanup_files(file_list)
      file_list.flatten.each do |f|
        # Yikes
        FileUtils.rm_r(f, :force => true)
      end
    end

    # Returns an Array of all of the (image) @filerefs in a document
    def get_image_refs
      image_refs = []
      each_start_element do |name, attributes|
        image_refs << attributes['fileref'] if name == "imagedata" || name == "graphic"
      end
      # Elements without a fileref attribute contribute nil; drop those.
      image_refs.compact.uniq
    end

    # Returns true if the document has code callouts
    def has_callouts?
      each_start_element do |name, _attributes|
        return true if name == "calloutlist" || name == "co"
      end
      false
    end

    # Yields the name and attribute Hash of every start tag in the collapsed
    # document. The file is opened in block form so the handle is closed even
    # when the caller returns early.
    def each_start_element
      File.open(@collapsed_docbook_file) do |xml|
        parser = REXML::Parsers::PullParser.new(xml)
        while parser.has_next?
          el = parser.pull
          yield el[0], el[1] if el.start_element?
        end
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dbtoepub
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Schnitzer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-11-22 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: This program converts DocBook documents into .epub files.
14
+ email: webmaster@mschnitzer.de
15
+ executables:
16
+ - dbtoepub
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/dbtoepub
21
+ - lib/docbook.rb
22
+ homepage: https://github.com/docbook/xslt10-stylesheets
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.2.5
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Hola!
46
+ test_files: []