dbtoepub 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/dbtoepub +76 -0
- data/lib/docbook.rb +227 -0
- metadata +46 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d62171d3ad82dff798473353b3b55322c6ce7afb
|
4
|
+
data.tar.gz: 7bb725b3ea583d42bfcdede5957546dc4591eeec
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f28ea47865935fcd124d13b4e54d50eedbe9a53480626792dec14803afcb073a273cfd386b6a277cbd1138117b686c0936b4441068aca88f96be68eefede5535
|
7
|
+
data.tar.gz: 555e7668a6f87edae840b9ce5868dfff8b351a760eb9b06bb211486dfb7a1995f8250767356377a780015460f76acdb9ebcf7fb167a6cd65034ab58c55f1479d
|
data/bin/dbtoepub
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# This program converts DocBook documents into .epub files.
|
3
|
+
#
|
4
|
+
# Usage: dbtoepub [OPTIONS] [DocBook Files]
|
5
|
+
#
|
6
|
+
# .epub is defined by the IDPF at www.idpf.org and is made up of 3 standards:
|
7
|
+
# - Open Publication Structure (OPS)
|
8
|
+
# - Open Packaging Format (OPF)
|
9
|
+
# - Open Container Format (OCF)
|
10
|
+
#
|
11
|
+
# Specific options:
|
12
|
+
# -c, --css [FILE] Use FILE for CSS on generated XHTML.
|
13
|
+
# -d, --debug Show debugging output.
|
14
|
+
# -f, --font [OTF FILE] Embed OTF FILE in .epub.
|
15
|
+
# -h, --help Display usage info.
# -o, --output [OUTPUT FILE] Output ePub file as OUTPUT FILE.
|
16
|
+
# -s, --stylesheet [XSL FILE] Use XSL FILE as a customization
|
17
|
+
# layer (imports epub/docbook.xsl).
|
18
|
+
# -v, --verbose Make output verbose.
|
19
|
+
|
20
|
+
lib = File.expand_path(File.join(File.dirname(__FILE__), 'lib'))
|
21
|
+
$LOAD_PATH.unshift(lib) if File.exist?(lib)
|
22
|
+
|
23
|
+
require 'fileutils'
|
24
|
+
require 'optparse'
|
25
|
+
require 'tmpdir'
|
26
|
+
|
27
|
+
require 'docbook'
|
28
|
+
|
29
|
+
# Command-line state filled in by the option handlers below.
verbose = false
debug = false
css_file = nil
otf_files = []
customization_layer = nil
output_file = nil

# Set up the OptionParser
opts = OptionParser.new
opts.banner = "Usage: #{File.basename($0)} [OPTIONS] [DocBook Files]

#{File.basename($0)} converts DocBook <book> and <article>s into .epub files.

.epub is defined by the IDPF at www.idpf.org and is made up of 3 standards:
- Open Publication Structure (OPS)
- Open Packaging Format (OPF)
- Open Container Format (OCF)

Specific options:"
opts.on("-c", "--css [FILE]", "Use FILE for CSS on generated XHTML.") {|f| css_file = f}
opts.on("-d", "--debug", "Show debugging output.") {debug = true; verbose = true}
# The argument is optional, so ignore a bare "-f" instead of queuing nil.
opts.on("-f", "--font [OTF FILE]", "Embed OTF FILE in .epub.") {|f| otf_files << f if f}
opts.on("-h", "--help", "Display usage info.") {puts opts.to_s; exit 0}
opts.on("-o", "--output [OUTPUT FILE]", "Output ePub file as OUTPUT FILE.") {|f| output_file = f}
opts.on("-s", "--stylesheet [XSL FILE]", "Use XSL FILE as a customization layer (imports epub/docbook.xsl).") {|f| customization_layer = f}
opts.on("-v", "--verbose", "Make output verbose.") {verbose = true}

# Report bad options with the usage text rather than an uncaught exception.
begin
  db_files = opts.parse(ARGV)
rescue OptionParser::ParseError => e
  STDERR.puts e.message
  STDERR.puts opts.to_s
  exit 1
end

if db_files.empty?
  puts opts.to_s
  exit 0
end

# DocBook::Epub prints its debugging output only when $DEBUG is set, so the
# --debug flag has to switch it on to have any effect there.
$DEBUG = true if debug

db_files.each do |docbook_file|
  # Dir.mktmpdir creates a uniquely named working directory and removes it
  # (with everything left inside) when the block exits, even on error.
  Dir.mktmpdir(".epubtmp") do |dir|
    e = DocBook::Epub.new(docbook_file, dir, css_file, customization_layer, otf_files)

    # Without -o the .epub is named after the input file and written to the
    # current directory.
    epub_file = output_file || (File.basename(docbook_file, ".xml") + ".epub")

    puts "Rendering DocBook file #{docbook_file} to #{epub_file}" if verbose
    # Forward the verbosity so xsltproc and zip honour -v / -d as well.
    e.render_to_file(epub_file, verbose)
  end
end
|
data/lib/docbook.rb
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'rexml/parsers/pullparser'
|
3
|
+
|
4
|
+
module DocBook

  # Converts one DocBook XML document into an .epub archive by shelling out
  # to xmllint (entity/XInclude expansion), an XSLT processor (XHTML/OPF
  # generation) and zip (packaging).
  #
  # Example:
  #
  #   DocBook::Epub.new("book.xml", work_dir).render_to_file("book.epub")
  class Epub
    CHECKER = "epubcheck"
    STYLESHEET = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "docbook.xsl"))
    CALLOUT_PATH = File.join('images', 'callouts')
    CALLOUT_FULL_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', '..', CALLOUT_PATH))
    CALLOUT_LIMIT = 15
    CALLOUT_EXT = ".png"
    XSLT_PROCESSOR = "xsltproc"
    OUTPUT_DIR = ".epubtmp#{Time.now.to_f.to_s}"
    MIMETYPE = "application/epub+zip"
    META_DIR = "META-INF"
    OEBPS_DIR = "OEBPS"
    ZIPPER = "zip"
    # File references ending in one of these extensions are copied into the
    # book. Anchored at the end so that e.g. "notes.pngx" is not mistaken
    # for an image.
    IMAGE_FILE_PATTERN = /\.(svg|png|gif|jpe?g|xml)\z/i

    attr_reader :output_dir

    # Creates a converter for +docbook_file+.
    #
    # docbook_file::        path of the DocBook XML document; must exist.
    # output_dir::          working directory in which META-INF/ and OEBPS/
    #                       are generated.
    # css_file::            optional CSS file copied into the book and passed
    #                       to the stylesheet as html.stylesheet.
    # customization_layer:: optional XSL file used instead of STYLESHEET.
    # embedded_fonts::      font files to embed; nil is treated as none.
    #
    # Raises ArgumentError when +docbook_file+ does not exist.
    def initialize(docbook_file, output_dir=OUTPUT_DIR, css_file=nil, customization_layer=nil, embedded_fonts=[])
      unless File.exist?(docbook_file)
        raise ArgumentError, "File #{docbook_file} does not exist"
      end

      @docbook_file = docbook_file
      @output_dir = output_dir
      @meta_dir = File.join(@output_dir, META_DIR)
      @oebps_dir = File.join(@output_dir, OEBPS_DIR)
      @css_file = css_file ? File.expand_path(css_file) : css_file
      @embedded_fonts = Array(embedded_fonts)
      @stylesheet = customization_layer ? File.expand_path(customization_layer) : STYLESHEET
      # Every intermediate file created during a run; removed by cleanup_files.
      @to_delete = []
    end

    # Renders the document and packages it as +output_file+.
    #
    # Intermediate files are removed afterwards, whether or not rendering
    # succeeded. Raises RuntimeError when one of the external tools fails.
    def render_to_file(output_file, verbose=false)
      render_to_epub(output_file, verbose)
      bundle_epub(output_file, verbose)
    ensure
      cleanup_files(@to_delete)
    end

    # Runs CHECKER on +file+.
    #
    # Returns false when the checker exits successfully, otherwise the
    # checker's combined stdout/stderr output (a truthy String).
    def self.invalid?(file)
      # Obnoxiously, we can't just check for a non-zero output...
      cmd = %Q(#{CHECKER} "#{file}")
      output = `#{cmd} 2>&1`
      return false if $?.success?

      STDERR.puts output if $DEBUG
      output
    end

    private

    # Expands the document, then runs the XSLT processor inside @output_dir
    # so the generated META-INF/ and OEBPS/ trees land there.
    def render_to_epub(output_file, verbose)
      @collapsed_docbook_file = collapse_docbook

      # Double-quote stylesheet & file to help Windows cmd.exe
      db2epub_cmd = %Q(cd "#{@output_dir}" && #{XSLT_PROCESSOR} #{xslt_options(verbose)} "#{@stylesheet}" "#{@collapsed_docbook_file}")
      STDERR.puts db2epub_cmd if $DEBUG
      success = system(db2epub_cmd)
      raise "Could not render as .epub to #{output_file} (#{db2epub_cmd})" unless success

      @to_delete.concat(Dir["#{@meta_dir}/*"])
      @to_delete.concat(Dir["#{@oebps_dir}/*"])
    end

    # Builds the --stringparam arguments handed to the XSLT processor.
    def xslt_options(verbose)
      options = [
        "--stringparam chunk.quietly #{verbose ? '0' : '1'}",
        "--stringparam callout.graphics.path #{CALLOUT_PATH}/",
        "--stringparam callout.graphics.number.limit #{CALLOUT_LIMIT}",
        "--stringparam callout.graphics.extension #{CALLOUT_EXT}",
        "--stringparam base.dir #{OEBPS_DIR}/",
      ]
      unless @embedded_fonts.empty?
        font_names = @embedded_fonts.map {|f| File.basename(f)}.join(',')
        options << %Q(--stringparam epub.embedded.fonts "#{font_names}")
      end
      options << "--stringparam epub.metainf.dir #{META_DIR}/"
      options << "--stringparam epub.oebps.dir #{OEBPS_DIR}/"
      # Quoted like the font list so a CSS file name containing spaces survives the shell.
      options << %Q(--stringparam html.stylesheet "#{File.basename(@css_file)}") if @css_file
      options.join(" ")
    end

    # Stages the mimetype file and all referenced assets, then zips
    # mimetype, META-INF and OEBPS into +output_file+.
    def bundle_epub(output_file, verbose)
      quiet = verbose ? "" : "-q"
      mimetype_filename = write_mimetype
      meta = File.basename(@meta_dir)
      oebps = File.basename(@oebps_dir)
      copy_images
      copy_csses
      copy_fonts
      copy_callouts
      # zip -X -r ../book.epub mimetype META-INF OEBPS
      # Double-quote stylesheet & file to help Windows cmd.exe
      zip_cmd = %Q(cd "#{@output_dir}" && #{ZIPPER} #{quiet} -X -r "#{File.expand_path(output_file)}" "#{mimetype_filename}" "#{meta}" "#{oebps}")
      puts zip_cmd if $DEBUG
      success = system(zip_cmd)
      raise "Could not bundle into .epub file to #{output_file}" unless success
    end

    # Input must be collapsed because REXML couldn't find figures in files that
    # were XIncluded or added by ENTITY
    # http://sourceforge.net/tracker/?func=detail&aid=2750442&group_id=21935&atid=373747
    #
    # Writes ".collapsed.<name>" next to the source document and returns its
    # path. Raises RuntimeError when xmllint fails.
    def collapse_docbook
      collapsed_file = File.join(File.expand_path(File.dirname(@docbook_file)),
                                 '.collapsed.' + File.basename(@docbook_file))
      # Registered up front so a partially written file is removed on failure too.
      @to_delete << collapsed_file

      # Double-quote stylesheet & file to help Windows cmd.exe
      entity_collapse_command = %Q(xmllint --loaddtd --noent -o "#{collapsed_file}" "#{@docbook_file}")
      entity_success = system(entity_collapse_command)
      raise "Could not collapse named entities in #{@docbook_file}" unless entity_success

      xinclude_collapse_command = %Q(xmllint --xinclude -o "#{collapsed_file}" "#{collapsed_file}")
      xinclude_success = system(xinclude_collapse_command)
      raise "Could not collapse XIncludes in #{@docbook_file}" unless xinclude_success

      collapsed_file
    end

    # Copies +source+ to +relative_path+ below the OEBPS directory, creating
    # parent directories as needed, and schedules the copy for cleanup.
    # Returns the path of the copy.
    def copy_to_oebps(source, relative_path)
      destination = File.join(@oebps_dir, relative_path)
      FileUtils.mkdir_p(File.dirname(destination))
      FileUtils.cp(source, destination)
      @to_delete << destination
      destination
    end

    # Copies the callout graphics into the book when the document uses
    # callouts. Returns the source paths of the copied graphics.
    def copy_callouts
      return [] unless has_callouts?

      Dir.glob("#{CALLOUT_FULL_PATH}/*#{CALLOUT_EXT}").each do |img|
        copy_to_oebps(img, File.join(CALLOUT_PATH, File.basename(img)))
      end
    end

    # Copies every embedded font into the OEBPS directory.
    # Returns the source paths of the copied fonts.
    def copy_fonts
      @embedded_fonts.each do |font_file|
        copy_to_oebps(font_file, File.basename(font_file))
      end
    end

    # Copies the CSS file, if one was given, into the OEBPS directory.
    # Returns the source paths of the copied files (empty without a CSS file).
    def copy_csses
      return [] unless @css_file

      copy_to_oebps(@css_file, File.basename(@css_file))
      [@css_file]
    end

    # Copies every referenced image, resolved relative to the source
    # document's directory, to the same relative path below OEBPS.
    # Returns the absolute source paths of the copied images.
    def copy_images
      source_dir = File.expand_path(File.dirname(@docbook_file))
      # TODO: It'd be cooler if we had a filetype lookup rather than just
      # extension
      images = get_image_refs.select {|img| img =~ IMAGE_FILE_PATTERN}
      images.map do |img|
        img_full = File.join(source_dir, img)
        puts(img_full + ": " + File.join(@oebps_dir, img)) if $DEBUG
        copy_to_oebps(img_full, img)
        img_full
      end
    end

    # Writes the mimetype file into the output directory and returns its
    # base name, as needed on the zip command line.
    def write_mimetype
      mimetype_filename = File.join(@output_dir, "mimetype")
      File.open(mimetype_filename, "w") {|f| f.print MIMETYPE}
      @to_delete << mimetype_filename
      File.basename(mimetype_filename)
    end

    # Removes every file or directory in +file_list+ (nested arrays allowed);
    # entries that no longer exist are ignored.
    def cleanup_files(file_list)
      file_list.flatten.each {|f| FileUtils.rm_r(f, :force => true)}
    end

    # Yields the name and attribute Hash of every start tag in the collapsed
    # document. The file handle is closed when iteration ends, including on
    # an early return from the block.
    def each_start_element
      File.open(@collapsed_docbook_file) do |io|
        parser = REXML::Parsers::PullParser.new(io)
        while parser.has_next?
          el = parser.pull
          yield el[0], el[1] if el.start_element?
        end
      end
    end

    # Returns an Array of all of the (image) @filerefs in a document
    def get_image_refs
      image_refs = []
      each_start_element do |name, attributes|
        image_refs << attributes['fileref'] if name == "imagedata" || name == "graphic"
      end
      # Elements without a fileref attribute contribute nil; drop those.
      image_refs.compact.uniq
    end

    # Returns true if the document has code callouts
    def has_callouts?
      each_start_element do |name, _attributes|
        return true if name == "calloutlist" || name == "co"
      end
      false
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dbtoepub
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Manuel Schnitzer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-11-22 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: This program converts DocBook documents into .epub files.
|
14
|
+
email: webmaster@mschnitzer.de
|
15
|
+
executables:
|
16
|
+
- dbtoepub
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- bin/dbtoepub
|
21
|
+
- lib/docbook.rb
|
22
|
+
homepage: https://github.com/docbook/xslt10-stylesheets
|
23
|
+
licenses:
|
24
|
+
- MIT
|
25
|
+
metadata: {}
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubyforge_project:
|
42
|
+
rubygems_version: 2.2.5
|
43
|
+
signing_key:
|
44
|
+
specification_version: 4
|
45
|
+
summary: Hola!
|
46
|
+
test_files: []
|