dbtoepub 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/dbtoepub +76 -0
  3. data/lib/docbook.rb +227 -0
  4. metadata +46 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d62171d3ad82dff798473353b3b55322c6ce7afb
4
+ data.tar.gz: 7bb725b3ea583d42bfcdede5957546dc4591eeec
5
+ SHA512:
6
+ metadata.gz: f28ea47865935fcd124d13b4e54d50eedbe9a53480626792dec14803afcb073a273cfd386b6a277cbd1138117b686c0936b4441068aca88f96be68eefede5535
7
+ data.tar.gz: 555e7668a6f87edae840b9ce5868dfff8b351a760eb9b06bb211486dfb7a1995f8250767356377a780015460f76acdb9ebcf7fb167a6cd65034ab58c55f1479d
data/bin/dbtoepub ADDED
@@ -0,0 +1,76 @@
#!/usr/bin/env ruby
# This program converts DocBook documents into .epub files.
#
# Usage: dbtoepub [OPTIONS] [DocBook Files]
#
# .epub is defined by the IDPF at www.idpf.org and is made up of 3 standards:
# - Open Publication Structure (OPS)
# - Open Packaging Format (OPF)
# - Open Container Format (OCF)
#
# Specific options:
#     -c, --css [FILE]                 Use FILE for CSS on generated XHTML.
#     -d, --debug                      Show debugging output.
#     -f, --font [OTF FILE]            Embed OTF FILE in .epub.
#     -h, --help                       Display usage info.
#     -o, --output [OUTPUT FILE]       Output ePub file as OUTPUT FILE.
#     -s, --stylesheet [XSL FILE]      Use XSL FILE as a customization
#                                        layer (imports epub/docbook.xsl).
#     -v, --verbose                    Make output verbose.

# Make docbook.rb loadable when the script is run straight from a checkout.
# Both a lib/ directory next to this script and a lib/ directory that is a
# sibling of the script's directory are considered; a candidate is only added
# when it actually contains docbook.rb.
here = File.dirname(__FILE__)
[File.join(here, 'lib'), File.join(here, '..', 'lib')].each do |candidate|
  lib = File.expand_path(candidate)
  $LOAD_PATH.unshift(lib) if File.exist?(File.join(lib, 'docbook.rb'))
end

require 'fileutils'
require 'optparse'
require 'tmpdir'

require 'docbook'

verbose = false
debug = false
css_file = nil
otf_files = []
customization_layer = nil
output_file = nil

# Set up the OptionParser
opts = OptionParser.new
opts.banner = "Usage: #{File.basename($0)} [OPTIONS] [DocBook Files]

#{File.basename($0)} converts DocBook <book> and <article>s into to .epub files.

.epub is defined by the IDPF at www.idpf.org and is made up of 3 standards:
- Open Publication Structure (OPS)
- Open Packaging Format (OPF)
- Open Container Format (OCF)

Specific options:"
opts.on("-c", "--css [FILE]", "Use FILE for CSS on generated XHTML.") { |f| css_file = f }
opts.on("-d", "--debug", "Show debugging output.") { debug = true; verbose = true }
# The argument is declared optional, so ignore a bare -f instead of queueing nil.
opts.on("-f", "--font [OTF FILE]", "Embed OTF FILE in .epub.") { |f| otf_files << f if f }
opts.on("-h", "--help", "Display usage info.") { puts opts.to_s; exit 0 }
opts.on("-o", "--output [OUTPUT FILE]", "Output ePub file as OUTPUT FILE.") { |f| output_file = f }
opts.on("-s", "--stylesheet [XSL FILE]", "Use XSL FILE as a customization layer (imports epub/docbook.xsl).") { |f| customization_layer = f }
opts.on("-v", "--verbose", "Make output verbose.") { verbose = true }

db_files = opts.parse(ARGV)
if db_files.empty?
  puts opts.to_s
  exit 0
end

# DocBook::Epub prints its shell commands only when $DEBUG is set, so --debug
# has to flip that global to have any effect.
$DEBUG = true if debug

db_files.each do |docbook_file|
  # Dir.mktmpdir picks a unique working directory and removes it once the
  # block finishes, even if rendering raises.
  Dir.mktmpdir(".epubtmp") do |dir|
    e = DocBook::Epub.new(docbook_file, dir, css_file, customization_layer, otf_files)

    # Default output name: the input's basename with .xml swapped for .epub,
    # written to the current directory.
    epub_file = output_file || (File.basename(docbook_file, ".xml") + ".epub")

    puts "Rendering DocBook file #{docbook_file} to #{epub_file}" if verbose
    # Pass verbose through so xsltproc chunking and zip are not forced quiet.
    e.render_to_file(epub_file, verbose)
  end
end
data/lib/docbook.rb ADDED
@@ -0,0 +1,227 @@
require 'fileutils'
require 'rexml/parsers/pullparser'

module DocBook

  # Renders a DocBook XML document as an .epub archive by shelling out to
  # xmllint (entity/XInclude expansion), xsltproc (XHTML + package files) and
  # zip (final container). Intermediate files are tracked in @to_delete and
  # removed once rendering finishes.
  class Epub
    # External command used by Epub.invalid? to validate an .epub file.
    CHECKER = "epubcheck"
    # Default stylesheet: docbook.xsl two directories above this file.
    STYLESHEET = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "docbook.xsl"))
    # Callout image location, relative to the OEBPS directory.
    CALLOUT_PATH = File.join('images', 'callouts')
    # Where the callout images are copied from (three directories above this file).
    CALLOUT_FULL_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', '..', CALLOUT_PATH))
    CALLOUT_LIMIT = 15
    CALLOUT_EXT = ".png"
    XSLT_PROCESSOR = "xsltproc"
    # Default working directory name; evaluated once, when this file is loaded.
    OUTPUT_DIR = ".epubtmp#{Time.now.to_f.to_s}"
    MIMETYPE = "application/epub+zip"
    META_DIR = "META-INF"
    OEBPS_DIR = "OEBPS"
    ZIPPER = "zip"

    attr_reader :output_dir

    # docbook_file::        path of the DocBook document to convert
    # output_dir::          working directory the intermediate files are written to
    # css_file::            optional CSS file copied next to the generated XHTML
    # customization_layer:: optional XSL file used instead of STYLESHEET
    # embedded_fonts::      paths of font files to copy into the archive
    #
    # Raises ArgumentError when docbook_file does not exist.
    def initialize(docbook_file, output_dir=OUTPUT_DIR, css_file=nil, customization_layer=nil, embedded_fonts=[])
      @docbook_file = docbook_file
      @output_dir = output_dir
      @meta_dir = File.join(@output_dir, META_DIR)
      @oebps_dir = File.join(@output_dir, OEBPS_DIR)
      @css_file = css_file ? File.expand_path(css_file) : css_file
      @embedded_fonts = embedded_fonts
      @to_delete = []
      @stylesheet = customization_layer ? File.expand_path(customization_layer) : STYLESHEET

      unless File.exist?(@docbook_file)
        raise ArgumentError, "File #{@docbook_file} does not exist"
      end
    end

    # Renders the document and zips the result into output_file. verbose
    # controls how chatty xsltproc and zip are. Raises RuntimeError when one
    # of the external commands fails.
    def render_to_file(output_file, verbose=false)
      render_to_epub(output_file, verbose)
      bundle_epub(output_file, verbose)
    ensure
      # Remove intermediate files even when a step above raised; otherwise the
      # collapsed copy written next to the source document is left behind.
      cleanup_files(@to_delete)
    end

    # Runs CHECKER on file. Returns false when the checker exits successfully,
    # otherwise the checker's combined stdout/stderr output (which is truthy).
    def self.invalid?(file)
      # Obnoxiously, we can't just check for a non-zero output...
      cmd = %Q(#{CHECKER} "#{file}")
      output = `#{cmd} 2>&1`
      return false if $?.success?

      STDERR.puts output if $DEBUG
      output
    end

    private

    # Collapses the document, then runs the XSLT processor inside @output_dir
    # to produce the META-INF and OEBPS trees. The generated entries are
    # queued for cleanup.
    def render_to_epub(output_file, verbose)
      @collapsed_docbook_file = collapse_docbook

      params = [
        "--stringparam chunk.quietly " + (verbose ? '0' : '1'),
        "--stringparam callout.graphics.path #{CALLOUT_PATH}/",
        "--stringparam callout.graphics.number.limit #{CALLOUT_LIMIT}",
        "--stringparam callout.graphics.extension #{CALLOUT_EXT}",
        "--stringparam base.dir #{OEBPS_DIR}/",
      ]
      unless @embedded_fonts.empty?
        font_names = @embedded_fonts.map { |f| File.basename(f) }.join(',')
        params << %Q(--stringparam epub.embedded.fonts "#{font_names}")
      end
      params << "--stringparam epub.metainf.dir #{META_DIR}/"
      params << "--stringparam epub.oebps.dir #{OEBPS_DIR}/"
      # Quoted like every other file name on this command line so a CSS file
      # name containing spaces stays a single argument.
      params << %Q(--stringparam html.stylesheet "#{File.basename(@css_file)}") if @css_file
      options = params.join(" ")

      # Double-quote stylesheet & file to help Windows cmd.exe
      db2epub_cmd = %Q(cd "#{@output_dir}" && #{XSLT_PROCESSOR} #{options} "#{@stylesheet}" "#{@collapsed_docbook_file}")
      STDERR.puts db2epub_cmd if $DEBUG
      success = system(db2epub_cmd)
      raise "Could not render as .epub to #{output_file} (#{db2epub_cmd})" unless success
      @to_delete.concat(Dir["#{@meta_dir}/*"])
      @to_delete.concat(Dir["#{@oebps_dir}/*"])
    end

    # Writes the mimetype file, copies images, CSS, fonts and callout graphics
    # into the OEBPS tree, then zips everything into output_file.
    def bundle_epub(output_file, verbose)
      quiet = verbose ? "" : "-q"
      mimetype_filename = write_mimetype
      meta = File.basename(@meta_dir)
      oebps = File.basename(@oebps_dir)
      copy_images
      copy_csses
      copy_fonts
      copy_callouts
      # zip -X -r ../book.epub mimetype META-INF OEBPS
      # Double-quote stylesheet & file to help Windows cmd.exe
      zip_cmd = %Q(cd "#{@output_dir}" && #{ZIPPER} #{quiet} -X -r "#{File.expand_path(output_file)}" "#{mimetype_filename}" "#{meta}" "#{oebps}")
      puts zip_cmd if $DEBUG
      success = system(zip_cmd)
      raise "Could not bundle into .epub file to #{output_file}" unless success
    end

    # Input must be collapsed because REXML couldn't find figures in files that
    # were XIncluded or added by ENTITY
    # http://sourceforge.net/tracker/?func=detail&aid=2750442&group_id=21935&atid=373747
    #
    # Writes ".collapsed.<basename>" next to the source document and returns
    # its path. Raises RuntimeError when either xmllint pass fails.
    def collapse_docbook
      # Double-quote stylesheet & file to help Windows cmd.exe
      collapsed_file = File.join(File.expand_path(File.dirname(@docbook_file)),
                                 '.collapsed.' + File.basename(@docbook_file))
      entity_collapse_command = %Q(xmllint --loaddtd --noent -o "#{collapsed_file}" "#{@docbook_file}")
      entity_success = system(entity_collapse_command)
      raise "Could not collapse named entites in #{@docbook_file}" unless entity_success

      # Second pass rewrites the collapsed file in place with XIncludes resolved.
      xinclude_collapse_command = %Q(xmllint --xinclude -o "#{collapsed_file}" "#{collapsed_file}")
      xinclude_success = system(xinclude_collapse_command)
      raise "Could not collapse XIncludes in #{@docbook_file}" unless xinclude_success

      @to_delete << collapsed_file
      collapsed_file
    end

    # Copies the stock callout graphics into the OEBPS tree when the document
    # uses callouts. Returns the source paths of the copied images.
    def copy_callouts
      new_callout_images = []
      return new_callout_images unless has_callouts?

      Dir.glob("#{CALLOUT_FULL_PATH}/*#{CALLOUT_EXT}").each do |img|
        img_new_filename = File.join(@oebps_dir, CALLOUT_PATH, File.basename(img))

        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        FileUtils.cp(img, img_new_filename)
        @to_delete << img_new_filename
        new_callout_images << img
      end
      new_callout_images
    end

    # Copies every embedded font into the OEBPS directory and returns the
    # source paths.
    def copy_fonts
      @embedded_fonts.map do |font_file|
        font_new_filename = File.join(@oebps_dir, File.basename(font_file))
        FileUtils.cp(font_file, font_new_filename)
        # Track the copy for cleanup, as the image and callout copies are.
        @to_delete << font_new_filename
        font_file
      end
    end

    # Copies the CSS file, when one was given, into the OEBPS directory.
    def copy_csses
      return unless @css_file

      css_new_filename = File.join(@oebps_dir, File.basename(@css_file))
      FileUtils.cp(@css_file, css_new_filename)
      # Track the copy for cleanup, as the image and callout copies are.
      @to_delete << css_new_filename
    end

    # Copies every referenced image whose name carries a known extension from
    # the source document's directory into the same relative location under
    # OEBPS. Returns the full source paths of the copied images.
    def copy_images
      new_images = []
      get_image_refs.each do |img|
        # TODO: It'd be cooler if we had a filetype lookup rather than just
        # extension
        next unless img =~ /\.(svg|png|gif|jpe?g|xml)/i

        img_new_filename = File.join(@oebps_dir, img)
        img_full = File.join(File.expand_path(File.dirname(@docbook_file)), img)

        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        puts(img_full + ": " + img_new_filename) if $DEBUG
        FileUtils.cp(img_full, img_new_filename)
        @to_delete << img_new_filename
        new_images << img_full
      end
      new_images
    end

    # Writes the "mimetype" file into the output directory and returns its
    # basename. print is used so no trailing newline is written.
    def write_mimetype
      mimetype_filename = File.join(@output_dir, "mimetype")
      File.open(mimetype_filename, "w") { |f| f.print MIMETYPE }
      @to_delete << mimetype_filename
      File.basename(mimetype_filename)
    end

    # Recursively removes every path in file_list (nested arrays allowed).
    def cleanup_files(file_list)
      file_list.flatten.each do |f|
        # Yikes
        FileUtils.rm_r(f, :force => true)
      end
    end

    # Yields the name and attribute hash of every start tag in the collapsed
    # document. The file is opened in block form so the handle is closed even
    # when the caller returns early or the parser raises.
    def each_start_element
      File.open(@collapsed_docbook_file) do |xml|
        parser = REXML::Parsers::PullParser.new(xml)
        while parser.has_next?
          el = parser.pull
          yield el[0], el[1] if el.start_element?
        end
      end
    end

    # Returns an Array of all of the (image) @filerefs in a document.
    # Elements without a fileref attribute are skipped; duplicates are removed.
    def get_image_refs
      image_refs = []
      each_start_element do |name, attrs|
        image_refs << attrs['fileref'] if name == "imagedata" || name == "graphic"
      end
      image_refs.compact.uniq
    end

    # Returns true if the document has code callouts
    def has_callouts?
      each_start_element do |name, _attrs|
        return true if name == "calloutlist" || name == "co"
      end
      false
    end
  end
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dbtoepub
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Schnitzer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-11-22 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: This program converts DocBook documents into .epub files.
14
+ email: webmaster@mschnitzer.de
15
+ executables:
16
+ - dbtoepub
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/dbtoepub
21
+ - lib/docbook.rb
22
+ homepage: https://github.com/docbook/xslt10-stylesheets
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.2.5
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Hola!
46
+ test_files: []