bookshelf 1.2.1 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile.lock +23 -19
- data/README.md +6 -43
- data/bookshelf.gemspec +3 -4
- data/lib/bookshelf.rb +5 -18
- data/lib/bookshelf/adapters/markdown.rb +2 -20
- data/lib/bookshelf/cli.rb +2 -32
- data/lib/bookshelf/dependency.rb +0 -8
- data/lib/bookshelf/exporter.rb +1 -5
- data/lib/bookshelf/generator.rb +7 -6
- data/lib/bookshelf/parser.rb +1 -9
- data/lib/bookshelf/parser/epub.rb +166 -93
- data/lib/bookshelf/parser/html.rb +25 -102
- data/lib/bookshelf/parser/pdf.rb +2 -29
- data/lib/bookshelf/stats.rb +0 -8
- data/lib/bookshelf/version.rb +1 -1
- data/templates/cover.erb +6 -4
- data/templates/epub.erb +2 -2
- data/templates/helper.rb +0 -29
- data/templates/layout.erb +9 -17
- data/templates/toc.erb +20 -0
- metadata +179 -189
- data/lib/bookshelf/errors.rb +0 -3
- data/lib/bookshelf/extensions/redcloth.rb +0 -69
- data/lib/bookshelf/extensions/string.rb +0 -11
- data/lib/bookshelf/parser/mobi.rb +0 -14
- data/lib/bookshelf/parser/txt.rb +0 -18
- data/lib/bookshelf/stream.rb +0 -27
- data/lib/bookshelf/syntax.rb +0 -124
- data/lib/bookshelf/toc.rb +0 -6
- data/lib/bookshelf/toc/epub.rb +0 -41
- data/lib/bookshelf/toc/html.rb +0 -78
@@ -1,132 +1,209 @@
|
|
1
1
|
module Bookshelf
|
2
2
|
module Parser
|
3
3
|
class Epub < Base
|
4
|
-
def
|
5
|
-
@
|
6
|
-
OpenStruct.new({
|
7
|
-
:index => index,
|
8
|
-
:filename => "section_#{index}.html",
|
9
|
-
:filepath => tmp_dir.join("section_#{index}.html").to_s,
|
10
|
-
:html => Nokogiri::HTML(chapter.inner_html)
|
11
|
-
})
|
12
|
-
end
|
4
|
+
def html
|
5
|
+
@html ||= Nokogiri::HTML(html_path.read)
|
13
6
|
end
|
14
7
|
|
15
|
-
def
|
16
|
-
@
|
8
|
+
def chapters
|
9
|
+
@chapters ||= []
|
17
10
|
end
|
18
11
|
|
19
|
-
def
|
20
|
-
@
|
12
|
+
def assets
|
13
|
+
@assets ||= []
|
21
14
|
end
|
22
15
|
|
23
16
|
def parse
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
write_sections!
|
34
|
-
write_toc!
|
35
|
-
|
36
|
-
epub.files sections.map(&:filepath) + assets
|
37
|
-
epub.nav navigation
|
38
|
-
epub.toc_page toc_path
|
39
|
-
|
40
|
-
epub.save(epub_path)
|
41
|
-
|
17
|
+
_create_directories
|
18
|
+
_create_container_xml
|
19
|
+
_create_cover_html
|
20
|
+
_create_chapter_html
|
21
|
+
_create_toc_html
|
22
|
+
_create_toc_ncx
|
23
|
+
_create_assets
|
24
|
+
_create_content_opf
|
25
|
+
_create_epub
|
42
26
|
true
|
43
27
|
rescue Exception
|
44
28
|
p $!, $@
|
45
29
|
false
|
46
30
|
end
|
47
31
|
|
48
|
-
def
|
49
|
-
|
32
|
+
def _create_directories
|
33
|
+
FileUtils.rm_rf(tmp_path)
|
34
|
+
FileUtils.mkdir_p(tmp_path)
|
35
|
+
FileUtils.mkdir_p(File.join(tmp_path, "META-INF"))
|
36
|
+
FileUtils.mkdir_p(File.join(tmp_path, "styles"))
|
37
|
+
FileUtils.mkdir_p(File.join(tmp_path, "fonts"))
|
38
|
+
FileUtils.mkdir_p(File.join(tmp_path, "images"))
|
39
|
+
end
|
50
40
|
|
51
|
-
|
52
|
-
|
41
|
+
def _create_container_xml
|
42
|
+
builder = Nokogiri::XML::Builder.new("encoding" => "utf-8") do |xml|
|
43
|
+
xml.container("xmlns" => "urn:oasis:names:tc:opendocument:xmlns:container", "version" => "1.0") {
|
44
|
+
xml.rootfiles {
|
45
|
+
xml.rootfile "full-path" => "content.opf", "media-type" => "application/oebps-package+xml"
|
46
|
+
}
|
47
|
+
}
|
48
|
+
end
|
49
|
+
File.open(File.join(tmp_path, "META-INF", "container.xml"), "w") do |f|
|
50
|
+
f.write(builder.to_xml)
|
53
51
|
end
|
54
52
|
end
|
55
53
|
|
56
|
-
def
|
57
|
-
|
58
|
-
|
59
|
-
#
|
60
|
-
links = sections.inject({}) do |buffer, section|
|
61
|
-
section.html.css("[id]").each do |element|
|
62
|
-
anchor = "##{element["id"]}"
|
63
|
-
buffer[anchor] = "#{section.filename}#{anchor}"
|
64
|
-
end
|
65
|
-
|
66
|
-
buffer
|
54
|
+
def _create_cover_html
|
55
|
+
File.open(File.join(tmp_path, "cover.html"), "w") do |f|
|
56
|
+
f.write(Bookshelf.render_template(cover_template_path, :title => config[:title], :authors => config[:authors]))
|
67
57
|
end
|
58
|
+
end
|
68
59
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
href = link["href"]
|
75
|
-
link.set_attribute("href", links.fetch(href, href))
|
76
|
-
end
|
77
|
-
|
78
|
-
# Replace all srcs.
|
79
|
-
#
|
80
|
-
section.html.css("[src]").each do |element|
|
81
|
-
src = File.basename(element["src"]).gsub(/\.svg$/, ".png")
|
82
|
-
element.set_attribute("src", src)
|
83
|
-
element.set_attribute("alt", "")
|
84
|
-
element.node_name = "img"
|
60
|
+
def _create_chapter_html
|
61
|
+
html.css("div.chapter").each_with_index.map do |chapter, index|
|
62
|
+
filename = "chapter_#{index}.html"
|
63
|
+
File.open(File.join(tmp_path, filename), "w") do |f|
|
64
|
+
f.write(Bookshelf.render_template(chapter_template_path, :content => chapter.inner_html))
|
85
65
|
end
|
86
66
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
File.open(section.filepath, "w") do |file|
|
92
|
-
body = section.html.css("body").to_xhtml.gsub(%r[<body>(.*?)</body>]m, "\\1")
|
93
|
-
file << render_chapter(body)
|
94
|
-
end
|
67
|
+
chapters << {
|
68
|
+
:text => chapter.css("h2:first-of-type").text,
|
69
|
+
:src => filename
|
70
|
+
}
|
95
71
|
end
|
96
72
|
end
|
97
73
|
|
98
|
-
def
|
99
|
-
|
100
|
-
|
74
|
+
def _create_toc_html
|
75
|
+
toc = chapters.map { |chapter| [chapter[:text], chapter[:src]] }
|
76
|
+
File.open(File.join(tmp_path, "toc.html"), "w") do |f|
|
77
|
+
f.write(Bookshelf.render_template(toc_template_path, :toc => toc))
|
78
|
+
end
|
101
79
|
end
|
102
80
|
|
103
|
-
def
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
81
|
+
def _create_toc_ncx
|
82
|
+
# toc.ncx
|
83
|
+
builder = Nokogiri::XML::Builder.new do |xml|
|
84
|
+
xml.doc.create_internal_subset(
|
85
|
+
"html",
|
86
|
+
"-//W3C//DTD XHTML 1.1//EN",
|
87
|
+
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"
|
88
|
+
)
|
89
|
+
xml.ncx("xmlns" => "http://www.daisy.org/z3986/2005/ncx/", "version" => "2005-1") {
|
90
|
+
xml.head {
|
91
|
+
xml.meta("name" => "dtb:uid", "content" => config[:identifier][:id])
|
92
|
+
xml.meta("name" => "dtb:depth", "content" => "1")
|
93
|
+
xml.meta("name" => "dtb:totalPageCount", "content" => "0")
|
94
|
+
xml.meta("name" => "dtb:maxPageNumber", "content" => "0")
|
95
|
+
}
|
96
|
+
xml.docTitle {
|
97
|
+
xml.text_ config["title"]
|
98
|
+
}
|
99
|
+
xml.navMap {
|
100
|
+
chapters.each_with_index do |chapter, index|
|
101
|
+
xml.navPoint("id" => "navpoint-#{index}", "playOrder" => "#{index}") {
|
102
|
+
xml.navLabel {
|
103
|
+
xml.text_ chapter[:text]
|
104
|
+
}
|
105
|
+
xml.content("src" => chapter[:src])
|
106
|
+
}
|
107
|
+
end
|
108
|
+
}
|
109
|
+
}
|
110
|
+
end
|
111
|
+
File.open(File.join(tmp_path, "toc.ncx"), "w") do |f|
|
112
|
+
f.write builder.to_xml
|
109
113
|
end
|
110
114
|
end
|
111
115
|
|
112
|
-
def
|
113
|
-
|
114
|
-
|
116
|
+
def _create_assets
|
117
|
+
base_assets = []
|
118
|
+
base_assets << asset_path.join("styles/epub.css").to_s
|
119
|
+
base_assets.concat( Dir[asset_path.join("fonts/*.*")])
|
120
|
+
base_assets.concat(Dir[asset_path.join("images/*.{jpg,png,gif}")])
|
121
|
+
base_assets.each do |base_asset|
|
122
|
+
asset = base_asset.sub("#{asset_path.to_s}/", "")
|
123
|
+
FileUtils.cp(base_asset, File.join(tmp_path, asset))
|
124
|
+
assets << asset
|
125
|
+
end
|
115
126
|
end
|
116
127
|
|
117
|
-
def
|
118
|
-
|
119
|
-
{
|
120
|
-
:
|
121
|
-
|
128
|
+
def _create_content_opf
|
129
|
+
builder = Nokogiri::XML::Builder.new do |xml|
|
130
|
+
xml.package("xmlns" => "http://www.idpf.org/2007/opf", "unique-identifier" => config[:uid], "version" => 2.0 ) {
|
131
|
+
xml.metadata("xmlns:dc" => "http://purl.org/dc/elements/1.1/", "xmlns:dcterms" => "http://purl.org/dc/terms/", "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance", "xmlns:opf" => "http://www.idpf.org/2007/opf") {
|
132
|
+
xml["dc"].identifier config[:identifier][:id], {"opf:scheme" => config[:identifier][:type], "id" => config[:uid]}
|
133
|
+
xml["dc"].title config[:title]
|
134
|
+
xml["dc"].language config[:language]
|
135
|
+
xml["dc"].creator config[:authors].join(",")
|
136
|
+
xml["dc"].publisher config[:publisher]
|
137
|
+
xml["dc"].date config[:published_at]
|
138
|
+
xml.meta("name" => "cover", "content" => "cover")
|
139
|
+
}
|
140
|
+
xml.manifest {
|
141
|
+
chapters.each do |chapter|
|
142
|
+
xml.item("id" => chapter[:src], "href" => chapter[:src], "media-type" => "application/xhtml+xml")
|
143
|
+
end
|
144
|
+
assets.each do |asset|
|
145
|
+
id = asset.sub(/(styles|fonts|images)\//, "")
|
146
|
+
media_type = if asset.ends_with?("css")
|
147
|
+
"text/css"
|
148
|
+
else
|
149
|
+
""
|
150
|
+
end
|
151
|
+
xml.item("id" => id, "href" => asset, "media-type" => media_type)
|
152
|
+
end
|
153
|
+
xml.item("id" => "cover", "href" => "cover.html", "media-type" => "application/xhtml+xml")
|
154
|
+
xml.item("id" => "toc", "href" => "toc.html", "media-type" => "application/xhtml+xml")
|
155
|
+
xml.item("id" => "ncx", "href" => "toc.ncx", "media-type" => "application/x-dtbncx+xml")
|
156
|
+
}
|
157
|
+
xml.spine("toc" => "ncx") {
|
158
|
+
xml.itemref("idref" => "cover")
|
159
|
+
xml.itemref("idref" => "toc")
|
160
|
+
chapters.each do |chapter|
|
161
|
+
xml.itemref("idref" => chapter[:src])
|
162
|
+
end
|
163
|
+
}
|
164
|
+
xml.guide {
|
165
|
+
xml.reference("type" => "toc", "title" => "Table of Contents", "href" => "toc.html")
|
166
|
+
if chapters.length > 0
|
167
|
+
xml.reference("type" => "text", "title" => chapters[0][:text], "href" => chapters[0][:src])
|
168
|
+
end
|
169
|
+
}
|
122
170
|
}
|
123
171
|
end
|
172
|
+
File.open(File.join(tmp_path, "content.opf"), "w") do |f|
|
173
|
+
f.write builder.to_xml
|
174
|
+
end
|
124
175
|
end
|
125
176
|
|
126
|
-
def
|
177
|
+
def _create_epub
|
178
|
+
require "zip"
|
179
|
+
# mimetype needs to be uncompressed
|
180
|
+
Zip::OutputStream::open(epub_path) do |os|
|
181
|
+
os.put_next_entry("mimetype", nil, nil, Zip::Entry::STORED, Zlib::NO_COMPRESSION)
|
182
|
+
os << "application/epub+zip"
|
183
|
+
end
|
184
|
+
zipfile = Zip::File.open(epub_path)
|
185
|
+
Dir.glob(File.join(tmp_path, "**/*")).each do |path|
|
186
|
+
zipfile.add(path.sub("#{tmp_path.to_s}/", ""), path )
|
187
|
+
end
|
188
|
+
zipfile.commit
|
189
|
+
end
|
190
|
+
|
191
|
+
def cover_template_path
|
192
|
+
Bookshelf.root_dir.join("templates/epub/cover.erb")
|
193
|
+
end
|
194
|
+
|
195
|
+
def toc_template_path
|
196
|
+
Bookshelf.root_dir.join("templates/epub/toc.erb")
|
197
|
+
end
|
198
|
+
|
199
|
+
def chapter_template_path
|
127
200
|
Bookshelf.root_dir.join("templates/epub/page.erb")
|
128
201
|
end
|
129
202
|
|
203
|
+
def asset_path
|
204
|
+
Bookshelf.root_dir.join("output/assets")
|
205
|
+
end
|
206
|
+
|
130
207
|
def html_path
|
131
208
|
Bookshelf.root_dir.join("output/#{name}.html")
|
132
209
|
end
|
@@ -135,13 +212,9 @@ module Bookshelf
|
|
135
212
|
Bookshelf.root_dir.join("output/#{name}.epub")
|
136
213
|
end
|
137
214
|
|
138
|
-
def
|
215
|
+
def tmp_path
|
139
216
|
Bookshelf.root_dir.join("output/tmp")
|
140
217
|
end
|
141
|
-
|
142
|
-
def toc_path
|
143
|
-
tmp_dir.join("toc.html")
|
144
|
-
end
|
145
218
|
end
|
146
219
|
end
|
147
220
|
end
|
@@ -11,48 +11,46 @@ module Bookshelf
|
|
11
11
|
|
12
12
|
# List of recognized extensions.
|
13
13
|
#
|
14
|
-
EXTENSIONS = %w[md mkdn markdown
|
15
|
-
|
16
|
-
class << self
|
17
|
-
# The footnote index control. We have to manipulate footnotes
|
18
|
-
# because each chapter starts from 1, so we have duplicated references.
|
19
|
-
#
|
20
|
-
attr_accessor :footnote_index
|
21
|
-
end
|
14
|
+
EXTENSIONS = %w[md mkdn markdown html]
|
22
15
|
|
23
16
|
# Parse all files and save the parsed content
|
24
17
|
# to <tt>output/book_name.html</tt>.
|
25
18
|
#
|
26
19
|
def parse
|
27
|
-
reset_footnote_index!
|
28
|
-
|
29
20
|
File.open(Bookshelf.root_dir.join("output/#{name}.html"), "w") do |file|
|
30
|
-
|
21
|
+
locals = config.merge({
|
22
|
+
:content => content,
|
23
|
+
:toc => toc
|
24
|
+
})
|
25
|
+
file << Bookshelf.render_template(Bookshelf.root_dir.join("templates/html/layout.erb"), locals)
|
31
26
|
end
|
32
27
|
true
|
33
|
-
|
34
|
-
|
35
|
-
end
|
36
|
-
|
37
|
-
def reset_footnote_index!
|
38
|
-
self.class.footnote_index = 1
|
28
|
+
rescue Exception
|
29
|
+
false
|
39
30
|
end
|
40
31
|
|
41
32
|
# Return all chapters wrapped in a <tt>div.chapter</tt> tag.
|
42
33
|
#
|
43
34
|
def content
|
44
|
-
String.new.tap do |chapters|
|
35
|
+
@content ||= String.new.tap do |chapters|
|
45
36
|
entries.each do |entry|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
37
|
+
chapters << %[<div class="chapter">#{render_file(entry)}</div>]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
50
41
|
|
51
|
-
|
42
|
+
def toc
|
43
|
+
if @toc.blank?
|
44
|
+
@toc = ""
|
45
|
+
Nokogiri::HTML(content).css(".chapter h2:first-of-type").each do |xml|
|
46
|
+
@toc << %[<div><a href="##{xml.attribute("id")}"><span>#{CGI.escape_html(xml.text)}</span></a></div>]
|
52
47
|
end
|
53
48
|
end
|
49
|
+
return @toc
|
54
50
|
end
|
55
51
|
|
52
|
+
private
|
53
|
+
|
56
54
|
# Return a list of all recognized files.
|
57
55
|
#
|
58
56
|
def entries
|
@@ -62,27 +60,11 @@ module Bookshelf
|
|
62
60
|
end
|
63
61
|
end
|
64
62
|
|
65
|
-
private
|
66
|
-
def chapter_files(entry)
|
67
|
-
# Chapters can be files outside a directory.
|
68
|
-
if File.file?(entry)
|
69
|
-
[entry]
|
70
|
-
else
|
71
|
-
Dir.glob("#{entry}/**/*.{#{EXTENSIONS.join(",")}}").sort
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
63
|
# Check if path is a valid entry.
|
76
|
-
# Files
|
64
|
+
# Files that start with a dot or underscore will be skipped.
|
77
65
|
#
|
78
66
|
def valid_entry?(entry)
|
79
|
-
entry !~ /^(\.|_)/ &&
|
80
|
-
end
|
81
|
-
|
82
|
-
# Check if path is a valid directory.
|
83
|
-
#
|
84
|
-
def valid_directory?(entry)
|
85
|
-
File.directory?(book_dir.join(entry)) && !IGNORE_DIR.include?(File.basename(entry))
|
67
|
+
entry !~ /^(\.|_)/ && valid_file?(entry)
|
86
68
|
end
|
87
69
|
|
88
70
|
# Check if path is a valid file.
|
@@ -94,84 +76,25 @@ module Bookshelf
|
|
94
76
|
|
95
77
|
# Render +file+ considering its extension.
|
96
78
|
#
|
97
|
-
def render_file(file
|
79
|
+
def render_file(file)
|
98
80
|
file_format = format(file)
|
99
|
-
content =
|
81
|
+
content = File.read(file)
|
100
82
|
content = case file_format
|
101
83
|
when :markdown
|
102
84
|
Markdown.to_html(content)
|
103
|
-
when :textile
|
104
|
-
RedCloth.convert(content)
|
105
85
|
else
|
106
86
|
content
|
107
87
|
end
|
108
|
-
|
109
|
-
render_footnotes(content, plain_syntax)
|
110
|
-
end
|
111
|
-
|
112
|
-
def render_footnotes(content, plain_syntax = false)
|
113
|
-
html = Nokogiri::HTML(content)
|
114
|
-
footnotes = html.css("p[id^='fn']")
|
115
|
-
|
116
|
-
return content if footnotes.empty?
|
117
|
-
|
118
|
-
reset_footnote_index! unless self.class.footnote_index
|
119
|
-
|
120
|
-
footnotes.each do |fn|
|
121
|
-
index = self.class.footnote_index
|
122
|
-
actual_index = fn["id"].gsub(/[^\d]/, "")
|
123
|
-
|
124
|
-
fn.set_attribute("id", "_fn#{index}")
|
125
|
-
|
126
|
-
html.css("a[href='#fn#{actual_index}']").each do |link|
|
127
|
-
link.set_attribute("href", "#_fn#{index}")
|
128
|
-
end
|
129
|
-
|
130
|
-
html.css("a[href='#fnr#{actual_index}']").each do |link|
|
131
|
-
link.set_attribute("href", "#_fnr#{index}")
|
132
|
-
end
|
133
|
-
|
134
|
-
html.css("[id=fnr#{actual_index}]").each do |tag|
|
135
|
-
tag.set_attribute("id", "_fnr#{index}")
|
136
|
-
end
|
137
|
-
|
138
|
-
self.class.footnote_index += 1
|
139
|
-
end
|
140
|
-
|
141
|
-
html.css("body").inner_html
|
142
88
|
end
|
143
89
|
|
144
90
|
def format(file)
|
145
91
|
case File.extname(file).downcase
|
146
92
|
when ".markdown", ".mkdn", ".md"
|
147
93
|
:markdown
|
148
|
-
when ".textile"
|
149
|
-
:textile
|
150
94
|
else
|
151
95
|
:html
|
152
96
|
end
|
153
97
|
end
|
154
|
-
|
155
|
-
# Parse layout file, making available all configuration entries.
|
156
|
-
#
|
157
|
-
def parse_layout(html)
|
158
|
-
toc = TOC::HTML.generate(html)
|
159
|
-
locals = config.merge({
|
160
|
-
:content => toc.content,
|
161
|
-
:toc => toc.to_html
|
162
|
-
})
|
163
|
-
render_template(Bookshelf.root_dir.join("templates/html/layout.erb"), locals)
|
164
|
-
end
|
165
|
-
|
166
|
-
# Render all +files+ from a given chapter.
|
167
|
-
#
|
168
|
-
def render_chapter(files, plain_syntax = false)
|
169
|
-
String.new.tap do |chapter|
|
170
|
-
files.each do |file|
|
171
|
-
chapter << render_file(file, plain_syntax) << "\n\n"
|
172
|
-
end
|
173
|
-
end
|
174
|
-
end
|
175
98
|
end
|
176
99
|
end
|
177
100
|
end
|