epub-rb 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/CODEOWNERS +4 -0
- data/.github/FUNDING.yml +4 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +41 -0
- data/.github/ISSUE_TEMPLATE/config.yml +5 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +23 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +38 -0
- data/.github/dependabot.yml +15 -0
- data/.github/workflows/ruby-tests.yml +55 -0
- data/.gitignore +13 -0
- data/.rubocop.yml +13 -0
- data/CHANGELOG.md +16 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/CONTRIBUTING.md +81 -0
- data/Gemfile +5 -0
- data/LICENSE.md +20 -0
- data/README.md +116 -0
- data/Rakefile +15 -0
- data/bin/console +16 -0
- data/bin/setup +10 -0
- data/epub-rb.gemspec +49 -0
- data/exe/epub +5 -0
- data/lib/epub/cli.rb +23 -0
- data/lib/epub/epub.rb +119 -0
- data/lib/epub/generator.rb +17 -0
- data/lib/epub/mime_type.rb +33 -0
- data/lib/epub/navigation.rb +90 -0
- data/lib/epub/templates/.keep +0 -0
- data/lib/epub/v3.rb +208 -0
- data/lib/epub/version.rb +5 -0
- data/lib/epub-rb.rb +3 -0
- data/lib/epub.rb +18 -0
- data/playground.rb +32 -0
- metadata +237 -0
data/lib/epub/epub.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Epub
  # Configuration object describing a book. It holds the metadata and the
  # list of files, and delegates the actual packaging to `epub_engine`.
  class Epub
    # Root directory, so we can get the relative path for `:files`.
    # Defaults to the current working directory.
    #
    attr_accessor :root_dir

    # EPUB spec version engine. Only EPUBv3 is supported.
    # Must respond to `new(config:, output_path:)` and return an object that
    # responds to `save`.
    #
    attr_accessor :epub_engine

    # The book's title.
    #
    attr_accessor :title

    # The book's subtitle.
    #
    attr_accessor :subtitle

    # The list of files that will be packaged.
    # Must include everything (fonts, stylesheets, videos, etc).
    # Entries are always stored as `Pathname` instances (see `#files=`).
    #
    attr_reader :files

    # The path to the cover image (can be .png or .jpg).
    #
    attr_accessor :cover_image

    # The epub identifier.
    # This must be a UUIDv4. For additional ids based on identifier schemes,
    # use `Epub::Epub#identifiers`. Defaults to `SecureRandom.uuid`.
    #
    attr_accessor :id

    # The epub's identifier schemes. Must be a list of URN strings.
    # `urn:uuid:<id>` is appended automatically on initialization.
    #
    # URNs look like:
    #
    # - UUID: `urn:uuid:A1B0D67E-2E81-4DF5-9E67-A64CBE366809`
    # - ISBN: `urn:isbn:9780000000001`
    # - DOI: `doi:10.1016/j.iheduc.2008.03.001`
    # - JDCN: `915869090000000000DD`
    #
    # http://idpf.github.io/epub-registries/identifiers/identifiers.html
    #
    attr_accessor :identifiers

    # The publication date. When given to `new`, it may be a string like
    # `2024-01-24` or any object responding to `to_date` (`Date`, `Time`);
    # it is stored as a `Date`. Defaults to today.
    #
    attr_accessor :date

    # The publisher entity.
    #
    attr_accessor :publisher

    # A list of creators.
    #
    # @type String[]
    #
    attr_accessor :creators

    # A list of contributors.
    #
    # @type String[]
    #
    attr_accessor :contributors

    # The copyright notice.
    #
    attr_accessor :copyright

    # The book language.
    # Defaults to `en`.
    #
    attr_accessor :language

    # Temporary directory used as the staging area while packaging.
    # Defaults to a fresh, UUID-named path under `Dir.tmpdir`.
    #
    attr_accessor :tmpdir

    # Set debug mode.
    # When enabled, tmpdir is not removed after the book is generated.
    #
    attr_accessor :debug
    alias debug? debug

    # Builds the configuration. Every keyword is assigned through the
    # matching public writer, so an unknown keyword raises `NoMethodError`.
    # Missing values are then replaced by their defaults and list-like
    # attributes are coerced to arrays.
    def initialize(**kwargs)
      kwargs.each {|key, value| public_send(:"#{key}=", value) }

      self.date = normalize_date(date)
      self.id ||= SecureRandom.uuid
      self.epub_engine ||= V3
      self.language ||= "en"
      self.tmpdir = Pathname.new(tmpdir || File.join(Dir.tmpdir, SecureRandom.uuid))
      self.root_dir = Pathname.new(root_dir || Dir.pwd)
      self.creators = Array(creators)
      self.contributors = Array(contributors)
      self.files ||= []

      # `Array(array)` returns the very same object, so build a new list
      # instead of appending to (and mutating) the caller's array.
      self.identifiers = [*Array(identifiers), "urn:uuid:#{id}"]
    end

    # Replaces the file list; every entry is converted to a `Pathname`.
    def files=(files)
      @files = Array(files).map {|file| Pathname.new(file) }
    end

    # Save the file at specified output path.
    # The bundled V3 engine replaces any file that already exists there.
    #
    # `tmpdir` is created before the engine runs and removed afterwards (even
    # when the engine raises, and even when the directory was supplied by the
    # caller), unless debug mode is on.
    def save(output_path)
      FileUtils.mkdir_p(tmpdir)
      epub_engine.new(config: self, output_path: output_path).save
    ensure
      FileUtils.rm_rf(tmpdir) unless debug?
    end

    # Coerces the user-supplied publication date into a `Date`.
    # nil/false means "today"; objects that know how to become a date are
    # converted directly; anything else is parsed from its string form.
    private def normalize_date(value)
      return Date.today unless value
      return value.to_date if value.respond_to?(:to_date)

      Date.parse(value.to_s)
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Epub
  # Thor generator group. Templates are resolved from the `templates`
  # directory that sits next to this file.
  class Generator < Thor::Group
    include Thor::Actions

    # Options for the generator run, readable and writable.
    attr_accessor :options

    class << self
      # Directory Thor actions use as the base for template lookups.
      def source_root
        File.join(__dir__, "templates")
      end
    end

    no_commands do
      # Add helper methods here
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Epub
  # Resolves the media type declared for each manifest item from the file
  # extension.
  module MimeType
    # Memoized table of lowercase extension (with leading dot) => media type.
    # The hash is intentionally left unfrozen.
    #
    # https://www.w3.org/TR/epub/#sec-core-media-types
    def self.list
      @list ||= {
        ".html" => "application/xhtml+xml",
        ".xhtml" => "application/xhtml+xml",
        ".png" => "image/png",
        ".jpeg" => "image/jpeg",
        ".jpg" => "image/jpeg",
        ".gif" => "image/gif",
        ".webp" => "image/webp",
        ".svg" => "image/svg+xml",
        ".woff" => "font/woff",
        ".woff2" => "font/woff2",
        ".ttf" => "font/ttf",
        ".otf" => "font/otf",
        ".css" => "text/css",
        ".mp3" => "audio/mpeg",
        ".m4a" => "audio/mp4",
        ".ogg" => "audio/ogg; codecs=opus",
        ".opus" => "audio/ogg; codecs=opus",
        ".js" => "application/javascript",
        ".ncx" => "application/x-dtbncx+xml",
        ".smil" => "application/smil+xml"
      }
    end

    # Returns the media type for `file` (a String or Pathname), matching the
    # extension case-insensitively. Returns nil when the extension is not in
    # the table, or when `file` is nil.
    def self.[](file)
      list[File.extname(file.to_s).downcase]
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Epub
  # Builds the table of contents from the headings found in the book's
  # HTML files.
  class Navigation
    # Position in the heading tree while it is being built.
    Node = Struct.new(:level, :entry, :parent, keyword_init: true)

    # One table-of-contents item; `navigation` holds its child entries.
    Entry = Struct.new(:title, :link, :navigation, keyword_init: true)

    SELECTOR = "h1, h2[id], h3[id], h4[id], h5[id], h6[id]"

    # Goes through each file, sequentially, and extracts the table of
    # contents hierarchy, so you don't have to do it yourself. The result is
    # returned as a `<nav epub:type="toc">` HTML fragment.
    #
    # Notice that only `h2-h6` headings with an `id` attribute will be added to
    # the list. `h1` headings will always be added; if they don't have an id,
    # then they'll be linked to the file itself.
    #
    # If the output structure doesn't look like the one you're expecting, make
    # sure your headings have the `id` attribute.
    #
    def self.extract_html(files, root_dir:)
      html = renderer(extract(files, root_dir: root_dir))

      <<~HTML
        <nav epub:type="toc">
          #{html}
        </nav>
      HTML
    end

    # Renders a list of entries (and, recursively, their children) as nested
    # `<ol>` markup. Returns an empty string for an empty list. Both the
    # title and the link are HTML-escaped, so the result stays well-formed
    # even when a path or id contains `&`, `<` or quotes.
    def self.renderer(navigation)
      return "" if navigation.empty?

      items = navigation.map do |item|
        title = CGI.escape_html(item.title)
        href = CGI.escape_html(item.link.to_s)

        %[<li>\n<a href="#{href}">#{title}</a>#{renderer(item.navigation)}\n</li>]
      end

      "<ol>#{items.join}</ol>"
    end

    # Reads every file, collects the headings matching `SELECTOR` in
    # document order, and returns the top-level entries of the resulting tree.
    #
    # Links are relative to `root_dir`. Both sides are expanded first,
    # because `Pathname#relative_path_from` raises when one path is absolute
    # and the other is relative.
    def self.extract(files, root_dir:)
      root_dir = Pathname.new(root_dir).expand_path

      headings = files.flat_map do |file|
        path = Pathname.new(file).expand_path.relative_path_from(root_dir).to_s

        Nokogiri::HTML(File.read(file)).css(SELECTOR).map do |node|
          id = node["id"].to_s

          {
            title: node.text.strip,
            level: node.name[1].to_i, # "h2" => 2
            # A heading without an id links to the file itself rather than
            # to a dangling empty fragment ("file.html#").
            link: id.empty? ? path : "#{path}##{id}"
          }
        end
      end

      build_tree(headings)
    end

    # Nests a flat, ordered list of `{title:, level:, link:}` hashes by
    # heading level and returns the top-level entries.
    def self.build_tree(headings)
      root = Node.new(level: 0, entry: Entry.new(navigation: []))
      current = root

      headings.each do |heading|
        level = heading[:level]
        entry = Entry.new(title: heading[:title], link: heading[:link], navigation: [])

        # Climb to the closest ancestor that is shallower than this heading.
        # The root has level 0 and headings start at 1, so this always stops.
        current = current.parent while current.level >= level

        current.entry.navigation << entry
        current = Node.new(level: level, entry: entry, parent: current)
      end

      root.entry.navigation
    end
    private_class_method :build_tree
  end
end
|
File without changes
|
data/lib/epub/v3.rb
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Epub
  # Raised when a file listed in the configuration resolves to a location
  # outside the configured root directory.
  ContainerBreakoutError = Class.new(StandardError)

  # EPUB 3 packaging engine. Stages the package under `config.tmpdir`
  # (`mimetype`, `META-INF/container.xml`, `OEBPS/content.opf` and the book
  # files) and then zips the staging directory into `output_path`.
  class V3
    # The configuration object (`Epub::Epub`).
    #
    attr_reader :config

    # The output path.
    #
    attr_reader :output_path

    # Staging directories inside `config.tmpdir`.
    attr_reader :oebps_dir, :meta_inf_dir

    def initialize(config:, output_path:)
      @config = config
      @output_path = Pathname.new(output_path)
      @oebps_dir = config.tmpdir.join("OEBPS")
      @meta_inf_dir = config.tmpdir.join("META-INF")
    end

    # Runs every packaging step in order and writes the `.epub` file.
    # Raises `ContainerBreakoutError` if any file lives outside the root dir.
    def save
      create_dirs
      create_mimetype_file
      create_container_file
      create_opf_file
      copy_files
      create_epub_file
    end

    # Zips the staging directory. Any existing file at `output_path` is
    # removed first.
    private def create_epub_file
      FileUtils.rm_rf(output_path)

      Zip::File.open(output_path, Zip::File::CREATE) do |zip|
        # The `mimetype` file must be stored first and it should be
        # uncompressed.
        zip.add_stored("mimetype", config.tmpdir.join("mimetype"))

        # Sorted so the archive layout does not depend on filesystem order.
        config.tmpdir.glob("**/*").sort.each do |source_path|
          next if source_path.directory?

          relative_path = source_path.relative_path_from(config.tmpdir)
          next if relative_path.to_s == "mimetype"

          zip.add(relative_path, source_path)
        end
      end
    end

    # Copies every configured file into `OEBPS`, preserving its path
    # relative to the root directory.
    private def copy_files
      config.files.each do |source_path|
        relative_path = relative_path_for(source_path)

        # Compare the first path component instead of the raw string, so a
        # legitimate name such as `..notes.html` is not rejected.
        if relative_path.each_filename.first == ".."
          raise ContainerBreakoutError,
                "Cannot copy #{source_path.expand_path}, " \
                "as it breaks out the epub container (#{relative_path}).\n" \
                "Ensure your files exist within #{config.root_dir.expand_path}."
        end

        target_path = oebps_dir.join(relative_path)
        FileUtils.mkdir_p(target_path.dirname)
        FileUtils.cp(source_path, target_path)
      end
    end

    # Path of `file` relative to the root directory. Both sides are expanded
    # first because `Pathname#relative_path_from` raises when one path is
    # absolute and the other is relative (e.g. relative files combined with
    # the default, absolute root dir).
    private def relative_path_for(file)
      file.expand_path.relative_path_from(config.root_dir.expand_path)
    end

    private def create_mimetype_file
      config.tmpdir.join("mimetype").write("application/epub+zip")
    end

    # Uses `mkpath` so a staging directory left over from a previous run
    # (debug mode, or a caller-supplied tmpdir) doesn't raise `Errno::EEXIST`.
    private def create_dirs
      oebps_dir.mkpath
      meta_inf_dir.mkpath
    end

    # Writes `META-INF/container.xml`, which points at the package document.
    private def create_container_file
      meta_inf_dir.join("container.xml").open("w") do |file|
        file << <<~XML
          <?xml version="1.0"?>
          <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
            <rootfiles>
              <rootfile full-path="OEBPS/content.opf"
                        media-type="application/oebps-package+xml" />
            </rootfiles>
          </container>
        XML
      end
    end

    # Writes `OEBPS/content.opf` (metadata, manifest and spine).
    private def create_opf_file
      xml = Builder::XmlMarkup.new(indent: 2)
      xml.instruct! :xml, version: "1.0", encoding: "UTF-8"
      xml.package(
        xmlns: "http://www.idpf.org/2007/opf",
        version: "3.0",
        "xmlns:opf" => "http://www.idpf.org/2007/opf",
        "unique-identifier" => "book-id"
      ) do
        render_opf_metadata(xml)
        render_opf_manifest(xml)
        render_opf_spine(xml)
      end

      oebps_dir.join("content.opf").open("w") do |file|
        file << xml.target!
      end
    end

    private def render_opf_metadata(xml)
      xml.metadata("xmlns:dc" => "http://purl.org/dc/elements/1.1/") do
        xml.dc(:identifier, config.id, id: "book-id")

        config.identifiers.each do |identifier|
          xml.dc(:identifier, identifier)
        end

        xml.dc(:title, config.title, id: "book-title")
        xml.meta("main", refines: "#book-title", property: "title-type")

        xml.meta(config.title, property: "dcterms:title")

        if present?(config.subtitle)
          xml.dc(:title, config.subtitle, id: "book-subtitle")
          xml.meta("subtitle", refines: "#book-subtitle",
                               property: "title-type")
        end

        # Only reference the cover when the manifest will actually contain
        # an item with that id; otherwise the reference would dangle.
        xml.meta name: "cover", content: "cover-image" if cover_image?

        xml.dc(:date, config.date.iso8601)
        xml.meta("#{config.date.iso8601}T00:00:00Z",
                 property: "dcterms:modified")
        xml.dc(:language, config.language)
        xml.meta(config.language, property: "dcterms:language")

        config.creators.each_with_index do |creator, index|
          xml.dc(:creator, creator, id: "creator-#{index}")
          xml.meta(creator, id: "creator-#{index}-meta",
                            property: "dcterms:creator")
        end

        config.contributors.each_with_index do |contributor, index|
          xml.dc(:contributor, contributor, id: "contributor-#{index}")
          xml.meta(contributor,
                   id: "contributor-#{index}-meta",
                   property: "dcterms:contributor")
        end

        # Optional fields are skipped instead of emitting empty elements.
        if present?(config.publisher)
          xml.dc(:publisher, config.publisher)
          xml.meta(config.publisher, property: "dcterms:publisher")
        end

        if present?(config.copyright)
          xml.dc(:rights, config.copyright)
          xml.meta(config.copyright, property: "dcterms:rights")
        end
      end
    end

    # True when `value` is neither nil nor an empty string.
    private def present?(value)
      !value.to_s.empty?
    end

    # True when one of the configured files is recognized as the cover image.
    private def cover_image?
      config.files.any? {|file| guess_id(file) == "cover-image" }
    end

    # NOTE(review): files whose extension is unknown to `MimeType` end up with
    # a nil media type, and files sharing a basename in different directories
    # get the same id — both worth validating upstream.
    private def render_opf_manifest(xml)
      xml.manifest do
        config.files.each do |file|
          id = guess_id(file)
          props = {}
          props[:properties] = "cover-image" if id == "cover-image"
          props[:properties] = "nav" if id == "toc"

          xml.item href: relative_path_for(file),
                   "media-type" => guess_media_type(file),
                   id: id,
                   **props
        end
      end
    end

    # Derives the manifest id from the file's basename. Cover image, cover
    # page and table of contents get well-known ids; everything else uses
    # the basename with dots turned into dashes.
    private def guess_id(filename)
      filename = filename.basename.to_s

      case filename
      when /\Acover\.(png|jpe?g|gif|svg)\z/i
        "cover-image"
      when /\Acover\.x?html\z/i
        "cover"
      when /\Atoc\.x?html\z/i
        "toc"
      else
        filename.tr(".", "-")
      end
    end

    private def guess_media_type(file)
      MimeType[file]
    end

    # Every (X)HTML file becomes a spine item, in the order of `config.files`.
    private def render_opf_spine(xml)
      xml.spine do
        config.files.each do |file|
          next unless file.extname.match?(/\A\.x?html\z/i)

          xml.itemref idref: guess_id(file)
        end
      end
    end
  end
end
|
data/lib/epub/version.rb
ADDED
data/lib/epub-rb.rb
ADDED
data/lib/epub.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cgi"
require "date"
require "fileutils"
require "pathname"
require "securerandom"
require "tmpdir"

require "builder"
require "zip"
require "nokogiri"
|
7
|
+
|
8
|
+
module Epub
  require "epub/version"
  require "epub/epub"
  require "epub/v3"
  require "epub/mime_type"
  require "epub/navigation"

  # Convenience constructor: forwards every keyword argument to
  # `Epub::Epub.new` and returns the new instance.
  def self.new(**kwargs)
    Epub.new(**kwargs)
  end
end
|
data/playground.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Scratch script for trying out the API by hand.
# Run it from the gem root with: ruby -Ilib playground.rb
#
# The files listed below must exist relative to the current directory
# (the default `root_dir`) for `save` to succeed.
require "epub"

epub = Epub.new
epub.title = "Playground"
# The cover page and the table of contents are regular entries in `files`;
# they are recognized by basename (`cover.html`, `toc.html`).
epub.files += %w[cover.png cover.html toc.html]
epub.cover_image = "cover.png"
# `id` already defaults to a random UUID, so it is left untouched here.
# After initialization the date must be a `Date` (it is rendered with
# `Date#iso8601`).
epub.date = Date.today
epub.publisher = "O'Reilly"
epub.creators = ["Author 1", "Author 2", "Author 3"]
epub.save("file.epub")

# epub.title config[:title]
# epub.language config[:language]
# epub.creator config[:authors].to_sentence
# epub.publisher config[:publisher]
# epub.date config[:published_at]
# epub.uid "id"
# epub.identifier config[:identifier][:id],
# scheme: config[:identifier][:type]

# # epubchecker complains when assigning an image directly,
# # but if we don't, then Apple Books doesn't render the cover.
# # Need to investigate some more.
# # epub.cover_page cover_image if cover_image && File.exist?(cover_image)
# epub.cover_page "output/epub/cover.html"
# epub.files(sections.map(&:filepath) + assets)
# epub.nav(hierarchy)
# epub.toc_page(toc_path)
# epub.save(epub_path)
|