epub-rb 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/epub/epub.rb ADDED
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
module Epub
  # Describes one book: its metadata, the files to package and the scratch
  # directory used while building. The packaging itself is delegated to
  # `epub_engine` (see `#save`).
  class Epub
    # Root directory, so we can get the relative path for `:files`.
    # Defaults to the current working directory.
    #
    attr_accessor :root_dir

    # EPUB spec version engine. Only EPUBv3 is supported.
    #
    attr_accessor :epub_engine

    # The book's title.
    #
    attr_accessor :title

    # The book's subtitle.
    #
    attr_accessor :subtitle

    # The list of files that will be packaged.
    # Must include everything (fonts, stylesheets, videos, etc).
    # Always stored as an array of `Pathname` (see `#files=`).
    #
    attr_reader :files

    # The path to the cover image (can be .png or .jpg).
    #
    attr_accessor :cover_image

    # The epub identifier.
    # This must be a UUIDv4. For additional ids based on identifier schemes,
    # use `Epub::Epub#identifiers`. Defaults to `SecureRandom.uuid`.
    #
    attr_accessor :id

    # The epub's identifier schemes. Must be a list of URN strings.
    # `urn:uuid:<id>` is added to this list by `#initialize`.
    #
    # URNs look like:
    #
    # - UUID: `urn:uuid:A1B0D67E-2E81-4DF5-9E67-A64CBE366809`
    # - ISBN: `urn:isbn:9780000000001`
    # - DOI: `doi:10.1016/j.iheduc.2008.03.001`
    # - JDCN: `915869090000000000DD`
    #
    # http://idpf.github.io/epub-registries/identifiers/identifiers.html
    #
    attr_accessor :identifiers

    # The publication date. The constructor accepts a string like
    # `2024-01-24` or any object responding to `to_date` (`Date`, `Time`),
    # and stores a `Date`. Defaults to today.
    #
    attr_accessor :date

    # The publisher entity.
    #
    attr_accessor :publisher

    # A list of creators.
    #
    # @type String[]
    #
    attr_accessor :creators

    # A list of contributors.
    #
    # @type String[]
    #
    attr_accessor :contributors

    # The copyright notice.
    #
    attr_accessor :copyright

    # The book language.
    # Defaults to `en`.
    #
    attr_accessor :language

    # Temporary directory. Defaults to a randomly named path under
    # `Dir.tmpdir`. It is created by `#save` and removed afterwards unless
    # debug mode is on.
    #
    attr_accessor :tmpdir

    # Set debug mode.
    # It will output debugging info, and won't remove tmpdir after book is
    # generated.
    #
    attr_accessor :debug
    alias debug? debug

    # Assigns every keyword argument through the matching writer and then
    # fills in defaults / normalizes types.
    #
    # An unknown keyword raises `NoMethodError`, because there is no writer
    # to dispatch to.
    def initialize(**kwargs)
      kwargs.each {|key, value| public_send(:"#{key}=", value) }

      self.date = coerce_date(date)
      self.id ||= SecureRandom.uuid
      self.epub_engine ||= V3
      self.language ||= "en"
      self.tmpdir =
        Pathname.new(tmpdir || File.join(Dir.tmpdir, SecureRandom.uuid))
      self.root_dir = Pathname.new(root_dir || Dir.pwd)
      self.creators = Array(creators)
      self.contributors = Array(contributors)
      self.files ||= []

      # Build a new array rather than appending in place: `Array(list)`
      # returns the caller's own array, which must not be mutated (and may be
      # frozen). `uniq` keeps the URN from being listed twice when the caller
      # already supplied it.
      self.identifiers = [*identifiers, "urn:uuid:#{id}"].uniq
    end

    # Replaces the file list. Accepts a single path or a list of paths; every
    # entry is converted to a `Pathname`.
    def files=(files)
      @files = Array(files).map {|file| Pathname.new(file) }
    end

    # Build the book and write it to `output_path`.
    #
    # The temporary directory is created first and is removed when the engine
    # finishes or raises, unless debug mode is enabled. What happens to a file
    # that already exists at `output_path` is decided by the engine; the
    # bundled `V3` engine replaces it.
    #
    # Returns whatever the engine's `save` returns.
    def save(output_path)
      FileUtils.mkdir_p(tmpdir)
      epub_engine.new(config: self, output_path:).save
    ensure
      FileUtils.rm_rf(tmpdir) unless debug?
    end

    # Turns the configured date into a `Date`.
    # Missing values become today; date-like objects are converted with
    # `to_date`; everything else goes through `Date.parse`.
    private def coerce_date(value)
      return Date.today unless value
      return value.to_date if value.respond_to?(:to_date)

      Date.parse(value)
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module Epub
  # Thor group used for generators. Its templates live in the `templates`
  # directory next to this file.
  class Generator < Thor::Group
    include Thor::Actions

    attr_accessor :options

    # Base directory of this generator's templates.
    def self.source_root = File.join(__dir__, "templates")

    no_commands do
      # Add helper methods here
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
module Epub
  # Lookup table from file extension to media type.
  module MimeType
    class << self
      # Hash of lowercase extension (leading dot included) => media type.
      # Built on first use and reused on later calls.
      #
      # https://www.w3.org/TR/epub/#sec-core-media-types
      def list
        return @list if @list

        @list = [
          [".html", "application/xhtml+xml"],
          [".xhtml", "application/xhtml+xml"],
          [".png", "image/png"],
          [".jpeg", "image/jpeg"],
          [".jpg", "image/jpeg"],
          [".gif", "image/gif"],
          [".webp", "image/webp"],
          [".svg", "image/svg+xml"],
          [".woff", "font/woff"],
          [".woff2", "font/woff2"],
          [".ttf", "font/ttf"],
          [".otf", "font/otf"],
          [".css", "text/css"],
          [".mp3", "audio/mpeg"],
          [".m4a", "audio/mp4"],
          [".ogg", "audio/ogg; codecs=opus"],
          [".js", "application/javascript"],
          [".ncx", "application/x-dtbncx+xml"]
        ].to_h
      end

      # Media type for `file`, chosen by its extension (case-insensitive).
      # Returns nil when the extension is not in `list`.
      def [](file)
        extension = File.extname(file).downcase
        list[extension]
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
module Epub
  # Builds a table of contents out of the headings found in HTML files.
  class Navigation
    # Bookkeeping node used while nesting headings: heading level, the entry
    # it stands for and the node it was attached to.
    Node = Struct.new(:level, :entry, :parent, keyword_init: true)

    # One table of contents item: its title, its link and its child entries.
    Entry = Struct.new(:title, :link, :navigation, keyword_init: true)

    SELECTOR = "h1, h2[id], h3[id], h4[id], h5[id], h6[id]"

    # Go through each file sequentially and extract the table of contents
    # hierarchy, so you don't have to do it yourself.
    #
    # Notice that only `h2-h6` headings with an `id` attribute will be added to
    # the list. `h1` headings will always be added; if they don't have an id,
    # then they'll be linked to the file itself.
    #
    # If the output structure doesn't look like the one you're expecting, make
    # sure your headings have the `id` attribute.
    #
    # Returns the markup as a string: a `<nav epub:type="toc">` element that
    # wraps the nested list produced by `renderer`.
    def self.extract_html(files, root_dir:)
      navigation = extract(files, root_dir:)
      html = renderer(navigation)

      <<~HTML
        <nav epub:type="toc">
        #{html}
        </nav>
      HTML
    end

    # Renders a list of entries (and, recursively, their children) as nested
    # `<ol>` markup. Returns an empty string for an empty list.
    #
    # Both the title and the link are HTML-escaped, so characters such as `&`
    # or `"` in a path or heading id cannot break the generated markup.
    def self.renderer(navigation)
      return "" if navigation.empty?

      items = navigation.map do |item|
        title = CGI.escape_html(item.title)
        href = CGI.escape_html(item.link.to_s)

        %[<li>\n<a href="#{href}">#{title}</a>#{renderer(item.navigation)}\n</li>]
      end

      "<ol>#{items.join}</ol>"
    end

    # Parses every file and returns the top-level entries; nested headings
    # are reachable through each entry's `navigation` list.
    #
    # Links are the file path relative to `root_dir`, followed by `#id` when
    # the heading has an id. Both paths are expanded before being compared, so
    # a relative file can be combined with an absolute root (and vice versa).
    def self.extract(files, root_dir:)
      root = Node.new(level: 0, entry: Entry.new(navigation: []))
      current = root
      base_dir = Pathname.new(root_dir).expand_path

      files.each do |file|
        path = Pathname.new(file).expand_path.relative_path_from(base_dir).to_s
        document = Nokogiri::HTML(File.read(file))

        document.css(SELECTOR).each do |node|
          level = node.name[1].to_i
          id = node["id"].to_s

          entry = Entry.new(
            title: node.text.strip,
            # A heading without an id links to the file itself instead of
            # ending in a dangling `#`.
            link: id.empty? ? path : "#{path}##{id}",
            navigation: []
          )

          # Walk up to the closest node with a lower level; that node becomes
          # the parent. The root has level 0, so the walk always stops there.
          current = current.parent while current.level >= level

          current.entry[:navigation] << entry
          current = Node.new(level:, entry:, parent: current)
        end
      end

      root.entry[:navigation]
    end
  end
end
File without changes
data/lib/epub/v3.rb ADDED
@@ -0,0 +1,208 @@
1
+ # frozen_string_literal: true
2
+
3
module Epub
  # Raised when a configured file lies outside the configured root directory
  # and would therefore be copied outside the package.
  ContainerBreakoutError = Class.new(StandardError)

  # EPUB 3 engine. Stages the package under `config.tmpdir` (`mimetype`,
  # `META-INF/container.xml`, `OEBPS/content.opf` and the book's files) and
  # then zips that directory into `output_path`.
  class V3
    # The configuration object (`Epub::Epub`).
    #
    attr_reader :config

    # The output path.
    #
    attr_reader :output_path

    # Staging directories inside `config.tmpdir`.
    #
    attr_reader :oebps_dir, :meta_inf_dir

    def initialize(config:, output_path:)
      @config = config
      @output_path = Pathname.new(output_path)
      @oebps_dir = config.tmpdir.join("OEBPS")
      @meta_inf_dir = config.tmpdir.join("META-INF")
    end

    # Runs every build step in order and writes the final archive.
    # An existing file at `output_path` is replaced.
    def save
      create_dirs
      create_mimetype_file
      create_container_file
      create_opf_file
      copy_files
      create_epub_file
    end

    private def create_epub_file
      FileUtils.rm_rf(output_path)

      Zip::File.open(output_path, Zip::File::CREATE) do |zip|
        # The `mimetype` file must be stored first and it should be
        # uncompressed.
        zip.add_stored("mimetype", config.tmpdir.join("mimetype"))

        config.tmpdir.glob("**/*").each do |source_path|
          next if source_path.directory?

          relative_path = source_path.relative_path_from(config.tmpdir)
          next if relative_path.to_s == "mimetype"

          zip.add(relative_path, source_path)
        end
      end
    end

    # Copies every configured file into `OEBPS`, keeping its path relative to
    # the root directory.
    #
    # Raises `ContainerBreakoutError` for a file outside the root directory.
    private def copy_files
      config.files.each do |source_path|
        relative_path = relative_path_for(source_path)

        # Only a leading `..` path *component* means the file is outside the
        # root; a file whose name merely starts with two dots is fine.
        if relative_path.each_filename.first == ".."
          raise ContainerBreakoutError,
                "Cannot copy #{source_path.expand_path}, " \
                "as it breaks out the epub container (#{relative_path}).\n" \
                "Ensure your files exist within #{config.root_dir.expand_path}."
        end

        target_path = oebps_dir.join(relative_path)
        FileUtils.mkdir_p(target_path.dirname)
        FileUtils.cp(source_path, target_path)
      end
    end

    private def create_mimetype_file
      config.tmpdir.join("mimetype").open("w") do |file|
        file << "application/epub+zip"
      end
    end

    # `mkpath` instead of `mkdir`: it also creates a missing tmpdir and does
    # not fail when the directories are left over from a previous run (debug
    # mode keeps the tmpdir around).
    private def create_dirs
      oebps_dir.mkpath
      meta_inf_dir.mkpath
    end

    private def create_container_file
      meta_inf_dir.join("container.xml").open("w") do |file|
        file << <<~XML
          <?xml version="1.0"?>
          <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
            <rootfiles>
              <rootfile full-path="OEBPS/content.opf"
                        media-type="application/oebps-package+xml" />
            </rootfiles>
          </container>
        XML
      end
    end

    private def create_opf_file
      xml = Builder::XmlMarkup.new(indent: 2)
      xml.instruct! :xml, version: "1.0", encoding: "UTF-8"
      xml.package(
        xmlns: "http://www.idpf.org/2007/opf",
        version: "3.0",
        "xmlns:opf" => "http://www.idpf.org/2007/opf",
        "unique-identifier" => "book-id"
      ) do
        render_opf_metadata(xml)
        render_opf_manifest(xml)
        render_opf_spine(xml)
      end

      oebps_dir.join("content.opf").open("w") do |file|
        file << xml.target!
      end
    end

    private def render_opf_metadata(xml)
      xml.metadata("xmlns:dc" => "http://purl.org/dc/elements/1.1/") do
        xml.dc(:identifier, config.id, id: "book-id")

        config.identifiers.each do |id|
          xml.dc(:identifier, id)
        end

        xml.dc(:title, config.title, id: "book-title")
        xml.meta("main", refines: "#book-title", property: "title-type")

        xml.meta(config.title, property: "dcterms:title")

        if present?(config.subtitle)
          xml.dc(:title, config.subtitle, id: "book-subtitle")
          xml.meta("subtitle", refines: "#book-subtitle",
                               property: "title-type")
        end

        xml.meta name: "cover", content: "cover-image"

        xml.dc(:date, config.date.iso8601)
        xml.meta("#{config.date.iso8601}T00:00:00Z",
                 property: "dcterms:modified")
        xml.dc(:language, config.language)
        xml.meta(config.language, property: "dcterms:language")

        config.creators.each_with_index do |creator, index|
          xml.dc(:creator, creator, id: "creator-#{index}")
          xml.meta(creator, id: "creator-#{index}-meta",
                            property: "dcterms:creator")
        end

        config.contributors.each_with_index do |contributor, index|
          xml.dc(:contributor, contributor, id: "contributor-#{index}")
          xml.meta(contributor,
                   id: "contributor-#{index}-meta",
                   property: "dcterms:contributor")
        end

        # Optional metadata is left out when it has no value, so the package
        # never contains empty elements.
        if present?(config.publisher)
          xml.dc(:publisher, config.publisher)
          xml.meta(config.publisher, property: "dcterms:publisher")
        end

        if present?(config.copyright)
          xml.dc(:rights, config.copyright)
          xml.meta(config.copyright, property: "dcterms:rights")
        end
      end
    end

    private def render_opf_manifest(xml)
      xml.manifest do
        config.files.each do |file|
          id = guess_id(file)
          props = {}
          props[:properties] = "cover-image" if id == "cover-image"
          props[:properties] = "nav" if id == "toc"

          xml.item href: relative_path_for(file).to_s,
                   "media-type" => guess_media_type(file),
                   id:,
                   **props
        end
      end
    end

    # Manifest id for a file, derived from its base name only:
    # `cover.<image ext>` => "cover-image", `cover.(x)html` => "cover",
    # `toc.(x)html` => "toc", anything else => the name with dots replaced
    # by dashes.
    private def guess_id(filename)
      filename = filename.basename.to_s

      case filename
      when /\Acover\.(png|jpe?g|gif|svg)\z/i
        "cover-image"
      when /\Acover\.x?html\z/i
        "cover"
      when /\Atoc\.x?html\z/i
        "toc"
      else
        filename.tr(".", "-")
      end
    end

    private def guess_media_type(file)
      MimeType[file]
    end

    # Lists every `.html`/`.xhtml` file, in the order given by `config.files`.
    private def render_opf_spine(xml)
      xml.spine do
        config.files.each do |file|
          next unless file.extname.match?(/\.x?html\z/i)

          xml.itemref idref: guess_id(file)
        end
      end
    end

    # Path of `file` relative to the root directory. Both sides are expanded
    # first, so relative files work with an absolute root directory (the
    # default root is the absolute working directory).
    private def relative_path_for(file)
      file.expand_path.relative_path_from(config.root_dir.expand_path)
    end

    # True unless the value is nil or converts to an empty string.
    private def present?(value)
      !value.to_s.empty?
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Epub
  # Version string of the gem.
  VERSION = "0.0.0"
end
data/lib/epub-rb.rb ADDED
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "epub"
data/lib/epub.rb ADDED
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
require "cgi"
require "date"
require "fileutils"
require "pathname"
require "securerandom"
require "tmpdir"

require "builder"
require "nokogiri"
require "zip"
7
+
8
module Epub
  require "epub/version"
  require "epub/epub"
  require "epub/v3"
  require "epub/mime_type"
  require "epub/navigation"

  # Shortcut for `Epub::Epub.new`; keyword arguments are passed through
  # untouched.
  def self.new(**options) = Epub.new(**options)
end
data/playground.rb ADDED
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
# Scratch script that drives the public API end to end.
#
# It only uses accessors that `Epub::Epub` defines. There are no `cover_page`,
# `toc_page`, `creator` or `navigation` setters: the cover page and the
# navigation document are recognized by file name (`cover.html`, `toc.html`)
# among `files`, and several creators go into the `creators` list.
$LOAD_PATH.unshift(File.join(__dir__, "lib"))
require "epub"

# Metadata is handed to the constructor because that is where it gets
# normalized: the date is parsed, `creators` becomes an array and the
# `urn:uuid:` identifier is derived from the id, which defaults to a random
# UUID.
epub = Epub.new(
  date: Time.now.strftime("%Y-%m-%d"),
  publisher: "O'Reilly",
  creators: ["Author 1", "Author 2", "Author 3"]
)
epub.files += %w[cover.png cover.html toc.html]
epub.cover_image = "cover.png"
epub.save("file.epub")
14
+
15
+ # epub.title config[:title]
16
+ # epub.language config[:language]
17
+ # epub.creator config[:authors].to_sentence
18
+ # epub.publisher config[:publisher]
19
+ # epub.date config[:published_at]
20
+ # epub.uid "id"
21
+ # epub.identifier config[:identifier][:id],
22
+ # scheme: config[:identifier][:type]
23
+
24
+ # # epubchecker complains when assigning an image directly,
25
+ # # but if we don't, then Apple Books doesn't render the cover.
26
+ # # Need to investigate some more.
27
+ # # epub.cover_page cover_image if cover_image && File.exist?(cover_image)
28
+ # epub.cover_page "output/epub/cover.html"
29
+ # epub.files(sections.map(&:filepath) + assets)
30
+ # epub.nav(hierarchy)
31
+ # epub.toc_page(toc_path)
32
+ # epub.save(epub_path)