epub_tools 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +3 -0
- data/.rubocop.yml +10 -17
- data/CLAUDE.md +124 -0
- data/Gemfile +4 -4
- data/Gemfile.lock +39 -34
- data/Rakefile +2 -0
- data/bin/epub-tools +2 -0
- data/epub_tools.gemspec +3 -1
- data/lib/epub_tools/add_chapters.rb +47 -29
- data/lib/epub_tools/chapter_validator.rb +40 -0
- data/lib/epub_tools/cli/command_options_configurator.rb +115 -0
- data/lib/epub_tools/cli/command_registry.rb +2 -0
- data/lib/epub_tools/cli/option_builder.rb +5 -3
- data/lib/epub_tools/cli/runner.rb +59 -110
- data/lib/epub_tools/cli.rb +16 -29
- data/lib/epub_tools/compile_book.rb +48 -65
- data/lib/epub_tools/compile_workspace.rb +40 -0
- data/lib/epub_tools/epub_configuration.rb +33 -0
- data/lib/epub_tools/epub_file_writer.rb +57 -0
- data/lib/epub_tools/epub_initializer.rb +83 -162
- data/lib/epub_tools/epub_metadata_builder.rb +92 -0
- data/lib/epub_tools/loggable.rb +2 -0
- data/lib/epub_tools/pack_ebook.rb +28 -14
- data/lib/epub_tools/split_chapters.rb +42 -17
- data/lib/epub_tools/style_finder.rb +17 -6
- data/lib/epub_tools/unpack_ebook.rb +20 -10
- data/lib/epub_tools/version.rb +3 -1
- data/lib/epub_tools/xhtml_cleaner.rb +1 -0
- data/lib/epub_tools/xhtml_extractor.rb +20 -10
- data/lib/epub_tools/xhtml_generator.rb +71 -0
- data/lib/epub_tools.rb +2 -0
- data/test/add_chapters_test.rb +49 -25
- data/test/chapter_validator_test.rb +47 -0
- data/test/cli/command_registry_test.rb +2 -0
- data/test/cli/option_builder_test.rb +24 -14
- data/test/cli/runner_test.rb +15 -15
- data/test/cli_commands_test.rb +2 -0
- data/test/cli_test.rb +2 -0
- data/test/cli_version_test.rb +2 -0
- data/test/compile_book_test.rb +17 -102
- data/test/compile_workspace_test.rb +55 -0
- data/test/epub_initializer_test.rb +55 -27
- data/test/pack_ebook_test.rb +33 -9
- data/test/split_chapters_test.rb +27 -7
- data/test/style_finder_test.rb +2 -0
- data/test/test_helper.rb +2 -0
- data/test/unpack_ebook_test.rb +45 -20
- data/test/xhtml_cleaner_test.rb +2 -0
- data/test/xhtml_extractor_test.rb +3 -1
- metadata +13 -3
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
2
4
|
require 'fileutils'
|
|
3
|
-
require 'time'
|
|
4
|
-
require 'securerandom'
|
|
5
5
|
require_relative 'loggable'
|
|
6
|
+
require_relative 'xhtml_generator'
|
|
7
|
+
require_relative 'epub_metadata_builder'
|
|
8
|
+
require_relative 'epub_file_writer'
|
|
9
|
+
require_relative 'epub_configuration'
|
|
6
10
|
|
|
7
11
|
module EpubTools
|
|
8
12
|
# Sets up a basic empty EPUB directory structure with the basic files created:
|
|
@@ -15,6 +19,7 @@ module EpubTools
|
|
|
15
19
|
# - cover image (optionally)
|
|
16
20
|
class EpubInitializer
|
|
17
21
|
include Loggable
|
|
22
|
+
|
|
18
23
|
# Initializes the class
|
|
19
24
|
# @param options [Hash] Configuration options
|
|
20
25
|
# @option options [String] :title Book title (required)
|
|
@@ -23,197 +28,113 @@ module EpubTools
|
|
|
23
28
|
# @option options [String] :cover_image Optional path to the cover image
|
|
24
29
|
# @option options [Boolean] :verbose Whether to print progress to STDOUT (default: false)
|
|
25
30
|
def initialize(options = {})
|
|
26
|
-
@
|
|
27
|
-
@
|
|
28
|
-
@
|
|
29
|
-
@
|
|
30
|
-
@
|
|
31
|
-
@cover_image_path = options[:cover_image]
|
|
32
|
-
@cover_image_fname = nil
|
|
33
|
-
@cover_image_media_type = nil
|
|
34
|
-
@verbose = options[:verbose] || false
|
|
31
|
+
@config = EpubConfiguration.new(options)
|
|
32
|
+
@verbose = @config.verbose
|
|
33
|
+
@xhtml_generator = create_xhtml_generator
|
|
34
|
+
@metadata_builder = create_metadata_builder
|
|
35
|
+
@file_writer = create_file_writer
|
|
35
36
|
end
|
|
36
37
|
|
|
38
|
+
private
|
|
39
|
+
|
|
40
|
+
def create_xhtml_generator
|
|
41
|
+
XhtmlGenerator.new(title: @config.title, author: @config.author)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def create_metadata_builder
|
|
45
|
+
EpubMetadataBuilder.new(@config)
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def create_file_writer
|
|
49
|
+
EpubFileWriter.new(@config.destination)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
public
|
|
53
|
+
|
|
37
54
|
# Creates the empty ebook and returns the directory
|
|
38
55
|
def run
|
|
39
|
-
create_structure
|
|
40
|
-
write_mimetype
|
|
56
|
+
@file_writer.create_structure
|
|
57
|
+
@file_writer.write_mimetype
|
|
41
58
|
write_title_page
|
|
42
|
-
write_container
|
|
43
|
-
write_cover if @cover_image_path
|
|
59
|
+
@file_writer.write_container
|
|
60
|
+
write_cover if @config.cover_image_path
|
|
44
61
|
write_package_opf
|
|
45
62
|
write_nav
|
|
46
|
-
write_style
|
|
47
|
-
log "Created empty ebook structure at: #{@destination}"
|
|
48
|
-
@destination
|
|
63
|
+
@file_writer.write_style
|
|
64
|
+
log "Created empty ebook structure at: #{@config.destination}"
|
|
65
|
+
@config.destination
|
|
49
66
|
end
|
|
50
67
|
|
|
51
68
|
private
|
|
52
69
|
|
|
53
|
-
def
|
|
54
|
-
|
|
55
|
-
|
|
70
|
+
def write_title_page
|
|
71
|
+
content = @xhtml_generator.build_title_page
|
|
72
|
+
@file_writer.write_xhtml('title.xhtml', content)
|
|
56
73
|
end
|
|
57
74
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
75
|
+
# Copies the cover image into the EPUB structure and creates a cover.xhtml page
|
|
76
|
+
def write_cover
|
|
77
|
+
return unless cover_image_exists?
|
|
61
78
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
<p class="author">by #{@author}</p>
|
|
74
|
-
</body>
|
|
75
|
-
</html>
|
|
76
|
-
XHTML
|
|
77
|
-
|
|
78
|
-
File.write("#{@destination}/OEBPS/title.xhtml", content)
|
|
79
|
+
ext = File.extname(@config.cover_image_path).downcase
|
|
80
|
+
media_type = determine_media_type(ext)
|
|
81
|
+
return unless media_type
|
|
82
|
+
|
|
83
|
+
fname = "cover#{ext}"
|
|
84
|
+
@config.update_cover_info(fname, media_type)
|
|
85
|
+
@xhtml_generator.cover_image_fname = fname
|
|
86
|
+
update_metadata_builder_with_cover_info
|
|
87
|
+
|
|
88
|
+
copy_cover_image(ext)
|
|
89
|
+
write_cover_page
|
|
79
90
|
end
|
|
80
91
|
|
|
81
|
-
def
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
<rootfile full-path="OEBPS/package.opf" media-type="application/oebps-package+xml"/>
|
|
87
|
-
</rootfiles>
|
|
88
|
-
</container>
|
|
89
|
-
XML
|
|
90
|
-
File.write("#{@destination}/META-INF/container.xml", content)
|
|
92
|
+
def cover_image_exists?
|
|
93
|
+
return true if File.exist?(@config.cover_image_path)
|
|
94
|
+
|
|
95
|
+
warn "Warning: cover image '#{@config.cover_image_path}' not found; skipping cover support."
|
|
96
|
+
false
|
|
91
97
|
end
|
|
92
98
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
+
def determine_media_type(ext)
|
|
100
|
+
case ext
|
|
101
|
+
when '.jpg', '.jpeg' then 'image/jpeg'
|
|
102
|
+
when '.png' then 'image/png'
|
|
103
|
+
when '.gif' then 'image/gif'
|
|
104
|
+
when '.svg' then 'image/svg+xml'
|
|
105
|
+
else
|
|
106
|
+
warn "Warning: unsupported cover image type '#{ext}'; skipping cover support."
|
|
107
|
+
nil
|
|
99
108
|
end
|
|
100
|
-
ext = File.extname(path).downcase
|
|
101
|
-
@cover_image_media_type = case ext
|
|
102
|
-
when '.jpg', '.jpeg' then 'image/jpeg'
|
|
103
|
-
when '.png' then 'image/png'
|
|
104
|
-
when '.gif' then 'image/gif'
|
|
105
|
-
when '.svg' then 'image/svg+xml'
|
|
106
|
-
else
|
|
107
|
-
warn "Warning: unsupported cover image type '#{ext}'; skipping cover support."
|
|
108
|
-
return
|
|
109
|
-
end
|
|
110
|
-
@cover_image_fname = "cover#{ext}"
|
|
111
|
-
dest = File.join(@destination, 'OEBPS', @cover_image_fname)
|
|
112
|
-
FileUtils.cp(path, dest)
|
|
113
|
-
write_cover_page
|
|
114
109
|
end
|
|
115
110
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
content = <<~XHTML
|
|
119
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
120
|
-
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
|
|
121
|
-
<head>
|
|
122
|
-
<meta charset="UTF-8" />
|
|
123
|
-
<title>Cover</title>
|
|
124
|
-
<link rel="stylesheet" type="text/css" href="style.css"/>
|
|
125
|
-
</head>
|
|
126
|
-
<body>
|
|
127
|
-
<div class="cover-image">
|
|
128
|
-
<img src="#{@cover_image_fname}" alt="Cover"/>
|
|
129
|
-
</div>
|
|
130
|
-
</body>
|
|
131
|
-
</html>
|
|
132
|
-
XHTML
|
|
133
|
-
File.write(File.join(@destination, 'OEBPS', 'cover.xhtml'), content)
|
|
111
|
+
def update_metadata_builder_with_cover_info
|
|
112
|
+
@metadata_builder = EpubMetadataBuilder.new(@config)
|
|
134
113
|
end
|
|
135
114
|
|
|
136
|
-
def
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
115
|
+
def copy_cover_image(_ext)
|
|
116
|
+
dest = File.join(@config.destination, 'OEBPS', @config.cover_image_fname)
|
|
117
|
+
FileUtils.cp(@config.cover_image_path, dest)
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
# Generates a cover.xhtml file displaying the cover image
|
|
121
|
+
def write_cover_page
|
|
122
|
+
content = @xhtml_generator.build_cover_page
|
|
123
|
+
@file_writer.write_xhtml('cover.xhtml', content)
|
|
140
124
|
end
|
|
141
125
|
|
|
142
126
|
# Generates the package.opf with optional cover image entries
|
|
143
127
|
def write_package_opf
|
|
144
|
-
manifest_items =
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
if @cover_image_fname
|
|
150
|
-
manifest_items << mitem('cover-image', @cover_image_fname, @cover_image_media_type, 'cover-image')
|
|
151
|
-
manifest_items << mitem('cover-page', 'cover.xhtml', 'application/xhtml+xml')
|
|
152
|
-
spine_items << '<itemref idref="cover-page"/>'
|
|
153
|
-
end
|
|
154
|
-
|
|
155
|
-
manifest_items << mitem('title', 'title.xhtml', 'application/xhtml+xml')
|
|
156
|
-
spine_items << '<itemref idref="title"/>'
|
|
157
|
-
|
|
158
|
-
metadata = []
|
|
159
|
-
metadata << %(<dc:identifier id="pub-id">#{@uuid}</dc:identifier>)
|
|
160
|
-
metadata << %(<dc:title>#{@title}</dc:title>)
|
|
161
|
-
metadata << %(<dc:creator>#{@author}</dc:creator>)
|
|
162
|
-
metadata << '<dc:language>en</dc:language>'
|
|
163
|
-
metadata << %(<meta property="dcterms:modified">#{@modified}</meta>)
|
|
164
|
-
metadata << %(<meta property="schema:accessMode">textual</meta>)
|
|
165
|
-
metadata << %(<meta property="schema:accessibilityFeature">unknown</meta>)
|
|
166
|
-
metadata << %(<meta property="schema:accessibilityHazard">none</meta>)
|
|
167
|
-
metadata << %(<meta property="schema:accessModeSufficient">textual</meta>)
|
|
168
|
-
metadata << %(<meta name="cover" content="cover-image"/>) if @cover_image_fname
|
|
169
|
-
|
|
170
|
-
content = <<~XML
|
|
171
|
-
<?xml version="1.0" encoding="utf-8"?>
|
|
172
|
-
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="pub-id" xml:lang="en">
|
|
173
|
-
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
|
|
174
|
-
#{metadata.map { |line| " #{line}" }.join("\n")}
|
|
175
|
-
</metadata>
|
|
176
|
-
<manifest>
|
|
177
|
-
#{manifest_items.map { |line| " #{line}" }.join("\n")}
|
|
178
|
-
</manifest>
|
|
179
|
-
<spine>
|
|
180
|
-
#{spine_items.map { |line| " #{line}" }.join("\n")}
|
|
181
|
-
</spine>
|
|
182
|
-
</package>
|
|
183
|
-
XML
|
|
184
|
-
|
|
185
|
-
File.write(File.join(@destination, 'OEBPS', 'package.opf'), content)
|
|
128
|
+
manifest_items, spine_items = @metadata_builder.build_manifest_and_spine
|
|
129
|
+
metadata = @metadata_builder.build_metadata
|
|
130
|
+
content = @metadata_builder.build_opf_xml(metadata, manifest_items, spine_items)
|
|
131
|
+
@file_writer.write_package_opf(content)
|
|
186
132
|
end
|
|
187
133
|
|
|
188
134
|
# Generates the initial navigation document (Table of Contents)
|
|
189
135
|
def write_nav
|
|
190
|
-
content =
|
|
191
|
-
|
|
192
|
-
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en">
|
|
193
|
-
<head>
|
|
194
|
-
<title>Table of Contents</title>
|
|
195
|
-
</head>
|
|
196
|
-
<body>
|
|
197
|
-
<nav epub:type="toc" id="toc">
|
|
198
|
-
<h1>Table of Contents</h1>
|
|
199
|
-
<ol>
|
|
200
|
-
<li><a href="title.xhtml">Title Page</a></li>
|
|
201
|
-
</ol>
|
|
202
|
-
</nav>
|
|
203
|
-
</body>
|
|
204
|
-
</html>
|
|
205
|
-
XHTML
|
|
206
|
-
File.write(File.join(@destination, 'OEBPS', 'nav.xhtml'), content)
|
|
207
|
-
end
|
|
208
|
-
|
|
209
|
-
def write_style
|
|
210
|
-
src = File.join(Dir.pwd, 'style.css')
|
|
211
|
-
dest = File.join(@destination, 'OEBPS', 'style.css')
|
|
212
|
-
unless File.exist?(src)
|
|
213
|
-
warn "Warning: style.css not found in project root (#{src}), skipping copy."
|
|
214
|
-
return
|
|
215
|
-
end
|
|
216
|
-
FileUtils.cp(src, dest)
|
|
136
|
+
content = @xhtml_generator.build_nav_page
|
|
137
|
+
@file_writer.write_xhtml('nav.xhtml', content)
|
|
217
138
|
end
|
|
218
139
|
end
|
|
219
140
|
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module EpubTools
|
|
4
|
+
# Builds metadata content for EPUB package.opf files
|
|
5
|
+
class EpubMetadataBuilder
|
|
6
|
+
def initialize(config)
|
|
7
|
+
@config = config
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
# Builds complete metadata array
|
|
11
|
+
def build_metadata
|
|
12
|
+
metadata = []
|
|
13
|
+
add_dublin_core_metadata(metadata)
|
|
14
|
+
add_schema_metadata(metadata)
|
|
15
|
+
add_cover_metadata(metadata) if @config.cover_image_fname
|
|
16
|
+
metadata
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# Builds manifest and spine items
|
|
20
|
+
def build_manifest_and_spine
|
|
21
|
+
manifest_items = []
|
|
22
|
+
spine_items = []
|
|
23
|
+
|
|
24
|
+
add_base_manifest_items(manifest_items)
|
|
25
|
+
add_cover_items(manifest_items, spine_items) if @config.cover_image_fname
|
|
26
|
+
add_title_items(manifest_items, spine_items)
|
|
27
|
+
|
|
28
|
+
[manifest_items, spine_items]
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Builds complete OPF XML content
|
|
32
|
+
def build_opf_xml(metadata, manifest_items, spine_items)
|
|
33
|
+
<<~XML
|
|
34
|
+
<?xml version="1.0" encoding="utf-8"?>
|
|
35
|
+
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="pub-id" xml:lang="en">
|
|
36
|
+
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
|
|
37
|
+
#{metadata.map { |line| " #{line}" }.join("\n")}
|
|
38
|
+
</metadata>
|
|
39
|
+
<manifest>
|
|
40
|
+
#{manifest_items.map { |line| " #{line}" }.join("\n")}
|
|
41
|
+
</manifest>
|
|
42
|
+
<spine>
|
|
43
|
+
#{spine_items.map { |line| " #{line}" }.join("\n")}
|
|
44
|
+
</spine>
|
|
45
|
+
</package>
|
|
46
|
+
XML
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
private
|
|
50
|
+
|
|
51
|
+
def add_dublin_core_metadata(metadata)
|
|
52
|
+
metadata << %(<dc:identifier id="pub-id">#{@config.uuid}</dc:identifier>)
|
|
53
|
+
metadata << %(<dc:title>#{@config.title}</dc:title>)
|
|
54
|
+
metadata << %(<dc:creator>#{@config.author}</dc:creator>)
|
|
55
|
+
metadata << '<dc:language>en</dc:language>'
|
|
56
|
+
metadata << %(<meta property="dcterms:modified">#{@config.modified}</meta>)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def add_schema_metadata(metadata)
|
|
60
|
+
metadata << %(<meta property="schema:accessMode">textual</meta>)
|
|
61
|
+
metadata << %(<meta property="schema:accessibilityFeature">unknown</meta>)
|
|
62
|
+
metadata << %(<meta property="schema:accessibilityHazard">none</meta>)
|
|
63
|
+
metadata << %(<meta property="schema:accessModeSufficient">textual</meta>)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def add_cover_metadata(metadata)
|
|
67
|
+
metadata << %(<meta name="cover" content="cover-image"/>)
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def add_base_manifest_items(manifest_items)
|
|
71
|
+
manifest_items << mitem('style', 'style.css', 'text/css')
|
|
72
|
+
manifest_items << mitem('nav', 'nav.xhtml', 'application/xhtml+xml', 'nav')
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def add_cover_items(manifest_items, spine_items)
|
|
76
|
+
manifest_items << mitem('cover-image', @config.cover_image_fname, @config.cover_image_media_type, 'cover-image')
|
|
77
|
+
manifest_items << mitem('cover-page', 'cover.xhtml', 'application/xhtml+xml')
|
|
78
|
+
spine_items << '<itemref idref="cover-page"/>'
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def add_title_items(manifest_items, spine_items)
|
|
82
|
+
manifest_items << mitem('title', 'title.xhtml', 'application/xhtml+xml')
|
|
83
|
+
spine_items << '<itemref idref="title"/>'
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def mitem(id, href, type, properties = nil)
|
|
87
|
+
xml = "<item id=\"#{id}\" href=\"#{href}\" media-type=\"#{type}\""
|
|
88
|
+
xml += " properties=\"#{properties}\"" if properties
|
|
89
|
+
"#{xml}/>"
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
data/lib/epub_tools/loggable.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'zip'
|
|
2
4
|
require 'fileutils'
|
|
3
5
|
require 'pathname'
|
|
@@ -7,6 +9,7 @@ module EpubTools
|
|
|
7
9
|
# Packages an EPUB directory into a .epub file
|
|
8
10
|
class PackEbook
|
|
9
11
|
include Loggable
|
|
12
|
+
|
|
10
13
|
# Initializes the class
|
|
11
14
|
# @param options [Hash] Configuration options
|
|
12
15
|
# @option options [String] :input_dir Path to the EPUB directory (containing mimetype, META-INF, OEBPS) (required)
|
|
@@ -28,21 +31,9 @@ module EpubTools
|
|
|
28
31
|
def run
|
|
29
32
|
validate_input!
|
|
30
33
|
Dir.chdir(@input_dir) do
|
|
31
|
-
|
|
32
|
-
target = Pathname.new(@output_file).absolute? ? @output_file : File.join('..', @output_file)
|
|
34
|
+
target = determine_output_path
|
|
33
35
|
FileUtils.rm_f(target)
|
|
34
|
-
|
|
35
|
-
# Add mimetype first and uncompressed
|
|
36
|
-
add_mimetype(zip)
|
|
37
|
-
|
|
38
|
-
# Add all other files with compression, preserving paths
|
|
39
|
-
Dir.glob('**/*', File::FNM_DOTMATCH).sort.each do |entry|
|
|
40
|
-
next if ['.', '..', 'mimetype'].include?(entry)
|
|
41
|
-
next if File.directory?(entry)
|
|
42
|
-
|
|
43
|
-
zip.add(entry, entry)
|
|
44
|
-
end
|
|
45
|
-
end
|
|
36
|
+
create_zip_file(target)
|
|
46
37
|
end
|
|
47
38
|
log "EPUB created: #{@output_file}"
|
|
48
39
|
@output_file
|
|
@@ -50,6 +41,29 @@ module EpubTools
|
|
|
50
41
|
|
|
51
42
|
private
|
|
52
43
|
|
|
44
|
+
def determine_output_path
|
|
45
|
+
# determine the output path: absolute stays as-is, otherwise sibling to input_dir
|
|
46
|
+
Pathname.new(@output_file).absolute? ? @output_file : File.join('..', @output_file)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def create_zip_file(target)
|
|
50
|
+
Zip::File.open(target, create: true) do |zip|
|
|
51
|
+
# Add mimetype first and uncompressed
|
|
52
|
+
add_mimetype(zip)
|
|
53
|
+
add_content_files(zip)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def add_content_files(zip)
|
|
58
|
+
# Add all other files with compression, preserving paths
|
|
59
|
+
Dir.glob('**/*', File::FNM_DOTMATCH).sort.each do |entry|
|
|
60
|
+
next if ['.', '..', 'mimetype'].include?(entry)
|
|
61
|
+
next if File.directory?(entry)
|
|
62
|
+
|
|
63
|
+
zip.add(entry, entry)
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
53
67
|
def validate_input!
|
|
54
68
|
raise ArgumentError, "Directory '#{@input_dir}' does not exist." unless Dir.exist?(@input_dir)
|
|
55
69
|
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
2
4
|
require 'nokogiri'
|
|
3
5
|
require 'yaml'
|
|
4
6
|
require 'fileutils'
|
|
@@ -16,6 +18,7 @@ module EpubTools
|
|
|
16
18
|
# - Saves those files to +output_dir+
|
|
17
19
|
class SplitChapters
|
|
18
20
|
include Loggable
|
|
21
|
+
|
|
19
22
|
# Initializes the class
|
|
20
23
|
# @param options [Hash] Configuration options
|
|
21
24
|
# @option options [String] :input_file Path to the source XHTML (required)
|
|
@@ -60,21 +63,39 @@ module EpubTools
|
|
|
60
63
|
current_fragment = nil
|
|
61
64
|
|
|
62
65
|
doc.at('body').children.each do |node|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
66
|
+
current_number, current_fragment = process_node(node, chapters, current_number, current_fragment)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
finalize_chapters(chapters, current_number, current_fragment)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
def process_node(node, chapters, current_number, current_fragment)
|
|
73
|
+
if chapter_marker?(node)
|
|
74
|
+
start_new_chapter(chapters, node, current_number, current_fragment)
|
|
75
|
+
elsif prologue_marker?(node)
|
|
76
|
+
start_prologue(chapters, current_number, current_fragment)
|
|
77
|
+
else
|
|
78
|
+
current_fragment&.add_child(node.dup)
|
|
79
|
+
[current_number, current_fragment]
|
|
76
80
|
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def chapter_marker?(node)
|
|
84
|
+
node.text.match?(/Chapter\s+\d+/i) && %w[p span h2 h3 h4].include?(node.name)
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def start_new_chapter(chapters, node, current_number, current_fragment)
|
|
88
|
+
chapters[current_number] = current_fragment.to_html if current_number
|
|
89
|
+
chapter_number = node.text.match(/Chapter\s+(\d+)/i)[1].to_i
|
|
90
|
+
[chapter_number, Nokogiri::HTML::DocumentFragment.parse('')]
|
|
91
|
+
end
|
|
77
92
|
|
|
93
|
+
def start_prologue(chapters, current_number, current_fragment)
|
|
94
|
+
chapters[current_number] = current_fragment.to_html if current_number
|
|
95
|
+
[0, Nokogiri::HTML::DocumentFragment.parse('')]
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def finalize_chapters(chapters, current_number, current_fragment)
|
|
78
99
|
chapters[current_number] = current_fragment.to_html if current_number
|
|
79
100
|
chapters
|
|
80
101
|
end
|
|
@@ -91,7 +112,14 @@ module EpubTools
|
|
|
91
112
|
def write_chapter_file(label, content)
|
|
92
113
|
display_label = display_label(label)
|
|
93
114
|
filename = File.join(@output_dir, "#{@output_prefix}_#{label}.xhtml")
|
|
94
|
-
File.write(filename,
|
|
115
|
+
File.write(filename, build_xhtml_template(display_label, content))
|
|
116
|
+
XHTMLCleaner.new({ filename: filename }).run
|
|
117
|
+
log("Extracted: #{filename}")
|
|
118
|
+
filename
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def build_xhtml_template(display_label, content)
|
|
122
|
+
<<~HTML
|
|
95
123
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
96
124
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
|
|
97
125
|
<head>
|
|
@@ -104,9 +132,6 @@ module EpubTools
|
|
|
104
132
|
</body>
|
|
105
133
|
</html>
|
|
106
134
|
HTML
|
|
107
|
-
XHTMLCleaner.new({ filename: filename }).run
|
|
108
|
-
log("Extracted: #{filename}")
|
|
109
|
-
filename
|
|
110
135
|
end
|
|
111
136
|
|
|
112
137
|
def display_label(label)
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
2
4
|
require 'nokogiri'
|
|
3
5
|
require 'yaml'
|
|
4
6
|
require_relative 'loggable'
|
|
@@ -9,6 +11,7 @@ module EpubTools
|
|
|
9
11
|
# {SplitChapters}[rdoc-ref:EpubTools::SplitChapters].
|
|
10
12
|
class StyleFinder
|
|
11
13
|
include Loggable
|
|
14
|
+
|
|
12
15
|
# Initializes the class
|
|
13
16
|
# @param options [Hash] Configuration options
|
|
14
17
|
# @option options [String] :file_path XHTML file to be analyzed (required)
|
|
@@ -24,18 +27,26 @@ module EpubTools
|
|
|
24
27
|
# Runs the finder
|
|
25
28
|
# @return [Hash] Data containing the extracted style classes (italics and bolds)
|
|
26
29
|
def run
|
|
30
|
+
style_blocks = extract_style_blocks
|
|
31
|
+
italics, bolds = extract_style_classes(style_blocks)
|
|
32
|
+
generate_output(italics, bolds)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def extract_style_blocks
|
|
27
36
|
doc = Nokogiri::HTML(File.read(@file_path))
|
|
28
|
-
|
|
37
|
+
doc.xpath('//style').map(&:text).join("\n")
|
|
38
|
+
end
|
|
29
39
|
|
|
40
|
+
def extract_style_classes(style_blocks)
|
|
30
41
|
italics = extract_classes(style_blocks, /font-style\s*:\s*italic/)
|
|
31
|
-
bolds
|
|
42
|
+
bolds = extract_classes(style_blocks, /font-weight\s*:\s*700/)
|
|
43
|
+
[italics, bolds]
|
|
44
|
+
end
|
|
32
45
|
|
|
46
|
+
def generate_output(italics, bolds)
|
|
33
47
|
print_summary(italics, bolds) if @verbose
|
|
34
48
|
|
|
35
|
-
data = {
|
|
36
|
-
'italics' => italics,
|
|
37
|
-
'bolds' => bolds
|
|
38
|
-
}
|
|
49
|
+
data = { 'italics' => italics, 'bolds' => bolds }
|
|
39
50
|
File.write(@output_path, data.to_yaml)
|
|
40
51
|
data
|
|
41
52
|
end
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'zip'
|
|
2
4
|
require 'fileutils'
|
|
3
5
|
require_relative 'loggable'
|
|
@@ -6,6 +8,7 @@ module EpubTools
|
|
|
6
8
|
# Unpacks an EPUB (.epub file) into a directory
|
|
7
9
|
class UnpackEbook
|
|
8
10
|
include Loggable
|
|
11
|
+
|
|
9
12
|
# Initializes the class
|
|
10
13
|
# @param options [Hash] Configuration options
|
|
11
14
|
# @option options [String] :epub_file Path to the .epub file to unpack (required)
|
|
@@ -23,22 +26,29 @@ module EpubTools
|
|
|
23
26
|
def run
|
|
24
27
|
validate!
|
|
25
28
|
FileUtils.mkdir_p(@output_dir)
|
|
29
|
+
extract_entries
|
|
30
|
+
log "Unpacked #{File.basename(@epub_file)} to #{@output_dir}"
|
|
31
|
+
@output_dir
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
private
|
|
35
|
+
|
|
36
|
+
def extract_entries
|
|
26
37
|
Zip::File.open(@epub_file) do |zip|
|
|
27
38
|
zip.each do |entry|
|
|
28
|
-
|
|
29
|
-
if entry.directory?
|
|
30
|
-
FileUtils.mkdir_p(dest_path)
|
|
31
|
-
else
|
|
32
|
-
FileUtils.mkdir_p(File.dirname(dest_path))
|
|
33
|
-
entry.extract(dest_path) { true }
|
|
34
|
-
end
|
|
39
|
+
extract_entry(entry)
|
|
35
40
|
end
|
|
36
41
|
end
|
|
37
|
-
log "Unpacked #{File.basename(@epub_file)} to #{@output_dir}"
|
|
38
|
-
@output_dir
|
|
39
42
|
end
|
|
40
43
|
|
|
41
|
-
|
|
44
|
+
def extract_entry(entry)
|
|
45
|
+
if entry.directory?
|
|
46
|
+
FileUtils.mkdir_p(File.join(@output_dir, entry.name))
|
|
47
|
+
else
|
|
48
|
+
FileUtils.mkdir_p(File.join(@output_dir, File.dirname(entry.name)))
|
|
49
|
+
entry.extract(destination_directory: @output_dir) { true }
|
|
50
|
+
end
|
|
51
|
+
end
|
|
42
52
|
|
|
43
53
|
def default_dir
|
|
44
54
|
[File.dirname(@epub_file), File.basename(@epub_file, '.epub')].join('/')
|
data/lib/epub_tools/version.rb
CHANGED