epub_tools 0.4.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +3 -0
- data/.rubocop.yml +10 -17
- data/CLAUDE.md +128 -0
- data/Gemfile +4 -4
- data/Gemfile.lock +39 -34
- data/README.md +37 -24
- data/Rakefile +2 -0
- data/bin/epub-tools +2 -0
- data/epub_tools.gemspec +3 -1
- data/lib/epub_tools/add_chapters.rb +64 -33
- data/lib/epub_tools/append_book.rb +81 -0
- data/lib/epub_tools/book_builder.rb +108 -0
- data/lib/epub_tools/chapter_marker_detector.rb +46 -0
- data/lib/epub_tools/chapter_validator.rb +50 -0
- data/lib/epub_tools/cli/command_options_configurator.rb +128 -0
- data/lib/epub_tools/cli/command_registry.rb +2 -0
- data/lib/epub_tools/cli/option_builder.rb +5 -3
- data/lib/epub_tools/cli/runner.rb +60 -110
- data/lib/epub_tools/cli.rb +17 -29
- data/lib/epub_tools/compile_book.rb +15 -146
- data/lib/epub_tools/compile_workspace.rb +40 -0
- data/lib/epub_tools/epub_configuration.rb +33 -0
- data/lib/epub_tools/epub_file_writer.rb +57 -0
- data/lib/epub_tools/epub_initializer.rb +83 -162
- data/lib/epub_tools/epub_metadata_builder.rb +92 -0
- data/lib/epub_tools/loggable.rb +2 -0
- data/lib/epub_tools/pack_ebook.rb +28 -14
- data/lib/epub_tools/split_chapters.rb +44 -56
- data/lib/epub_tools/style_finder.rb +17 -6
- data/lib/epub_tools/unpack_ebook.rb +20 -10
- data/lib/epub_tools/version.rb +3 -1
- data/lib/epub_tools/xhtml_cleaner.rb +1 -0
- data/lib/epub_tools/xhtml_extractor.rb +20 -10
- data/lib/epub_tools/xhtml_generator.rb +71 -0
- data/lib/epub_tools.rb +5 -0
- data/test/add_chapters_test.rb +119 -25
- data/test/append_book_test.rb +127 -0
- data/test/chapter_validator_test.rb +74 -0
- data/test/cli/command_registry_test.rb +2 -0
- data/test/cli/option_builder_test.rb +24 -14
- data/test/cli/runner_test.rb +15 -15
- data/test/cli_commands_test.rb +11 -0
- data/test/cli_test.rb +2 -0
- data/test/cli_version_test.rb +2 -0
- data/test/compile_book_test.rb +16 -102
- data/test/compile_workspace_test.rb +55 -0
- data/test/epub_initializer_test.rb +55 -27
- data/test/pack_ebook_test.rb +33 -9
- data/test/split_chapters_test.rb +96 -7
- data/test/style_finder_test.rb +2 -0
- data/test/test_helper.rb +2 -0
- data/test/unpack_ebook_test.rb +45 -20
- data/test/xhtml_cleaner_test.rb +2 -0
- data/test/xhtml_extractor_test.rb +3 -1
- metadata +17 -3
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module EpubTools
  # Builds metadata content for EPUB package.opf files.
  #
  # Every value read from the configuration object is XML-escaped before it is
  # interpolated, so a title such as "Tom & Jerry" or a cover file name that
  # contains quotes still yields a well-formed package document.
  # NOTE(review): assumes callers pass raw (not pre-escaped) values - confirm.
  class EpubMetadataBuilder
    # @param config [Object] object responding to +uuid+, +title+, +author+,
    #   +modified+ and +cover_image_fname+; +cover_image_media_type+ is only
    #   read when +cover_image_fname+ is truthy
    def initialize(config)
      @config = config
    end

    # Builds the complete list of <metadata> child elements.
    # @return [Array<String>] one XML element per entry
    def build_metadata
      metadata = []
      add_dublin_core_metadata(metadata)
      add_schema_metadata(metadata)
      add_cover_metadata(metadata) if @config.cover_image_fname
      metadata
    end

    # Builds the manifest and spine entries for the fixed set of files
    # (stylesheet, nav, optional cover, title page).
    # @return [Array(Array<String>, Array<String>)] manifest items, spine items
    def build_manifest_and_spine
      manifest_items = []
      spine_items = []

      add_base_manifest_items(manifest_items)
      add_cover_items(manifest_items, spine_items) if @config.cover_image_fname
      add_title_items(manifest_items, spine_items)

      [manifest_items, spine_items]
    end

    # Builds the complete OPF XML document.
    # The entries are inserted verbatim: they are expected to be ready-made XML
    # elements such as those returned by the two builder methods above.
    # @param metadata [Array<String>] <metadata> child elements
    # @param manifest_items [Array<String>] <item> elements
    # @param spine_items [Array<String>] <itemref> elements
    # @return [String] the package document
    def build_opf_xml(metadata, manifest_items, spine_items)
      <<~XML
        <?xml version="1.0" encoding="utf-8"?>
        <package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="pub-id" xml:lang="en">
          <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
        #{indent_lines(metadata)}
          </metadata>
          <manifest>
        #{indent_lines(manifest_items)}
          </manifest>
          <spine>
        #{indent_lines(spine_items)}
          </spine>
        </package>
      XML
    end

    private

    def add_dublin_core_metadata(metadata)
      metadata << %(<dc:identifier id="pub-id">#{xml_text(@config.uuid)}</dc:identifier>)
      metadata << %(<dc:title>#{xml_text(@config.title)}</dc:title>)
      metadata << %(<dc:creator>#{xml_text(@config.author)}</dc:creator>)
      metadata << '<dc:language>en</dc:language>'
      metadata << %(<meta property="dcterms:modified">#{xml_text(@config.modified)}</meta>)
    end

    def add_schema_metadata(metadata)
      metadata << %(<meta property="schema:accessMode">textual</meta>)
      metadata << %(<meta property="schema:accessibilityFeature">unknown</meta>)
      metadata << %(<meta property="schema:accessibilityHazard">none</meta>)
      metadata << %(<meta property="schema:accessModeSufficient">textual</meta>)
    end

    def add_cover_metadata(metadata)
      metadata << %(<meta name="cover" content="cover-image"/>)
    end

    def add_base_manifest_items(manifest_items)
      manifest_items << mitem('style', 'style.css', 'text/css')
      manifest_items << mitem('nav', 'nav.xhtml', 'application/xhtml+xml', 'nav')
    end

    def add_cover_items(manifest_items, spine_items)
      manifest_items << mitem('cover-image', @config.cover_image_fname, @config.cover_image_media_type, 'cover-image')
      manifest_items << mitem('cover-page', 'cover.xhtml', 'application/xhtml+xml')
      spine_items << '<itemref idref="cover-page"/>'
    end

    def add_title_items(manifest_items, spine_items)
      manifest_items << mitem('title', 'title.xhtml', 'application/xhtml+xml')
      spine_items << '<itemref idref="title"/>'
    end

    # Renders a manifest <item/>; every attribute value is escaped and quoted.
    def mitem(id, href, type, properties = nil)
      xml = "<item id=#{xml_attr(id)} href=#{xml_attr(href)} media-type=#{xml_attr(type)}"
      xml += " properties=#{xml_attr(properties)}" if properties
      "#{xml}/>"
    end

    # Indents each entry so it nests under its container element.
    def indent_lines(lines)
      lines.map { |line| "    #{line}" }.join("\n")
    end

    # Escapes &, < and > for use as element content.
    def xml_text(value)
      value.to_s.encode(xml: :text)
    end

    # Escapes &, <, > and " and returns the value wrapped in double quotes.
    def xml_attr(value)
      value.to_s.encode(xml: :attr)
    end
  end
end
|
data/lib/epub_tools/loggable.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'zip'
|
|
2
4
|
require 'fileutils'
|
|
3
5
|
require 'pathname'
|
|
@@ -7,6 +9,7 @@ module EpubTools
|
|
|
7
9
|
# Packages an EPUB directory into a .epub file
|
|
8
10
|
class PackEbook
|
|
9
11
|
include Loggable
|
|
12
|
+
|
|
10
13
|
# Initializes the class
|
|
11
14
|
# @param options [Hash] Configuration options
|
|
12
15
|
# @option options [String] :input_dir Path to the EPUB directory (containing mimetype, META-INF, OEBPS) (required)
|
|
@@ -28,21 +31,9 @@ module EpubTools
|
|
|
28
31
|
def run
|
|
29
32
|
validate_input!
|
|
30
33
|
Dir.chdir(@input_dir) do
|
|
31
|
-
|
|
32
|
-
target = Pathname.new(@output_file).absolute? ? @output_file : File.join('..', @output_file)
|
|
34
|
+
target = determine_output_path
|
|
33
35
|
FileUtils.rm_f(target)
|
|
34
|
-
|
|
35
|
-
# Add mimetype first and uncompressed
|
|
36
|
-
add_mimetype(zip)
|
|
37
|
-
|
|
38
|
-
# Add all other files with compression, preserving paths
|
|
39
|
-
Dir.glob('**/*', File::FNM_DOTMATCH).sort.each do |entry|
|
|
40
|
-
next if ['.', '..', 'mimetype'].include?(entry)
|
|
41
|
-
next if File.directory?(entry)
|
|
42
|
-
|
|
43
|
-
zip.add(entry, entry)
|
|
44
|
-
end
|
|
45
|
-
end
|
|
36
|
+
create_zip_file(target)
|
|
46
37
|
end
|
|
47
38
|
log "EPUB created: #{@output_file}"
|
|
48
39
|
@output_file
|
|
@@ -50,6 +41,29 @@ module EpubTools
|
|
|
50
41
|
|
|
51
42
|
private
|
|
52
43
|
|
|
44
|
+
def determine_output_path
|
|
45
|
+
# determine the output path: absolute stays as-is, otherwise sibling to input_dir
|
|
46
|
+
Pathname.new(@output_file).absolute? ? @output_file : File.join('..', @output_file)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def create_zip_file(target)
|
|
50
|
+
Zip::File.open(target, create: true) do |zip|
|
|
51
|
+
# Add mimetype first and uncompressed
|
|
52
|
+
add_mimetype(zip)
|
|
53
|
+
add_content_files(zip)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def add_content_files(zip)
|
|
58
|
+
# Add all other files with compression, preserving paths
|
|
59
|
+
Dir.glob('**/*', File::FNM_DOTMATCH).sort.each do |entry|
|
|
60
|
+
next if ['.', '..', 'mimetype'].include?(entry)
|
|
61
|
+
next if File.directory?(entry)
|
|
62
|
+
|
|
63
|
+
zip.add(entry, entry)
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
53
67
|
def validate_input!
|
|
54
68
|
raise ArgumentError, "Directory '#{@input_dir}' does not exist." unless Dir.exist?(@input_dir)
|
|
55
69
|
|
|
@@ -1,51 +1,35 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
2
4
|
require 'nokogiri'
|
|
3
5
|
require 'yaml'
|
|
4
6
|
require 'fileutils'
|
|
5
7
|
require_relative 'loggable'
|
|
6
8
|
require_relative 'style_finder'
|
|
7
9
|
require_relative 'xhtml_cleaner'
|
|
10
|
+
require_relative 'chapter_marker_detector'
|
|
8
11
|
|
|
9
12
|
module EpubTools
|
|
10
|
-
#
|
|
11
|
-
# chapters and it:
|
|
12
|
-
# - Extracts classes using {StyleFinder}[rdoc-ref:EpubTools::StyleFinder]
|
|
13
|
-
# - Looks for tags that say something like Chapter XX or Prologue and splits the text there
|
|
14
|
-
# - Creates new chapter_XX.xhtml files that are cleaned using
|
|
15
|
-
# {XHTMLCleaner}[rdoc-ref:EpubTools::XHTMLCleaner]
|
|
16
|
-
# - Saves those files to +output_dir+
|
|
13
|
+
# Splits a multi-chapter XHTML file into individual chapter files.
|
|
17
14
|
class SplitChapters
|
|
18
15
|
include Loggable
|
|
19
|
-
|
|
20
|
-
# @param options [Hash] Configuration options
|
|
21
|
-
# @option options [String] :input_file Path to the source XHTML (required)
|
|
22
|
-
# @option options [String] :book_title Title to use in HTML <title> tags (required)
|
|
23
|
-
# @option options [String] :output_dir Where to write chapter files (default: './chapters')
|
|
24
|
-
# @option options [String] :output_prefix Filename prefix for chapter files (default: 'chapter')
|
|
25
|
-
# @option options [Boolean] :verbose Whether to print progress to STDOUT (default: false)
|
|
16
|
+
|
|
26
17
|
def initialize(options = {})
|
|
27
18
|
@input_file = options.fetch(:input_file)
|
|
28
19
|
@book_title = options.fetch(:book_title)
|
|
29
20
|
@output_dir = options[:output_dir] || './chapters'
|
|
30
21
|
@output_prefix = options[:output_prefix] || 'chapter'
|
|
31
22
|
@verbose = options[:verbose] || false
|
|
23
|
+
@detector = ChapterMarkerDetector.new
|
|
32
24
|
end
|
|
33
25
|
|
|
34
26
|
# Runs the splitter
|
|
35
27
|
# @return [Array<String>] List of generated chapter file paths
|
|
36
28
|
def run
|
|
37
|
-
# Prepare output dir
|
|
38
29
|
FileUtils.mkdir_p(@output_dir)
|
|
39
|
-
|
|
40
|
-
# Read the doc
|
|
41
|
-
raw_content = read_and_strip_problematic_tags
|
|
42
|
-
doc = Nokogiri::HTML(raw_content)
|
|
43
|
-
|
|
44
|
-
# Find Style Classes
|
|
30
|
+
doc = Nokogiri::HTML(read_and_strip_problematic_tags)
|
|
45
31
|
StyleFinder.new({ file_path: @input_file, verbose: @verbose }).run
|
|
46
|
-
|
|
47
|
-
chapters = extract_chapters(doc)
|
|
48
|
-
write_chapter_files(chapters)
|
|
32
|
+
extract_chapters(doc).map { |number, content| write_chapter_file(number, content) }
|
|
49
33
|
end
|
|
50
34
|
|
|
51
35
|
private
|
|
@@ -60,38 +44,47 @@ module EpubTools
|
|
|
60
44
|
current_fragment = nil
|
|
61
45
|
|
|
62
46
|
doc.at('body').children.each do |node|
|
|
63
|
-
|
|
64
|
-
# start a new chapter (skip the marker node so title isn't duplicated)
|
|
65
|
-
chapters[current_number] = current_fragment.to_html if current_number
|
|
66
|
-
current_number = m[1].to_i
|
|
67
|
-
current_fragment = Nokogiri::HTML::DocumentFragment.parse('')
|
|
68
|
-
elsif prologue_marker?(node)
|
|
69
|
-
# start the prologue (skip the marker node)
|
|
70
|
-
chapters[current_number] = current_fragment.to_html if current_number
|
|
71
|
-
current_number = 0
|
|
72
|
-
current_fragment = Nokogiri::HTML::DocumentFragment.parse('')
|
|
73
|
-
else
|
|
74
|
-
current_fragment&.add_child(node.dup)
|
|
75
|
-
end
|
|
47
|
+
current_number, current_fragment = process_node(node, chapters, current_number, current_fragment)
|
|
76
48
|
end
|
|
77
49
|
|
|
78
50
|
chapters[current_number] = current_fragment.to_html if current_number
|
|
79
51
|
chapters
|
|
80
52
|
end
|
|
81
53
|
|
|
82
|
-
def
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
54
|
+
def process_node(node, chapters, current_number, current_fragment)
|
|
55
|
+
marker = @detector.detect(node)
|
|
56
|
+
if marker
|
|
57
|
+
start_chapter(chapters, marker_number(marker, node), current_number, current_fragment)
|
|
58
|
+
else
|
|
59
|
+
current_fragment&.add_child(node.dup)
|
|
60
|
+
[current_number, current_fragment]
|
|
87
61
|
end
|
|
88
|
-
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def marker_number(marker, node)
|
|
65
|
+
case marker
|
|
66
|
+
when :continued then @detector.extract_chapter_number(node) + 0.5
|
|
67
|
+
when :chapter then @detector.extract_chapter_number(node)
|
|
68
|
+
when :prologue then 0
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
def start_chapter(chapters, number, current_number, current_fragment)
|
|
73
|
+
chapters[current_number] = current_fragment.to_html if current_number
|
|
74
|
+
[number, Nokogiri::HTML::DocumentFragment.parse('')]
|
|
89
75
|
end
|
|
90
76
|
|
|
91
77
|
def write_chapter_file(label, content)
|
|
92
|
-
|
|
93
|
-
filename = File.join(@output_dir, "#{@output_prefix}_#{label}.xhtml")
|
|
94
|
-
File.write(filename,
|
|
78
|
+
display = display_label(label)
|
|
79
|
+
filename = File.join(@output_dir, "#{@output_prefix}_#{file_label(label)}.xhtml")
|
|
80
|
+
File.write(filename, build_xhtml_template(display, content))
|
|
81
|
+
XHTMLCleaner.new({ filename: filename }).run
|
|
82
|
+
log("Extracted: #{filename}")
|
|
83
|
+
filename
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def build_xhtml_template(display_label, content)
|
|
87
|
+
<<~HTML
|
|
95
88
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
96
89
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
|
|
97
90
|
<head>
|
|
@@ -104,21 +97,16 @@ module EpubTools
|
|
|
104
97
|
</body>
|
|
105
98
|
</html>
|
|
106
99
|
HTML
|
|
107
|
-
XHTMLCleaner.new({ filename: filename }).run
|
|
108
|
-
log("Extracted: #{filename}")
|
|
109
|
-
filename
|
|
110
100
|
end
|
|
111
101
|
|
|
112
|
-
def
|
|
113
|
-
label.
|
|
102
|
+
def file_label(label)
|
|
103
|
+
label.is_a?(Float) ? label.to_s.gsub('.', '_') : label.to_s
|
|
114
104
|
end
|
|
115
105
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
return false unless %w[h3 h4].include?(node.name)
|
|
119
|
-
return false unless node.text.strip =~ /\APrologue\z/i
|
|
106
|
+
def display_label(label)
|
|
107
|
+
return 'Prologue' if label.zero?
|
|
120
108
|
|
|
121
|
-
|
|
109
|
+
"Chapter #{label}"
|
|
122
110
|
end
|
|
123
111
|
end
|
|
124
112
|
end
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
2
4
|
require 'nokogiri'
|
|
3
5
|
require 'yaml'
|
|
4
6
|
require_relative 'loggable'
|
|
@@ -9,6 +11,7 @@ module EpubTools
|
|
|
9
11
|
# {SplitChapters}[rdoc-ref:EpubTools::SplitChapters].
|
|
10
12
|
class StyleFinder
|
|
11
13
|
include Loggable
|
|
14
|
+
|
|
12
15
|
# Initializes the class
|
|
13
16
|
# @param options [Hash] Configuration options
|
|
14
17
|
# @option options [String] :file_path XHTML file to be analyzed (required)
|
|
@@ -24,18 +27,26 @@ module EpubTools
|
|
|
24
27
|
# Runs the finder
|
|
25
28
|
# @return [Hash] Data containing the extracted style classes (italics and bolds)
|
|
26
29
|
def run
|
|
30
|
+
style_blocks = extract_style_blocks
|
|
31
|
+
italics, bolds = extract_style_classes(style_blocks)
|
|
32
|
+
generate_output(italics, bolds)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def extract_style_blocks
|
|
27
36
|
doc = Nokogiri::HTML(File.read(@file_path))
|
|
28
|
-
|
|
37
|
+
doc.xpath('//style').map(&:text).join("\n")
|
|
38
|
+
end
|
|
29
39
|
|
|
40
|
+
def extract_style_classes(style_blocks)
|
|
30
41
|
italics = extract_classes(style_blocks, /font-style\s*:\s*italic/)
|
|
31
|
-
bolds
|
|
42
|
+
bolds = extract_classes(style_blocks, /font-weight\s*:\s*700/)
|
|
43
|
+
[italics, bolds]
|
|
44
|
+
end
|
|
32
45
|
|
|
46
|
+
def generate_output(italics, bolds)
|
|
33
47
|
print_summary(italics, bolds) if @verbose
|
|
34
48
|
|
|
35
|
-
data = {
|
|
36
|
-
'italics' => italics,
|
|
37
|
-
'bolds' => bolds
|
|
38
|
-
}
|
|
49
|
+
data = { 'italics' => italics, 'bolds' => bolds }
|
|
39
50
|
File.write(@output_path, data.to_yaml)
|
|
40
51
|
data
|
|
41
52
|
end
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'zip'
|
|
2
4
|
require 'fileutils'
|
|
3
5
|
require_relative 'loggable'
|
|
@@ -6,6 +8,7 @@ module EpubTools
|
|
|
6
8
|
# Unpacks an EPUB (.epub file) into a directory
|
|
7
9
|
class UnpackEbook
|
|
8
10
|
include Loggable
|
|
11
|
+
|
|
9
12
|
# Initializes the class
|
|
10
13
|
# @param options [Hash] Configuration options
|
|
11
14
|
# @option options [String] :epub_file Path to the .epub file to unpack (required)
|
|
@@ -23,22 +26,29 @@ module EpubTools
|
|
|
23
26
|
def run
|
|
24
27
|
validate!
|
|
25
28
|
FileUtils.mkdir_p(@output_dir)
|
|
29
|
+
extract_entries
|
|
30
|
+
log "Unpacked #{File.basename(@epub_file)} to #{@output_dir}"
|
|
31
|
+
@output_dir
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
private
|
|
35
|
+
|
|
36
|
+
def extract_entries
|
|
26
37
|
Zip::File.open(@epub_file) do |zip|
|
|
27
38
|
zip.each do |entry|
|
|
28
|
-
|
|
29
|
-
if entry.directory?
|
|
30
|
-
FileUtils.mkdir_p(dest_path)
|
|
31
|
-
else
|
|
32
|
-
FileUtils.mkdir_p(File.dirname(dest_path))
|
|
33
|
-
entry.extract(dest_path) { true }
|
|
34
|
-
end
|
|
39
|
+
extract_entry(entry)
|
|
35
40
|
end
|
|
36
41
|
end
|
|
37
|
-
log "Unpacked #{File.basename(@epub_file)} to #{@output_dir}"
|
|
38
|
-
@output_dir
|
|
39
42
|
end
|
|
40
43
|
|
|
41
|
-
|
|
44
|
+
def extract_entry(entry)
|
|
45
|
+
if entry.directory?
|
|
46
|
+
FileUtils.mkdir_p(File.join(@output_dir, entry.name))
|
|
47
|
+
else
|
|
48
|
+
FileUtils.mkdir_p(File.join(@output_dir, File.dirname(entry.name)))
|
|
49
|
+
entry.extract(destination_directory: @output_dir) { true }
|
|
50
|
+
end
|
|
51
|
+
end
|
|
42
52
|
|
|
43
53
|
def default_dir
|
|
44
54
|
[File.dirname(@epub_file), File.basename(@epub_file, '.epub')].join('/')
|
data/lib/epub_tools/version.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'zip'
|
|
2
4
|
require 'fileutils'
|
|
3
5
|
require_relative 'loggable'
|
|
@@ -6,6 +8,7 @@ module EpubTools
|
|
|
6
8
|
# Extracts text .xhtml files from EPUB archives, excluding nav.xhtml
|
|
7
9
|
class XHTMLExtractor
|
|
8
10
|
include Loggable
|
|
11
|
+
|
|
9
12
|
# Initializes the class
|
|
10
13
|
# @param options [Hash] Configuration options
|
|
11
14
|
# @option options [String] :source_dir Directory containing source .epub files (required)
|
|
@@ -39,21 +42,28 @@ module EpubTools
|
|
|
39
42
|
epub_name = File.basename(epub_path, '.epub')
|
|
40
43
|
log "Extracting from #{epub_name}.epub"
|
|
41
44
|
extracted_files = []
|
|
45
|
+
|
|
42
46
|
Zip::File.open(epub_path) do |zip_file|
|
|
43
|
-
zip_file.each
|
|
44
|
-
next unless entry.name.downcase.end_with?('.xhtml')
|
|
45
|
-
next if File.basename(entry.name).downcase == 'nav.xhtml'
|
|
46
|
-
|
|
47
|
-
output_path = File.join(@target_dir, "#{epub_name}_#{File.basename(entry.name)}")
|
|
48
|
-
FileUtils.mkdir_p(File.dirname(output_path))
|
|
49
|
-
entry.extract(output_path) { true }
|
|
50
|
-
log output_path
|
|
51
|
-
extracted_files << output_path
|
|
52
|
-
end
|
|
47
|
+
zip_file.each { |entry| extract_entry_if_xhtml(entry, epub_name, extracted_files) }
|
|
53
48
|
end
|
|
54
49
|
extracted_files
|
|
55
50
|
rescue Zip::Error => e
|
|
56
51
|
warn "⚠️ Failed to process #{epub_path}: #{e.message}"
|
|
57
52
|
end
|
|
53
|
+
|
|
54
|
+
def extract_entry_if_xhtml(entry, epub_name, extracted_files)
|
|
55
|
+
return unless xhtml_entry?(entry)
|
|
56
|
+
|
|
57
|
+
renamed = "#{epub_name}_#{File.basename(entry.name)}"
|
|
58
|
+
output_path = File.join(@target_dir, renamed)
|
|
59
|
+
FileUtils.mkdir_p(File.dirname(output_path))
|
|
60
|
+
entry.extract(renamed, destination_directory: @target_dir) { true }
|
|
61
|
+
log output_path
|
|
62
|
+
extracted_files << output_path
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def xhtml_entry?(entry)
|
|
66
|
+
entry.name.downcase.end_with?('.xhtml') && File.basename(entry.name).downcase != 'nav.xhtml'
|
|
67
|
+
end
|
|
58
68
|
end
|
|
59
69
|
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module EpubTools
  # Generates XHTML content for EPUB files.
  #
  # The title, author and cover image file name are XML-escaped before being
  # interpolated, so values containing &, <, > or quotes still produce
  # well-formed XHTML.
  # NOTE(review): assumes callers pass raw (not pre-escaped) values - confirm.
  class XhtmlGenerator
    # File name used as the +src+ of the cover page image (nil until assigned).
    attr_accessor :cover_image_fname

    # @param title [String] book title shown on the title page
    # @param author [String] author name shown on the title page
    def initialize(title:, author:)
      @title = title
      @author = author
      @cover_image_fname = nil
    end

    # Generates title page XHTML content
    # @return [String]
    def build_title_page
      <<~XHTML
        <?xml version="1.0" encoding="UTF-8"?>
        <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
          <head>
            <meta charset="UTF-8" />
            <title>#{xml_text(@title)}</title>
            <link rel="stylesheet" type="text/css" href="style.css"/>
          </head>
          <body>
            <h1 class="title">#{xml_text(@title)}</h1>
            <p class="author">by #{xml_text(@author)}</p>
          </body>
        </html>
      XHTML
    end

    # Generates cover page XHTML content
    # @return [String]
    def build_cover_page
      <<~XHTML
        <?xml version="1.0" encoding="UTF-8"?>
        <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
          <head>
            <meta charset="UTF-8" />
            <title>Cover</title>
            <link rel="stylesheet" type="text/css" href="style.css"/>
          </head>
          <body>
            <div class="cover-image">
              <img src=#{xml_attr(@cover_image_fname)} alt="Cover"/>
            </div>
          </body>
        </html>
      XHTML
    end

    # Generates navigation XHTML content
    # @return [String]
    def build_nav_page
      <<~XHTML
        <?xml version="1.0" encoding="utf-8"?>
        <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en">
          <head>
            <title>Table of Contents</title>
          </head>
          <body>
            <nav epub:type="toc" id="toc">
              <h1>Table of Contents</h1>
              <ol>
                <li><a href="title.xhtml">Title Page</a></li>
              </ol>
            </nav>
          </body>
        </html>
      XHTML
    end

    private

    # Escapes &, < and > for use as element content.
    def xml_text(value)
      value.to_s.encode(xml: :text)
    end

    # Escapes &, <, > and " and returns the value wrapped in double quotes.
    def xml_attr(value)
      value.to_s.encode(xml: :attr)
    end
  end
end
|
data/lib/epub_tools.rb
CHANGED
|
@@ -1,13 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require_relative 'epub_tools/version'
|
|
2
4
|
require_relative 'epub_tools/loggable'
|
|
3
5
|
require_relative 'epub_tools/add_chapters'
|
|
4
6
|
require_relative 'epub_tools/epub_initializer'
|
|
5
7
|
require_relative 'epub_tools/split_chapters'
|
|
8
|
+
require_relative 'epub_tools/chapter_marker_detector'
|
|
6
9
|
require_relative 'epub_tools/xhtml_cleaner'
|
|
7
10
|
require_relative 'epub_tools/xhtml_extractor'
|
|
8
11
|
require_relative 'epub_tools/pack_ebook'
|
|
9
12
|
require_relative 'epub_tools/unpack_ebook'
|
|
13
|
+
require_relative 'epub_tools/book_builder'
|
|
10
14
|
require_relative 'epub_tools/compile_book'
|
|
15
|
+
require_relative 'epub_tools/append_book'
|
|
11
16
|
require_relative 'epub_tools/cli'
|
|
12
17
|
|
|
13
18
|
# Wrapper for all the other classes
|