epub_tools 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +6 -2
- data/Gemfile.lock +1 -1
- data/README.md +37 -24
- data/lib/epub_tools/add_chapters.rb +19 -6
- data/lib/epub_tools/append_book.rb +81 -0
- data/lib/epub_tools/book_builder.rb +108 -0
- data/lib/epub_tools/chapter_marker_detector.rb +46 -0
- data/lib/epub_tools/chapter_validator.rb +16 -6
- data/lib/epub_tools/cli/command_options_configurator.rb +13 -0
- data/lib/epub_tools/cli/runner.rb +1 -0
- data/lib/epub_tools/cli.rb +1 -0
- data/lib/epub_tools/compile_book.rb +11 -125
- data/lib/epub_tools/split_chapters.rb +26 -63
- data/lib/epub_tools/version.rb +1 -1
- data/lib/epub_tools.rb +3 -0
- data/test/add_chapters_test.rb +70 -0
- data/test/append_book_test.rb +127 -0
- data/test/chapter_validator_test.rb +27 -0
- data/test/cli_commands_test.rb +9 -0
- data/test/compile_book_test.rb +1 -2
- data/test/split_chapters_test.rb +69 -0
- metadata +5 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a379e3e740873230fa8c4f71261006ac79654e74f1fa5ba299aee8742f9aa3c3
|
|
4
|
+
data.tar.gz: c9d83418c1f585a43e3650e3b5189fd74a4f3374a4835b818bc15379f19ede05
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 27bf72687e57c4998831d5e94098ddbeb76f953d99b09d2798e2087b4ce147353df2673e5d1996124047ae3c09741604b80483a79aea471180cdd9769f76a28f
|
|
7
|
+
data.tar.gz: b06cce574ef93ab03769d7734bffd0947453bc4353f50c17edda34943a075e151a44e04916bc4c08890ff51b9f8a7f6d4fde7ba05d591507ba5be0bb6d49a65e
|
data/CLAUDE.md
CHANGED
|
@@ -71,10 +71,14 @@ gem install ./epub_tools-*.gem
|
|
|
71
71
|
- `AddChapters`: Adds chapter files to existing EPUB
|
|
72
72
|
- `PackEbook`: Packages EPUB directories into .epub files
|
|
73
73
|
- `UnpackEbook`: Unpacks .epub files into directories
|
|
74
|
-
|
|
74
|
+
- **Workflow Classes**: Orchestrators built on a shared base class
|
|
75
|
+
- `BookBuilder`: Base class with template method pattern (extract → split → validate → add → pack)
|
|
76
|
+
- `CompileBook`: Creates a new EPUB from source EPUBs (inherits BookBuilder)
|
|
77
|
+
- `AppendBook`: Appends chapters from source EPUBs to an existing EPUB (inherits BookBuilder)
|
|
75
78
|
- **Supporting Classes**: SOLID-designed helper classes
|
|
76
|
-
- `CompileWorkspace`: Manages build directories for
|
|
79
|
+
- `CompileWorkspace`: Manages build directories for book-building workflows
|
|
77
80
|
- `ChapterValidator`: Validates chapter sequence completeness
|
|
81
|
+
- `ChapterMarkerDetector`: Detects chapter boundary markers (Chapter N, Chapter N (continued), Prologue)
|
|
78
82
|
- `EpubConfiguration`: Configuration object for EPUB initialization
|
|
79
83
|
- `XhtmlGenerator`: Generates XHTML templates for EPUB content
|
|
80
84
|
- `EpubMetadataBuilder`: Builds OPF metadata content
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
[Build Status](https://github.com/jaimerodas/epub_tools/actions) [License](LICENSE) [Gem Version](https://badge.fury.io/rb/epub_tools)
|
|
4
4
|
|
|
5
|
-
**TL;DR:** A Ruby gem and CLI for working with EPUB files: extract, split, initialize, add chapters, pack, and
|
|
5
|
+
**TL;DR:** A Ruby gem and CLI for working with EPUB files: extract, split, initialize, add chapters, pack, unpack, compile, and append to EPUB books.
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
@@ -36,6 +36,7 @@ Commands:
|
|
|
36
36
|
- `pack` Package an EPUB directory into a `.epub` file
|
|
37
37
|
- `unpack` Unpack a `.epub` file into a directory
|
|
38
38
|
- `compile` Takes EPUBs in a dir and splits, cleans, and compiles into a single EPUB
|
|
39
|
+
- `append` Extracts and splits EPUBs from a dir and appends them to an existing EPUB
|
|
39
40
|
|
|
40
41
|
Run `epub-tools COMMAND --help` for details on options.
|
|
41
42
|
|
|
@@ -61,46 +62,58 @@ epub-tools unpack -i MyBook.epub -o unpacked_dir
|
|
|
61
62
|
|
|
62
63
|
# Full compile workflow: extract, split, initialize, add, and pack into one EPUB
|
|
63
64
|
epub-tools compile -t "My Book" -a "Author Name" -s source_epubs -c cover.jpg -o MyBook.epub
|
|
64
|
-
```
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
# Append chapters from new EPUBs to an existing book
|
|
67
|
+
epub-tools append -s new_epubs -t MyBook.epub
|
|
68
|
+
```
|
|
67
69
|
|
|
68
70
|
## Library Usage
|
|
69
71
|
Use the library directly in Ruby:
|
|
70
72
|
```ruby
|
|
71
73
|
require 'epub_tools'
|
|
72
74
|
|
|
73
|
-
#
|
|
75
|
+
# Full compile workflow: extract, split, and compile into a new EPUB
|
|
76
|
+
EpubTools::CompileBook.new(
|
|
77
|
+
title: 'My Book', author: 'Author Name',
|
|
78
|
+
source_dir: 'source_epubs', cover_image: 'cover.jpg',
|
|
79
|
+
output_file: 'MyBook.epub'
|
|
80
|
+
).run
|
|
81
|
+
|
|
82
|
+
# Append chapters from new EPUBs to an existing book
|
|
83
|
+
EpubTools::AppendBook.new(
|
|
84
|
+
source_dir: 'new_epubs',
|
|
85
|
+
target_epub: 'MyBook.epub'
|
|
86
|
+
).run
|
|
87
|
+
|
|
88
|
+
# Individual steps can also be used standalone:
|
|
89
|
+
|
|
90
|
+
# Extract XHTML files from EPUBs
|
|
74
91
|
EpubTools::XHTMLExtractor.new(
|
|
75
|
-
source_dir: 'source_epubs',
|
|
76
|
-
|
|
77
|
-
verbose: true
|
|
78
|
-
).extract_all
|
|
92
|
+
source_dir: 'source_epubs', target_dir: 'xhtml_output'
|
|
93
|
+
).run
|
|
79
94
|
|
|
80
|
-
# Split
|
|
95
|
+
# Split a multi-chapter XHTML into individual chapter files
|
|
81
96
|
EpubTools::SplitChapters.new(
|
|
82
|
-
'xhtml_output/chapter1.xhtml',
|
|
83
|
-
'
|
|
84
|
-
'chapters',
|
|
85
|
-
'chapter'
|
|
97
|
+
input_file: 'xhtml_output/chapter1.xhtml', book_title: 'My Book',
|
|
98
|
+
output_dir: 'chapters', output_prefix: 'chapter'
|
|
86
99
|
).run
|
|
87
100
|
|
|
88
|
-
# Initialize EPUB
|
|
101
|
+
# Initialize a new EPUB directory structure
|
|
89
102
|
EpubTools::EpubInitializer.new(
|
|
90
|
-
'My Book',
|
|
91
|
-
'
|
|
92
|
-
'epub_dir',
|
|
93
|
-
'cover.jpg'
|
|
103
|
+
title: 'My Book', author: 'Author Name',
|
|
104
|
+
destination: 'epub_dir', cover_image: 'cover.jpg'
|
|
94
105
|
).run
|
|
95
106
|
|
|
96
|
-
# Add
|
|
97
|
-
EpubTools::AddChapters.new(
|
|
107
|
+
# Add chapter files into an EPUB
|
|
108
|
+
EpubTools::AddChapters.new(
|
|
109
|
+
chapters_dir: 'chapters', oebps_dir: 'epub_dir/OEBPS'
|
|
110
|
+
).run
|
|
98
111
|
|
|
99
|
-
#
|
|
100
|
-
EpubTools::PackEbook.new('epub_dir', 'MyBook.epub').run
|
|
112
|
+
# Package an EPUB directory into a .epub file
|
|
113
|
+
EpubTools::PackEbook.new(input_dir: 'epub_dir', output_file: 'MyBook.epub').run
|
|
101
114
|
|
|
102
|
-
# Unpack
|
|
103
|
-
EpubTools::UnpackEbook.new('MyBook.epub', 'unpacked_dir').run
|
|
115
|
+
# Unpack a .epub file into a directory
|
|
116
|
+
EpubTools::UnpackEbook.new(epub_file: 'MyBook.epub', output_dir: 'unpacked_dir').run
|
|
104
117
|
```
|
|
105
118
|
## Development & Testing
|
|
106
119
|
Clone the repo and install dependencies:
|
data/lib/epub_tools/add_chapters.rb
CHANGED
|
@@ -59,10 +59,8 @@ module EpubTools
|
|
|
59
59
|
end
|
|
60
60
|
|
|
61
61
|
def move_chapters
|
|
62
|
-
# Sort by chapter number (numeric)
|
|
63
62
|
chapter_files = Dir.glob(File.join(@chapters_dir, '*.xhtml')).sort_by do |path|
|
|
64
|
-
|
|
65
|
-
File.basename(path)[/\d+/].to_i
|
|
63
|
+
chapter_sort_key(File.basename(path))
|
|
66
64
|
end
|
|
67
65
|
|
|
68
66
|
raise ArgumentError, "No .xhtml files found in '#{@chapters_dir}'" if chapter_files.empty?
|
|
@@ -73,8 +71,17 @@ module EpubTools
|
|
|
73
71
|
chapter_files.map { |f| File.basename(f) }
|
|
74
72
|
end
|
|
75
73
|
|
|
74
|
+
def chapter_sort_key(filename)
|
|
75
|
+
basename = File.basename(filename, '.xhtml')
|
|
76
|
+
if (m = basename.match(/_(\d+)_5\z/))
|
|
77
|
+
m[1].to_f + 0.5
|
|
78
|
+
else
|
|
79
|
+
basename[/\d+/].to_f
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
76
83
|
def chapter_id(filename)
|
|
77
|
-
match = filename.match(/chapter_(\d+)\.xhtml/)
|
|
84
|
+
match = filename.match(/chapter_(\d+(?:_5)?)\.xhtml/)
|
|
78
85
|
match ? "chap#{match[1]}" : File.basename(filename, '.xhtml')
|
|
79
86
|
end
|
|
80
87
|
|
|
@@ -107,8 +114,14 @@ module EpubTools
|
|
|
107
114
|
end
|
|
108
115
|
|
|
109
116
|
def format_chapter_label(filename)
|
|
110
|
-
|
|
111
|
-
|
|
117
|
+
basename = File.basename(filename, '.xhtml')
|
|
118
|
+
return 'Prologue' if basename == 'chapter_0'
|
|
119
|
+
|
|
120
|
+
if (m = basename.match(/chapter_(\d+)_5/))
|
|
121
|
+
"Chapter #{m[1]}.5"
|
|
122
|
+
else
|
|
123
|
+
basename.gsub('_', ' ').capitalize
|
|
124
|
+
end
|
|
112
125
|
end
|
|
113
126
|
|
|
114
127
|
def update_opf_for_file(doc, manifest, spine, filename)
|
data/lib/epub_tools/append_book.rb
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'nokogiri'
|
|
4
|
+
require_relative 'book_builder'
|
|
5
|
+
require_relative 'unpack_ebook'
|
|
6
|
+
|
|
7
|
+
module EpubTools
|
|
8
|
+
# Appends chapters from source EPUBs to an existing target EPUB
|
|
9
|
+
class AppendBook < BookBuilder
|
|
10
|
+
attr_reader :target_epub
|
|
11
|
+
|
|
12
|
+
def initialize(options = {})
|
|
13
|
+
super
|
|
14
|
+
@target_epub = File.expand_path(options.fetch(:target_epub))
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
private
|
|
18
|
+
|
|
19
|
+
def book_title
|
|
20
|
+
@book_title ||= read_target_title
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def output_path = @target_epub
|
|
24
|
+
|
|
25
|
+
def prepare_epub
|
|
26
|
+
backup_target
|
|
27
|
+
unpack_target
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def before_add_chapters
|
|
31
|
+
detect_conflicts
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def finalize_and_cleanup
|
|
35
|
+
log "Done. Updated EPUB: #{@target_epub} (backup: #{@backup_path})"
|
|
36
|
+
@workspace.clean
|
|
37
|
+
@target_epub
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def backup_target
|
|
41
|
+
@backup_path = "#{@target_epub}.bak"
|
|
42
|
+
log "Backing up target to '#{@backup_path}'..."
|
|
43
|
+
FileUtils.cp(@target_epub, @backup_path)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def unpack_target
|
|
47
|
+
log 'Unpacking target EPUB...'
|
|
48
|
+
UnpackEbook.new(epub_file: @target_epub, output_dir: @workspace.epub_dir, verbose: verbose).run
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def read_target_title
|
|
52
|
+
opf_path = File.join(epub_oebps_dir, 'package.opf')
|
|
53
|
+
doc = Nokogiri::XML(File.read(opf_path))
|
|
54
|
+
doc.remove_namespaces!
|
|
55
|
+
doc.at_xpath('//title')&.text || 'Untitled'
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def detect_conflicts
|
|
59
|
+
new_numbers = chapter_numbers_in(@workspace.chapters_dir)
|
|
60
|
+
existing_numbers = chapter_numbers_in(epub_oebps_dir)
|
|
61
|
+
conflicts = new_numbers & existing_numbers
|
|
62
|
+
return if conflicts.empty?
|
|
63
|
+
|
|
64
|
+
formatted = conflicts.sort.map { |n| n == n.to_i ? n.to_i.to_s : n.to_s }
|
|
65
|
+
raise ArgumentError,
|
|
66
|
+
"Chapter number conflict: chapters #{formatted.join(', ')} already exist in the target EPUB. " \
|
|
67
|
+
'Renumber the source chapters or remove conflicting chapters from the target.'
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def chapter_numbers_in(dir)
|
|
71
|
+
Dir.glob(File.join(dir, 'chapter_*.xhtml')).filter_map do |path|
|
|
72
|
+
basename = File.basename(path, '.xhtml')
|
|
73
|
+
if (m = basename.match(/_(\d+)_5\z/))
|
|
74
|
+
m[1].to_f + 0.5
|
|
75
|
+
elsif (m = basename.match(/_(\d+)\z/))
|
|
76
|
+
m[1].to_f
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
data/lib/epub_tools/book_builder.rb
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'fileutils'
|
|
4
|
+
require_relative 'loggable'
|
|
5
|
+
require_relative 'xhtml_extractor'
|
|
6
|
+
require_relative 'split_chapters'
|
|
7
|
+
require_relative 'add_chapters'
|
|
8
|
+
require_relative 'pack_ebook'
|
|
9
|
+
require_relative 'compile_workspace'
|
|
10
|
+
require_relative 'chapter_validator'
|
|
11
|
+
|
|
12
|
+
module EpubTools
|
|
13
|
+
# Base class for book-building workflows (compile and append).
|
|
14
|
+
# Uses template method pattern — subclasses override hooks to customize behavior.
|
|
15
|
+
class BookBuilder
|
|
16
|
+
include Loggable
|
|
17
|
+
|
|
18
|
+
attr_reader :source_dir, :build_dir, :verbose
|
|
19
|
+
|
|
20
|
+
def initialize(options = {})
|
|
21
|
+
@source_dir = options.fetch(:source_dir)
|
|
22
|
+
@build_dir = options[:build_dir] || File.join(Dir.pwd, '.epub_tools_build')
|
|
23
|
+
@verbose = options[:verbose] || false
|
|
24
|
+
@workspace = CompileWorkspace.new(@build_dir)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Run the full build workflow
|
|
28
|
+
# @return [String] Path to the output EPUB file
|
|
29
|
+
def run
|
|
30
|
+
setup_workspace
|
|
31
|
+
prepare_epub
|
|
32
|
+
extract_xhtmls
|
|
33
|
+
split_xhtmls
|
|
34
|
+
validate_chapters
|
|
35
|
+
before_add_chapters
|
|
36
|
+
add_chapters
|
|
37
|
+
pack_epub
|
|
38
|
+
finalize_and_cleanup
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
# Hook: called before extract/split to set up the EPUB target
|
|
44
|
+
def prepare_epub; end
|
|
45
|
+
|
|
46
|
+
# Hook: called after validation, before adding chapters
|
|
47
|
+
def before_add_chapters; end
|
|
48
|
+
|
|
49
|
+
# Subclasses must implement: the book title used when splitting chapters
|
|
50
|
+
def book_title
|
|
51
|
+
raise NotImplementedError, "#{self.class} must implement #book_title"
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Subclasses must implement: the output file path for pack_epub
|
|
55
|
+
def output_path
|
|
56
|
+
raise NotImplementedError, "#{self.class} must implement #output_path"
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def setup_workspace
|
|
60
|
+
@workspace.clean
|
|
61
|
+
@workspace.prepare_directories
|
|
62
|
+
log 'Preparing build directories...'
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def extract_xhtmls
|
|
66
|
+
log "Extracting XHTML files from EPUBs in '#{source_dir}'..."
|
|
67
|
+
XHTMLExtractor.new(source_dir: source_dir, target_dir: @workspace.xhtml_dir, verbose: verbose).run
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def split_xhtmls
|
|
71
|
+
Dir.glob(File.join(@workspace.xhtml_dir, '*.xhtml')).each { |f| split_xhtml_file(f) }
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def split_xhtml_file(xhtml_file)
|
|
75
|
+
log "Splitting '#{File.basename(xhtml_file, '.xhtml')}'..."
|
|
76
|
+
SplitChapters.new(
|
|
77
|
+
input_file: xhtml_file, book_title: book_title,
|
|
78
|
+
output_dir: @workspace.chapters_dir, output_prefix: 'chapter', verbose: verbose
|
|
79
|
+
).run
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def validate_chapters
|
|
83
|
+
ChapterValidator.new(chapters_dir: @workspace.chapters_dir, verbose: verbose).validate
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def add_chapters
|
|
87
|
+
log 'Adding chapters to EPUB...'
|
|
88
|
+
AddChapters.new(
|
|
89
|
+
chapters_dir: @workspace.chapters_dir,
|
|
90
|
+
oebps_dir: epub_oebps_dir,
|
|
91
|
+
verbose: verbose
|
|
92
|
+
).run
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def pack_epub
|
|
96
|
+
log "Building EPUB '#{output_path}'..."
|
|
97
|
+
PackEbook.new(input_dir: @workspace.epub_dir, output_file: output_path, verbose: verbose).run
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def finalize_and_cleanup
|
|
101
|
+
log "Done. Output EPUB: #{File.expand_path(output_path)}"
|
|
102
|
+
@workspace.clean
|
|
103
|
+
output_path
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def epub_oebps_dir = File.join(@workspace.epub_dir, 'OEBPS')
|
|
107
|
+
end
|
|
108
|
+
end
|
data/lib/epub_tools/chapter_marker_detector.rb
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module EpubTools
|
|
4
|
+
# Detects chapter boundary markers in XHTML nodes.
|
|
5
|
+
# Recognizes: "Chapter N", "Chapter N (continued)", and "Prologue".
|
|
6
|
+
class ChapterMarkerDetector
|
|
7
|
+
# Tags that can contain chapter markers
|
|
8
|
+
MARKER_TAGS = %w[p span h2 h3 h4].freeze
|
|
9
|
+
# Tags that can contain prologue markers
|
|
10
|
+
PROLOGUE_TAGS = %w[h3 h4].freeze
|
|
11
|
+
|
|
12
|
+
# Detect what type of chapter marker a node represents
|
|
13
|
+
# @param node [Nokogiri::XML::Node] The XHTML node to check
|
|
14
|
+
# @return [Symbol, nil] :chapter, :continued, :prologue, or nil
|
|
15
|
+
def detect(node)
|
|
16
|
+
if continued_marker?(node)
|
|
17
|
+
:continued
|
|
18
|
+
elsif chapter_marker?(node)
|
|
19
|
+
:chapter
|
|
20
|
+
elsif prologue_marker?(node)
|
|
21
|
+
:prologue
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Extract the chapter number from a node's text
|
|
26
|
+
# @param node [Nokogiri::XML::Node] A node containing "Chapter N" text
|
|
27
|
+
# @return [Integer] The chapter number
|
|
28
|
+
def extract_chapter_number(node)
|
|
29
|
+
node.text.match(/Chapter\s+(\d+)/i)[1].to_i
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
private
|
|
33
|
+
|
|
34
|
+
def continued_marker?(node)
|
|
35
|
+
MARKER_TAGS.include?(node.name) && node.text.match?(/Chapter\s+\d+\s*\(continued\)/i)
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def chapter_marker?(node)
|
|
39
|
+
MARKER_TAGS.include?(node.name) && node.text.match?(/Chapter\s+\d+/i) && !continued_marker?(node)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def prologue_marker?(node)
|
|
43
|
+
PROLOGUE_TAGS.include?(node.name) && node.text.strip.match?(/\APrologue\z/i)
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
data/lib/epub_tools/chapter_validator.rb
CHANGED
|
@@ -12,6 +12,9 @@ module EpubTools
|
|
|
12
12
|
@verbose = verbose
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
+
# Validates that integer chapter numbers form a complete sequence with no gaps.
|
|
16
|
+
# Half-chapters (e.g. chapter_5_5.xhtml) are recognized but not required.
|
|
17
|
+
# @raise [RuntimeError] if no chapter files are found or if integer chapters have gaps
|
|
15
18
|
def validate
|
|
16
19
|
log 'Validating chapter sequence...'
|
|
17
20
|
nums = extract_chapter_numbers
|
|
@@ -22,18 +25,25 @@ module EpubTools
|
|
|
22
25
|
private
|
|
23
26
|
|
|
24
27
|
def extract_chapter_numbers
|
|
25
|
-
nums = Dir.glob(File.join(@chapters_dir, '*.xhtml')).
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
end
|
|
29
|
-
end.compact
|
|
28
|
+
nums = Dir.glob(File.join(@chapters_dir, '*.xhtml')).filter_map do |file|
|
|
29
|
+
extract_chapter_number(File.basename(file, '.xhtml'))
|
|
30
|
+
end
|
|
30
31
|
raise "No chapter files found in #{@chapters_dir}" if nums.empty?
|
|
31
32
|
|
|
32
33
|
nums.sort.uniq
|
|
33
34
|
end
|
|
34
35
|
|
|
36
|
+
def extract_chapter_number(basename)
|
|
37
|
+
if (m = basename.match(/_(\d+)_5\z/))
|
|
38
|
+
m[1].to_i + 0.5
|
|
39
|
+
elsif (m = basename.match(/_(\d+)\z/))
|
|
40
|
+
m[1].to_i
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
35
44
|
def check_sequence_completeness(sorted)
|
|
36
|
-
|
|
45
|
+
integers = sorted.select { |n| n == n.to_i }.map(&:to_i)
|
|
46
|
+
missing = (integers.first..integers.last).to_a - integers
|
|
37
47
|
raise "Missing chapter numbers: #{missing.join(' ')}" if missing.any?
|
|
38
48
|
end
|
|
39
49
|
end
|
data/lib/epub_tools/cli/command_options_configurator.rb
CHANGED
|
@@ -96,6 +96,19 @@ module EpubTools
|
|
|
96
96
|
end.with_verbose_option
|
|
97
97
|
end
|
|
98
98
|
|
|
99
|
+
# Configure options for the 'append' command
|
|
100
|
+
# @param builder [OptionBuilder] Option builder instance
|
|
101
|
+
def configure_append_options(builder)
|
|
102
|
+
builder.with_custom_options do |opts, options|
|
|
103
|
+
opts.on('-s DIR', '--source-dir DIR', 'Directory with EPUBs to append (required)') do |v|
|
|
104
|
+
options[:source_dir] = v
|
|
105
|
+
end
|
|
106
|
+
opts.on('-t FILE', '--target-epub FILE', 'Existing EPUB file to append to (required)') do |v|
|
|
107
|
+
options[:target_epub] = v
|
|
108
|
+
end
|
|
109
|
+
end.with_verbose_option
|
|
110
|
+
end
|
|
111
|
+
|
|
99
112
|
# Configure options for the 'compile' command
|
|
100
113
|
# @param builder [OptionBuilder] Option builder instance
|
|
101
114
|
def configure_compile_options(builder)
|
data/lib/epub_tools/cli/runner.rb
CHANGED
|
@@ -107,6 +107,7 @@ module EpubTools
|
|
|
107
107
|
puts ' pack Package an EPUB directory into a .epub file'
|
|
108
108
|
puts ' unpack Unpack an EPUB file into a directory'
|
|
109
109
|
puts ' compile Takes EPUBs in a dir and splits, cleans, and compiles into a single EPUB.'
|
|
110
|
+
puts ' append Extracts and splits EPUBs from a dir and appends them to an existing EPUB.'
|
|
110
111
|
end
|
|
111
112
|
end
|
|
112
113
|
end
|
data/lib/epub_tools/cli.rb
CHANGED
|
@@ -27,6 +27,7 @@ module EpubTools
|
|
|
27
27
|
registry.register('pack', EpubTools::PackEbook, %i[input_dir output_file], { verbose: true })
|
|
28
28
|
registry.register('unpack', EpubTools::UnpackEbook, [:epub_file], { verbose: true })
|
|
29
29
|
registry.register('compile', EpubTools::CompileBook, %i[title author source_dir], { verbose: true })
|
|
30
|
+
registry.register('append', EpubTools::AppendBook, %i[source_dir target_epub], { verbose: true })
|
|
30
31
|
end
|
|
31
32
|
end
|
|
32
33
|
end
|
data/lib/epub_tools/compile_book.rb
CHANGED
|
@@ -1,150 +1,36 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
1
|
# frozen_string_literal: true
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
require_relative 'loggable'
|
|
6
|
-
require_relative 'xhtml_extractor'
|
|
7
|
-
require_relative 'split_chapters'
|
|
3
|
+
require_relative 'book_builder'
|
|
8
4
|
require_relative 'epub_initializer'
|
|
9
|
-
require_relative 'add_chapters'
|
|
10
|
-
require_relative 'pack_ebook'
|
|
11
|
-
require_relative 'compile_workspace'
|
|
12
|
-
require_relative 'chapter_validator'
|
|
13
5
|
|
|
14
6
|
module EpubTools
|
|
15
|
-
#
|
|
16
|
-
class CompileBook
|
|
17
|
-
|
|
7
|
+
# Compiles a new EPUB from source EPUBs by extracting, splitting, and repackaging
|
|
8
|
+
class CompileBook < BookBuilder
|
|
9
|
+
attr_reader :title, :author, :cover_image, :output_file
|
|
18
10
|
|
|
19
|
-
# Book title
|
|
20
|
-
attr_reader :title
|
|
21
|
-
# Book author
|
|
22
|
-
attr_reader :author
|
|
23
|
-
# Path of the input epubs
|
|
24
|
-
attr_reader :source_dir
|
|
25
|
-
# Optional path to the cover image
|
|
26
|
-
attr_reader :cover_image
|
|
27
|
-
# Filename for the final epub
|
|
28
|
-
attr_reader :output_file
|
|
29
|
-
# Optional working directory for intermediate files
|
|
30
|
-
attr_reader :build_dir
|
|
31
|
-
# Whether to print progress to STDOUT
|
|
32
|
-
attr_reader :verbose
|
|
33
|
-
|
|
34
|
-
# Initializes the class
|
|
35
|
-
# @param options [Hash] Configuration options
|
|
36
|
-
# @option options [String] :title Book title (required)
|
|
37
|
-
# @option options [String] :author Book author (required)
|
|
38
|
-
# @option options [String] :source_dir Path of the input epubs (required)
|
|
39
|
-
# @option options [String] :cover_image Optional path to the cover image
|
|
40
|
-
# @option options [String] :output_file Filename for the final epub (default: [title].epub)
|
|
41
|
-
# @option options [String] :build_dir Optional working directory for intermediate files
|
|
42
|
-
# @option options [Boolean] :verbose Whether to print progress to STDOUT (default: false)
|
|
43
11
|
def initialize(options = {})
|
|
12
|
+
super
|
|
44
13
|
@title = options.fetch(:title)
|
|
45
14
|
@author = options.fetch(:author)
|
|
46
|
-
@source_dir = options.fetch(:source_dir)
|
|
47
15
|
@cover_image = options[:cover_image]
|
|
48
16
|
@output_file = options[:output_file] || default_output_file
|
|
49
|
-
@build_dir = options[:build_dir] || File.join(Dir.pwd, '.epub_tools_build')
|
|
50
|
-
@verbose = options[:verbose] || false
|
|
51
|
-
@workspace = CompileWorkspace.new(@build_dir)
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
# Run the full compile workflow
|
|
55
|
-
def run
|
|
56
|
-
setup_workspace
|
|
57
|
-
extract_xhtmls
|
|
58
|
-
split_xhtmls
|
|
59
|
-
validate_chapters
|
|
60
|
-
initialize_epub
|
|
61
|
-
add_chapters
|
|
62
|
-
pack_epub
|
|
63
|
-
finalize_and_cleanup
|
|
64
17
|
end
|
|
65
18
|
|
|
66
19
|
private
|
|
67
20
|
|
|
68
|
-
def
|
|
69
|
-
@workspace.clean
|
|
70
|
-
log "Cleaning build directory #{@build_dir}..."
|
|
71
|
-
@workspace.prepare_directories
|
|
72
|
-
log 'Preparing build directories...'
|
|
73
|
-
end
|
|
21
|
+
def book_title = @title
|
|
74
22
|
|
|
75
|
-
def
|
|
76
|
-
log "Done. Output EPUB: #{File.expand_path(output_file)}"
|
|
77
|
-
@workspace.clean
|
|
78
|
-
output_file
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
def default_output_file
|
|
82
|
-
"#{title.gsub(' ', '_')}.epub"
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
def extract_xhtmls
|
|
86
|
-
log "Extracting XHTML files from epubs in '#{source_dir}'..."
|
|
87
|
-
XHTMLExtractor.new({
|
|
88
|
-
source_dir: source_dir,
|
|
89
|
-
target_dir: @workspace.xhtml_dir,
|
|
90
|
-
verbose: verbose
|
|
91
|
-
}).run
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
def split_xhtmls
|
|
95
|
-
log 'Splitting XHTML files into chapters...'
|
|
96
|
-
Dir.glob(File.join(@workspace.xhtml_dir, '*.xhtml')).each do |xhtml_file|
|
|
97
|
-
split_xhtml_file(xhtml_file)
|
|
98
|
-
end
|
|
99
|
-
end
|
|
23
|
+
def output_path = @output_file
|
|
100
24
|
|
|
101
|
-
def
|
|
102
|
-
base = File.basename(xhtml_file, '.xhtml')
|
|
103
|
-
log "Splitting '#{base}'..."
|
|
104
|
-
SplitChapters.new(build_split_options(xhtml_file)).run
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
def build_split_options(xhtml_file)
|
|
108
|
-
{
|
|
109
|
-
input_file: xhtml_file,
|
|
110
|
-
book_title: title,
|
|
111
|
-
output_dir: @workspace.chapters_dir,
|
|
112
|
-
output_prefix: 'chapter',
|
|
113
|
-
verbose: verbose
|
|
114
|
-
}
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
def validate_chapters
|
|
118
|
-
ChapterValidator.new(chapters_dir: @workspace.chapters_dir, verbose: verbose).validate
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
def initialize_epub
|
|
25
|
+
def before_add_chapters
|
|
122
26
|
log 'Initializing new EPUB...'
|
|
123
|
-
EpubInitializer.new(build_epub_options).run
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
def build_epub_options
|
|
127
27
|
options = { title: title, author: author, destination: @workspace.epub_dir }
|
|
128
28
|
options[:cover_image] = cover_image if cover_image
|
|
129
|
-
options
|
|
130
|
-
end
|
|
131
|
-
|
|
132
|
-
def add_chapters
|
|
133
|
-
log 'Adding chapters to EPUB...'
|
|
134
|
-
AddChapters.new({
|
|
135
|
-
chapters_dir: @workspace.chapters_dir,
|
|
136
|
-
epub_dir: File.join(@workspace.epub_dir, 'OEBPS'),
|
|
137
|
-
verbose: verbose
|
|
138
|
-
}).run
|
|
29
|
+
EpubInitializer.new(options).run
|
|
139
30
|
end
|
|
140
31
|
|
|
141
|
-
def
|
|
142
|
-
|
|
143
|
-
PackEbook.new({
|
|
144
|
-
input_dir: @workspace.epub_dir,
|
|
145
|
-
output_file: output_file,
|
|
146
|
-
verbose: verbose
|
|
147
|
-
}).run
|
|
32
|
+
def default_output_file
|
|
33
|
+
"#{title.gsub(' ', '_')}.epub"
|
|
148
34
|
end
|
|
149
35
|
end
|
|
150
36
|
end
|
data/lib/epub_tools/split_chapters.rb
CHANGED
|
@@ -7,48 +7,29 @@ require 'fileutils'
|
|
|
7
7
|
require_relative 'loggable'
|
|
8
8
|
require_relative 'style_finder'
|
|
9
9
|
require_relative 'xhtml_cleaner'
|
|
10
|
+
require_relative 'chapter_marker_detector'
|
|
10
11
|
|
|
11
12
|
module EpubTools
|
|
12
|
-
#
|
|
13
|
-
# chapters and it:
|
|
14
|
-
# - Extracts classes using {StyleFinder}[rdoc-ref:EpubTools::StyleFinder]
|
|
15
|
-
# - Looks for tags that say something like Chapter XX or Prologue and splits the text there
|
|
16
|
-
# - Creates new chapter_XX.xhtml files that are cleaned using
|
|
17
|
-
# {XHTMLCleaner}[rdoc-ref:EpubTools::XHTMLCleaner]
|
|
18
|
-
# - Saves those files to +output_dir+
|
|
13
|
+
# Splits a multi-chapter XHTML file into individual chapter files.
|
|
19
14
|
class SplitChapters
|
|
20
15
|
include Loggable
|
|
21
16
|
|
|
22
|
-
# Initializes the class
|
|
23
|
-
# @param options [Hash] Configuration options
|
|
24
|
-
# @option options [String] :input_file Path to the source XHTML (required)
|
|
25
|
-
# @option options [String] :book_title Title to use in HTML <title> tags (required)
|
|
26
|
-
# @option options [String] :output_dir Where to write chapter files (default: './chapters')
|
|
27
|
-
# @option options [String] :output_prefix Filename prefix for chapter files (default: 'chapter')
|
|
28
|
-
# @option options [Boolean] :verbose Whether to print progress to STDOUT (default: false)
|
|
29
17
|
def initialize(options = {})
|
|
30
18
|
@input_file = options.fetch(:input_file)
|
|
31
19
|
@book_title = options.fetch(:book_title)
|
|
32
20
|
@output_dir = options[:output_dir] || './chapters'
|
|
33
21
|
@output_prefix = options[:output_prefix] || 'chapter'
|
|
34
22
|
@verbose = options[:verbose] || false
|
|
23
|
+
@detector = ChapterMarkerDetector.new
|
|
35
24
|
end
|
|
36
25
|
|
|
37
26
|
# Runs the splitter
|
|
38
27
|
# @return [Array<String>] List of generated chapter file paths
|
|
39
28
|
def run
|
|
40
|
-
# Prepare output dir
|
|
41
29
|
FileUtils.mkdir_p(@output_dir)
|
|
42
|
-
|
|
43
|
-
# Read the doc
|
|
44
|
-
raw_content = read_and_strip_problematic_tags
|
|
45
|
-
doc = Nokogiri::HTML(raw_content)
|
|
46
|
-
|
|
47
|
-
# Find Style Classes
|
|
30
|
+
doc = Nokogiri::HTML(read_and_strip_problematic_tags)
|
|
48
31
|
StyleFinder.new({ file_path: @input_file, verbose: @verbose }).run
|
|
49
|
-
|
|
50
|
-
chapters = extract_chapters(doc)
|
|
51
|
-
write_chapter_files(chapters)
|
|
32
|
+
extract_chapters(doc).map { |number, content| write_chapter_file(number, content) }
|
|
52
33
|
end
|
|
53
34
|
|
|
54
35
|
private
|
|
@@ -66,53 +47,37 @@ module EpubTools
|
|
|
66
47
|
current_number, current_fragment = process_node(node, chapters, current_number, current_fragment)
|
|
67
48
|
end
|
|
68
49
|
|
|
69
|
-
|
|
50
|
+
chapters[current_number] = current_fragment.to_html if current_number
|
|
51
|
+
chapters
|
|
70
52
|
end
|
|
71
53
|
|
|
72
54
|
def process_node(node, chapters, current_number, current_fragment)
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
start_prologue(chapters, current_number, current_fragment)
|
|
55
|
+
marker = @detector.detect(node)
|
|
56
|
+
if marker
|
|
57
|
+
start_chapter(chapters, marker_number(marker, node), current_number, current_fragment)
|
|
77
58
|
else
|
|
78
59
|
current_fragment&.add_child(node.dup)
|
|
79
60
|
[current_number, current_fragment]
|
|
80
61
|
end
|
|
81
62
|
end
|
|
82
63
|
|
|
83
|
-
def
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
chapter_number = node.text.match(/Chapter\s+(\d+)/i)[1].to_i
|
|
90
|
-
[chapter_number, Nokogiri::HTML::DocumentFragment.parse('')]
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
def start_prologue(chapters, current_number, current_fragment)
|
|
94
|
-
chapters[current_number] = current_fragment.to_html if current_number
|
|
95
|
-
[0, Nokogiri::HTML::DocumentFragment.parse('')]
|
|
64
|
+
def marker_number(marker, node)
|
|
65
|
+
case marker
|
|
66
|
+
when :continued then @detector.extract_chapter_number(node) + 0.5
|
|
67
|
+
when :chapter then @detector.extract_chapter_number(node)
|
|
68
|
+
when :prologue then 0
|
|
69
|
+
end
|
|
96
70
|
end
|
|
97
71
|
|
|
98
|
-
def
|
|
72
|
+
def start_chapter(chapters, number, current_number, current_fragment)
|
|
99
73
|
chapters[current_number] = current_fragment.to_html if current_number
|
|
100
|
-
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
def write_chapter_files(chapters)
|
|
104
|
-
chapter_files = []
|
|
105
|
-
chapters.each do |number, content|
|
|
106
|
-
filename = write_chapter_file(number, content)
|
|
107
|
-
chapter_files << filename
|
|
108
|
-
end
|
|
109
|
-
chapter_files
|
|
74
|
+
[number, Nokogiri::HTML::DocumentFragment.parse('')]
|
|
110
75
|
end
|
|
111
76
|
|
|
112
77
|
def write_chapter_file(label, content)
|
|
113
|
-
|
|
114
|
-
filename = File.join(@output_dir, "#{@output_prefix}_#{label}.xhtml")
|
|
115
|
-
File.write(filename, build_xhtml_template(
|
|
78
|
+
display = display_label(label)
|
|
79
|
+
filename = File.join(@output_dir, "#{@output_prefix}_#{file_label(label)}.xhtml")
|
|
80
|
+
File.write(filename, build_xhtml_template(display, content))
|
|
116
81
|
XHTMLCleaner.new({ filename: filename }).run
|
|
117
82
|
log("Extracted: #{filename}")
|
|
118
83
|
filename
|
|
@@ -134,16 +99,14 @@ module EpubTools
|
|
|
134
99
|
HTML
|
|
135
100
|
end
|
|
136
101
|
|
|
137
|
-
def
|
|
138
|
-
label.
|
|
102
|
+
def file_label(label)
|
|
103
|
+
label.is_a?(Float) ? label.to_s.gsub('.', '_') : label.to_s
|
|
139
104
|
end
|
|
140
105
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
return false unless %w[h3 h4].include?(node.name)
|
|
144
|
-
return false unless node.text.strip =~ /\APrologue\z/i
|
|
106
|
+
def display_label(label)
|
|
107
|
+
return 'Prologue' if label.zero?
|
|
145
108
|
|
|
146
|
-
|
|
109
|
+
"Chapter #{label}"
|
|
147
110
|
end
|
|
148
111
|
end
|
|
149
112
|
end
|
data/lib/epub_tools/version.rb
CHANGED
data/lib/epub_tools.rb
CHANGED
|
@@ -5,11 +5,14 @@ require_relative 'epub_tools/loggable'
|
|
|
5
5
|
require_relative 'epub_tools/add_chapters'
|
|
6
6
|
require_relative 'epub_tools/epub_initializer'
|
|
7
7
|
require_relative 'epub_tools/split_chapters'
|
|
8
|
+
require_relative 'epub_tools/chapter_marker_detector'
|
|
8
9
|
require_relative 'epub_tools/xhtml_cleaner'
|
|
9
10
|
require_relative 'epub_tools/xhtml_extractor'
|
|
10
11
|
require_relative 'epub_tools/pack_ebook'
|
|
11
12
|
require_relative 'epub_tools/unpack_ebook'
|
|
13
|
+
require_relative 'epub_tools/book_builder'
|
|
12
14
|
require_relative 'epub_tools/compile_book'
|
|
15
|
+
require_relative 'epub_tools/append_book'
|
|
13
16
|
require_relative 'epub_tools/cli'
|
|
14
17
|
|
|
15
18
|
# Wrapper for all the other classes
|
data/test/add_chapters_test.rb
CHANGED
|
@@ -121,3 +121,73 @@ class AddChaptersTest < Minitest::Test
|
|
|
121
121
|
nav_doc.xpath('//nav/ol/li/a')
|
|
122
122
|
end
|
|
123
123
|
end
|
|
124
|
+
|
|
125
|
+
class AddChaptersHalfChapterTest < Minitest::Test
|
|
126
|
+
def setup
|
|
127
|
+
@tmp = Dir.mktmpdir
|
|
128
|
+
@chapters_dir = File.join(@tmp, 'chapters')
|
|
129
|
+
@epub_dir = File.join(@tmp, 'OEBPS')
|
|
130
|
+
Dir.mkdir(@chapters_dir)
|
|
131
|
+
Dir.mkdir(@epub_dir)
|
|
132
|
+
create_chapter_files
|
|
133
|
+
create_opf_and_nav
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def teardown
|
|
137
|
+
FileUtils.remove_entry(@tmp)
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def test_half_chapter_sorting_and_labels
|
|
141
|
+
result = EpubTools::AddChapters.new(chapters_dir: @chapters_dir, oebps_dir: @epub_dir).run
|
|
142
|
+
|
|
143
|
+
assert_equal %w[chapter_1.xhtml chapter_1_5.xhtml chapter_2.xhtml], result
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def test_half_chapter_nav_label
|
|
147
|
+
EpubTools::AddChapters.new(chapters_dir: @chapters_dir, oebps_dir: @epub_dir).run
|
|
148
|
+
|
|
149
|
+
nav_doc = Nokogiri::XML(File.read(File.join(@epub_dir, 'nav.xhtml')))
|
|
150
|
+
nav_doc.remove_namespaces!
|
|
151
|
+
links = nav_doc.xpath('//nav/ol/li/a')
|
|
152
|
+
|
|
153
|
+
assert_equal 'Chapter 1', links[0].text
|
|
154
|
+
assert_equal 'Chapter 1.5', links[1].text
|
|
155
|
+
assert_equal 'Chapter 2', links[2].text
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
def test_half_chapter_opf_id
|
|
159
|
+
EpubTools::AddChapters.new(chapters_dir: @chapters_dir, oebps_dir: @epub_dir).run
|
|
160
|
+
|
|
161
|
+
doc = Nokogiri::XML(File.read(File.join(@epub_dir, 'package.opf')))
|
|
162
|
+
ids = doc.xpath('//xmlns:manifest/xmlns:item').map { |i| i['id'] }
|
|
163
|
+
|
|
164
|
+
assert_includes ids, 'chap1_5'
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
private
|
|
168
|
+
|
|
169
|
+
def create_chapter_files
|
|
170
|
+
File.write(File.join(@chapters_dir, 'chapter_1.xhtml'), '<html><body>Ch1</body></html>')
|
|
171
|
+
File.write(File.join(@chapters_dir, 'chapter_1_5.xhtml'), '<html><body>Ch1.5</body></html>')
|
|
172
|
+
File.write(File.join(@chapters_dir, 'chapter_2.xhtml'), '<html><body>Ch2</body></html>')
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
def create_opf_and_nav
|
|
176
|
+
File.write(File.join(@epub_dir, 'package.opf'), <<~XML)
|
|
177
|
+
<?xml version="1.0"?>
|
|
178
|
+
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="pub-id" xml:lang="en">
|
|
179
|
+
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/"></metadata>
|
|
180
|
+
<manifest></manifest>
|
|
181
|
+
<spine></spine>
|
|
182
|
+
</package>
|
|
183
|
+
XML
|
|
184
|
+
File.write(File.join(@epub_dir, 'nav.xhtml'), <<~XHTML)
|
|
185
|
+
<?xml version="1.0" encoding="utf-8"?>
|
|
186
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en">
|
|
187
|
+
<body>
|
|
188
|
+
<nav epub:type="toc" id="toc"><ol></ol></nav>
|
|
189
|
+
</body>
|
|
190
|
+
</html>
|
|
191
|
+
XHTML
|
|
192
|
+
end
|
|
193
|
+
end
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'test_helper'
|
|
4
|
+
require_relative '../lib/epub_tools/append_book'
|
|
5
|
+
|
|
6
|
+
class AppendBookTest < Minitest::Test
|
|
7
|
+
def setup
|
|
8
|
+
@tmp = Dir.mktmpdir
|
|
9
|
+
@source = File.join(@tmp, 'src')
|
|
10
|
+
@target = File.join(@tmp, 'target.epub')
|
|
11
|
+
FileUtils.mkdir_p(@source)
|
|
12
|
+
FileUtils.touch(@target)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def teardown
|
|
16
|
+
FileUtils.rm_rf(@tmp)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def test_initialize_assigns_attributes
|
|
20
|
+
ab = build_append_book(verbose: true)
|
|
21
|
+
|
|
22
|
+
assert_equal @source, ab.source_dir
|
|
23
|
+
assert_equal File.expand_path(@target), ab.target_epub
|
|
24
|
+
assert_equal @tmp, ab.build_dir
|
|
25
|
+
assert ab.verbose
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def test_default_build_dir
|
|
29
|
+
ab = EpubTools::AppendBook.new(source_dir: @source, target_epub: @target)
|
|
30
|
+
|
|
31
|
+
assert ab.build_dir.end_with?('.epub_tools_build')
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def test_log_outputs_when_verbose
|
|
35
|
+
ab = build_append_book(verbose: true)
|
|
36
|
+
|
|
37
|
+
assert_output("hello\n") { ab.send(:log, 'hello') }
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def test_log_silent_when_not_verbose
|
|
41
|
+
ab = build_append_book(verbose: false)
|
|
42
|
+
|
|
43
|
+
assert_silent { ab.send(:log, 'hello') }
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def test_detect_conflicts_raises_on_overlap
|
|
47
|
+
ab = build_append_book
|
|
48
|
+
setup_conflict_dirs(ab, new_chapters: [1, 2], existing_chapters: [1])
|
|
49
|
+
|
|
50
|
+
error = assert_raises(ArgumentError) { ab.send(:detect_conflicts) }
|
|
51
|
+
assert_match(/chapters 1 already exist/, error.message)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def test_detect_conflicts_passes_with_no_overlap
|
|
55
|
+
ab = build_append_book
|
|
56
|
+
setup_conflict_dirs(ab, new_chapters: [5], existing_chapters: [1, 2])
|
|
57
|
+
|
|
58
|
+
ab.send(:detect_conflicts)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def test_detect_conflicts_with_half_chapters
|
|
62
|
+
ab = build_append_book
|
|
63
|
+
chapters_dir = File.join(@tmp, 'chapters')
|
|
64
|
+
oebps_dir = File.join(@tmp, 'epub', 'OEBPS')
|
|
65
|
+
FileUtils.mkdir_p([chapters_dir, oebps_dir])
|
|
66
|
+
|
|
67
|
+
FileUtils.touch(File.join(chapters_dir, 'chapter_3_5.xhtml'))
|
|
68
|
+
FileUtils.touch(File.join(oebps_dir, 'chapter_3_5.xhtml'))
|
|
69
|
+
|
|
70
|
+
workspace = ab.instance_variable_get(:@workspace)
|
|
71
|
+
workspace.instance_variable_set(:@chapters_dir, chapters_dir)
|
|
72
|
+
workspace.instance_variable_set(:@epub_dir, File.join(@tmp, 'epub'))
|
|
73
|
+
|
|
74
|
+
error = assert_raises(ArgumentError) { ab.send(:detect_conflicts) }
|
|
75
|
+
assert_match(/3\.5 already exist/, error.message)
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def test_run_completes_workflow
|
|
79
|
+
ab = build_append_book
|
|
80
|
+
|
|
81
|
+
def ab.prepare_epub; end
|
|
82
|
+
def ab.extract_xhtmls; end
|
|
83
|
+
def ab.split_xhtmls; end
|
|
84
|
+
def ab.validate_chapters; end
|
|
85
|
+
def ab.before_add_chapters; end
|
|
86
|
+
def ab.add_chapters; end
|
|
87
|
+
def ab.pack_epub; end
|
|
88
|
+
|
|
89
|
+
result = ab.run
|
|
90
|
+
|
|
91
|
+
assert_equal File.expand_path(@target), result
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def test_backup_creates_bak_file
|
|
95
|
+
File.write(@target, 'epub content')
|
|
96
|
+
ab = build_append_book
|
|
97
|
+
|
|
98
|
+
ab.send(:backup_target)
|
|
99
|
+
|
|
100
|
+
backup_path = "#{File.expand_path(@target)}.bak"
|
|
101
|
+
|
|
102
|
+
assert_path_exists backup_path
|
|
103
|
+
assert_equal 'epub content', File.read(backup_path)
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
private
|
|
107
|
+
|
|
108
|
+
def build_append_book(verbose: false)
|
|
109
|
+
EpubTools::AppendBook.new(
|
|
110
|
+
source_dir: @source, target_epub: @target,
|
|
111
|
+
build_dir: @tmp, verbose: verbose
|
|
112
|
+
)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def setup_conflict_dirs(append_book, new_chapters:, existing_chapters:)
|
|
116
|
+
chapters_dir = File.join(@tmp, 'chapters')
|
|
117
|
+
oebps_dir = File.join(@tmp, 'epub', 'OEBPS')
|
|
118
|
+
FileUtils.mkdir_p([chapters_dir, oebps_dir])
|
|
119
|
+
|
|
120
|
+
new_chapters.each { |n| FileUtils.touch(File.join(chapters_dir, "chapter_#{n}.xhtml")) }
|
|
121
|
+
existing_chapters.each { |n| FileUtils.touch(File.join(oebps_dir, "chapter_#{n}.xhtml")) }
|
|
122
|
+
|
|
123
|
+
workspace = append_book.instance_variable_get(:@workspace)
|
|
124
|
+
workspace.instance_variable_set(:@chapters_dir, chapters_dir)
|
|
125
|
+
workspace.instance_variable_set(:@epub_dir, File.join(@tmp, 'epub'))
|
|
126
|
+
end
|
|
127
|
+
end
|
|
@@ -37,6 +37,27 @@ class ChapterValidatorTest < Minitest::Test
|
|
|
37
37
|
assert_silent { @validator.validate }
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
+
def test_validates_sequence_with_half_chapters
|
|
41
|
+
create_chapter_files([1, 2, 3])
|
|
42
|
+
create_half_chapter_files([2])
|
|
43
|
+
|
|
44
|
+
assert_silent { @validator.validate }
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def test_validates_sequence_without_half_chapters_present
|
|
48
|
+
create_chapter_files([1, 2, 3])
|
|
49
|
+
|
|
50
|
+
assert_silent { @validator.validate }
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def test_raises_on_missing_integer_despite_half_chapter
|
|
54
|
+
create_chapter_files([1, 3]) # Missing 2
|
|
55
|
+
create_half_chapter_files([1])
|
|
56
|
+
|
|
57
|
+
error = assert_raises(RuntimeError) { @validator.validate }
|
|
58
|
+
assert_match(/Missing chapter numbers: 2/, error.message)
|
|
59
|
+
end
|
|
60
|
+
|
|
40
61
|
private
|
|
41
62
|
|
|
42
63
|
def create_chapter_files(numbers)
|
|
@@ -44,4 +65,10 @@ class ChapterValidatorTest < Minitest::Test
|
|
|
44
65
|
File.write(File.join(@tmp, "chapter_#{num}.xhtml"), '<html></html>')
|
|
45
66
|
end
|
|
46
67
|
end
|
|
68
|
+
|
|
69
|
+
def create_half_chapter_files(numbers)
|
|
70
|
+
numbers.each do |num|
|
|
71
|
+
File.write(File.join(@tmp, "chapter_#{num}_5.xhtml"), '<html></html>')
|
|
72
|
+
end
|
|
73
|
+
end
|
|
47
74
|
end
|
data/test/cli_commands_test.rb
CHANGED
|
@@ -22,6 +22,7 @@ class CLICommandsTest < Minitest::Test
|
|
|
22
22
|
assert_includes output, 'pack'
|
|
23
23
|
assert_includes output, 'unpack'
|
|
24
24
|
assert_includes output, 'compile'
|
|
25
|
+
assert_includes output, 'append'
|
|
25
26
|
end
|
|
26
27
|
|
|
27
28
|
def test_show_version
|
|
@@ -92,6 +93,14 @@ class CLICommandsTest < Minitest::Test
|
|
|
92
93
|
assert_includes output, '--output-file FILE'
|
|
93
94
|
end
|
|
94
95
|
|
|
96
|
+
def test_append_command
|
|
97
|
+
output = `#{@bin_path} append --help`
|
|
98
|
+
|
|
99
|
+
assert_match(/Usage: epub-tools append \[options\]/, output)
|
|
100
|
+
assert_includes output, '--source-dir DIR'
|
|
101
|
+
assert_includes output, '--target-epub FILE'
|
|
102
|
+
end
|
|
103
|
+
|
|
95
104
|
def test_unpack_command
|
|
96
105
|
output = `#{@bin_path} unpack --help`
|
|
97
106
|
|
data/test/compile_book_test.rb
CHANGED
|
@@ -101,11 +101,10 @@ class CompileBookTest < Minitest::Test
|
|
|
101
101
|
output_file: 'test.epub'
|
|
102
102
|
)
|
|
103
103
|
|
|
104
|
-
# Mock the workflow methods to avoid complex file setup
|
|
105
104
|
def cb.extract_xhtmls; end
|
|
106
105
|
def cb.split_xhtmls; end
|
|
107
106
|
def cb.validate_chapters; end
|
|
108
|
-
def cb.
|
|
107
|
+
def cb.before_add_chapters; end
|
|
109
108
|
def cb.add_chapters; end
|
|
110
109
|
def cb.pack_epub; end
|
|
111
110
|
|
data/test/split_chapters_test.rb
CHANGED
|
@@ -92,3 +92,72 @@ class SplitChaptersTest < Minitest::Test
|
|
|
92
92
|
assert_includes ch2, 'Second paragraph'
|
|
93
93
|
end
|
|
94
94
|
end
|
|
95
|
+
|
|
96
|
+
class SplitChaptersContinuedTest < Minitest::Test
|
|
97
|
+
def setup
|
|
98
|
+
@tmp = Dir.mktmpdir
|
|
99
|
+
@input = File.join(@tmp, 'input.xhtml')
|
|
100
|
+
@out = File.join(@tmp, 'out')
|
|
101
|
+
content = <<~HTML
|
|
102
|
+
<?xml version="1.0"?>
|
|
103
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
104
|
+
<body>
|
|
105
|
+
<p>Chapter 1</p>
|
|
106
|
+
<p>First paragraph</p>
|
|
107
|
+
<p>Chapter 1 (Continued)</p>
|
|
108
|
+
<p>Continued text</p>
|
|
109
|
+
<p>Chapter 2</p>
|
|
110
|
+
<p>Second paragraph</p>
|
|
111
|
+
</body>
|
|
112
|
+
</html>
|
|
113
|
+
HTML
|
|
114
|
+
File.write(@input, content)
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def teardown
|
|
118
|
+
FileUtils.remove_entry(@tmp)
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def test_splits_continued_chapters
|
|
122
|
+
result = EpubTools::SplitChapters.new(
|
|
123
|
+
input_file: @input, book_title: 'Test', output_dir: @out, output_prefix: 'chapter'
|
|
124
|
+
).run
|
|
125
|
+
|
|
126
|
+
assert_equal 3, result.size
|
|
127
|
+
assert_includes result, File.join(@out, 'chapter_1.xhtml')
|
|
128
|
+
assert_includes result, File.join(@out, 'chapter_1_5.xhtml')
|
|
129
|
+
assert_includes result, File.join(@out, 'chapter_2.xhtml')
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def test_continued_chapter_content
|
|
133
|
+
EpubTools::SplitChapters.new(
|
|
134
|
+
input_file: @input, book_title: 'Test', output_dir: @out, output_prefix: 'chapter'
|
|
135
|
+
).run
|
|
136
|
+
|
|
137
|
+
continued = File.read(File.join(@out, 'chapter_1_5.xhtml'))
|
|
138
|
+
|
|
139
|
+
assert_includes continued, '<h1>Chapter 1.5</h1>'
|
|
140
|
+
assert_includes continued, 'Continued text'
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def test_continued_marker_case_insensitive
|
|
144
|
+
content = <<~HTML
|
|
145
|
+
<?xml version="1.0"?>
|
|
146
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
147
|
+
<body>
|
|
148
|
+
<p>Chapter 5</p>
|
|
149
|
+
<p>Text</p>
|
|
150
|
+
<p>Chapter 5 (continued)</p>
|
|
151
|
+
<p>More text</p>
|
|
152
|
+
</body>
|
|
153
|
+
</html>
|
|
154
|
+
HTML
|
|
155
|
+
File.write(@input, content)
|
|
156
|
+
|
|
157
|
+
result = EpubTools::SplitChapters.new(
|
|
158
|
+
input_file: @input, book_title: 'Test', output_dir: @out, output_prefix: 'chapter'
|
|
159
|
+
).run
|
|
160
|
+
|
|
161
|
+
assert_includes result, File.join(@out, 'chapter_5_5.xhtml')
|
|
162
|
+
end
|
|
163
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: epub_tools
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.0
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jaime Rodas
|
|
@@ -74,6 +74,9 @@ files:
|
|
|
74
74
|
- epub_tools.gemspec
|
|
75
75
|
- lib/epub_tools.rb
|
|
76
76
|
- lib/epub_tools/add_chapters.rb
|
|
77
|
+
- lib/epub_tools/append_book.rb
|
|
78
|
+
- lib/epub_tools/book_builder.rb
|
|
79
|
+
- lib/epub_tools/chapter_marker_detector.rb
|
|
77
80
|
- lib/epub_tools/chapter_validator.rb
|
|
78
81
|
- lib/epub_tools/cli.rb
|
|
79
82
|
- lib/epub_tools/cli/command_options_configurator.rb
|
|
@@ -97,6 +100,7 @@ files:
|
|
|
97
100
|
- lib/epub_tools/xhtml_generator.rb
|
|
98
101
|
- style.css
|
|
99
102
|
- test/add_chapters_test.rb
|
|
103
|
+
- test/append_book_test.rb
|
|
100
104
|
- test/chapter_validator_test.rb
|
|
101
105
|
- test/cli/command_registry_test.rb
|
|
102
106
|
- test/cli/option_builder_test.rb
|