epub_tools 0.4.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +3 -0
- data/.rubocop.yml +10 -17
- data/CLAUDE.md +128 -0
- data/Gemfile +4 -4
- data/Gemfile.lock +39 -34
- data/README.md +37 -24
- data/Rakefile +2 -0
- data/bin/epub-tools +2 -0
- data/epub_tools.gemspec +3 -1
- data/lib/epub_tools/add_chapters.rb +64 -33
- data/lib/epub_tools/append_book.rb +81 -0
- data/lib/epub_tools/book_builder.rb +108 -0
- data/lib/epub_tools/chapter_marker_detector.rb +46 -0
- data/lib/epub_tools/chapter_validator.rb +50 -0
- data/lib/epub_tools/cli/command_options_configurator.rb +128 -0
- data/lib/epub_tools/cli/command_registry.rb +2 -0
- data/lib/epub_tools/cli/option_builder.rb +5 -3
- data/lib/epub_tools/cli/runner.rb +60 -110
- data/lib/epub_tools/cli.rb +17 -29
- data/lib/epub_tools/compile_book.rb +15 -146
- data/lib/epub_tools/compile_workspace.rb +40 -0
- data/lib/epub_tools/epub_configuration.rb +33 -0
- data/lib/epub_tools/epub_file_writer.rb +57 -0
- data/lib/epub_tools/epub_initializer.rb +83 -162
- data/lib/epub_tools/epub_metadata_builder.rb +92 -0
- data/lib/epub_tools/loggable.rb +2 -0
- data/lib/epub_tools/pack_ebook.rb +28 -14
- data/lib/epub_tools/split_chapters.rb +44 -56
- data/lib/epub_tools/style_finder.rb +17 -6
- data/lib/epub_tools/unpack_ebook.rb +20 -10
- data/lib/epub_tools/version.rb +3 -1
- data/lib/epub_tools/xhtml_cleaner.rb +1 -0
- data/lib/epub_tools/xhtml_extractor.rb +20 -10
- data/lib/epub_tools/xhtml_generator.rb +71 -0
- data/lib/epub_tools.rb +5 -0
- data/test/add_chapters_test.rb +119 -25
- data/test/append_book_test.rb +127 -0
- data/test/chapter_validator_test.rb +74 -0
- data/test/cli/command_registry_test.rb +2 -0
- data/test/cli/option_builder_test.rb +24 -14
- data/test/cli/runner_test.rb +15 -15
- data/test/cli_commands_test.rb +11 -0
- data/test/cli_test.rb +2 -0
- data/test/cli_version_test.rb +2 -0
- data/test/compile_book_test.rb +16 -102
- data/test/compile_workspace_test.rb +55 -0
- data/test/epub_initializer_test.rb +55 -27
- data/test/pack_ebook_test.rb +33 -9
- data/test/split_chapters_test.rb +96 -7
- data/test/style_finder_test.rb +2 -0
- data/test/test_helper.rb +2 -0
- data/test/unpack_ebook_test.rb +45 -20
- data/test/xhtml_cleaner_test.rb +2 -0
- data/test/xhtml_extractor_test.rb +3 -1
- metadata +17 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a379e3e740873230fa8c4f71261006ac79654e74f1fa5ba299aee8742f9aa3c3
|
|
4
|
+
data.tar.gz: c9d83418c1f585a43e3650e3b5189fd74a4f3374a4835b818bc15379f19ede05
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 27bf72687e57c4998831d5e94098ddbeb76f953d99b09d2798e2087b4ce147353df2673e5d1996124047ae3c09741604b80483a79aea471180cdd9769f76a28f
|
|
7
|
+
data.tar.gz: b06cce574ef93ab03769d7734bffd0947453bc4353f50c17edda34943a075e151a44e04916bc4c08890ff51b9f8a7f6d4fde7ba05d591507ba5be0bb6d49a65e
|
data/.github/workflows/ci.yml
CHANGED
data/.rubocop.yml
CHANGED
|
@@ -16,26 +16,19 @@ plugins:
|
|
|
16
16
|
- rubocop-minitest
|
|
17
17
|
- rubocop-rake
|
|
18
18
|
|
|
19
|
-
Style/FrozenStringLiteralComment:
|
|
20
|
-
Enabled: false
|
|
21
19
|
|
|
22
20
|
Metrics/MethodLength:
|
|
23
|
-
|
|
21
|
+
Exclude:
|
|
22
|
+
- 'test/**/*'
|
|
24
23
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
Metrics/AbcSize:
|
|
29
|
-
Enabled: false
|
|
30
|
-
|
|
31
|
-
Metrics/CyclomaticComplexity:
|
|
32
|
-
Enabled: false
|
|
24
|
+
Naming/PredicateMethod:
|
|
25
|
+
Exclude:
|
|
26
|
+
- 'test/**/*'
|
|
33
27
|
|
|
34
|
-
Metrics/
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
Style/OptionalBooleanParameter:
|
|
38
|
-
Enabled: false
|
|
28
|
+
Metrics/ClassLength:
|
|
29
|
+
Exclude:
|
|
30
|
+
- 'test/**/*'
|
|
39
31
|
|
|
40
32
|
Minitest/MultipleAssertions:
|
|
41
|
-
|
|
33
|
+
Exclude:
|
|
34
|
+
- 'test/**/*'
|
data/CLAUDE.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
EPUB Tools is a Ruby gem and CLI for working with EPUB files. It provides functionality to extract, split, initialize, add chapters, pack, and unpack EPUB books. The project uses a modular architecture with separate classes for each operation and a structured CLI system.
|
|
8
|
+
|
|
9
|
+
## Development Commands
|
|
10
|
+
|
|
11
|
+
### Testing
|
|
12
|
+
```bash
|
|
13
|
+
# Run all tests
|
|
14
|
+
bundle exec rake test
|
|
15
|
+
|
|
16
|
+
# Run a specific test file
|
|
17
|
+
ruby -Itest test/specific_test.rb
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### Linting
|
|
21
|
+
```bash
|
|
22
|
+
# Run RuboCop linting
|
|
23
|
+
bundle exec rubocop
|
|
24
|
+
|
|
25
|
+
# Fix auto-correctable issues
|
|
26
|
+
bundle exec rubocop --autocorrect
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Dependencies
|
|
30
|
+
```bash
|
|
31
|
+
# Install dependencies
|
|
32
|
+
bundle install
|
|
33
|
+
|
|
34
|
+
# Install with documentation dependencies
|
|
35
|
+
bundle install --with doc
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Documentation
|
|
39
|
+
```bash
|
|
40
|
+
# Generate and serve YARD documentation
|
|
41
|
+
bundle exec yard server --reload
|
|
42
|
+
# Then visit http://localhost:8808
|
|
43
|
+
|
|
44
|
+
# Generate documentation files
|
|
45
|
+
bundle exec yard doc
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Gem Management
|
|
49
|
+
```bash
|
|
50
|
+
# Build the gem
|
|
51
|
+
gem build epub_tools.gemspec
|
|
52
|
+
|
|
53
|
+
# Install locally built gem
|
|
54
|
+
gem install ./epub_tools-*.gem
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Architecture
|
|
58
|
+
|
|
59
|
+
### Core Components
|
|
60
|
+
|
|
61
|
+
- **Main Module** (`lib/epub_tools.rb`): Entry point that requires all components
|
|
62
|
+
- **CLI System** (`lib/epub_tools/cli/`): Object-oriented command-line interface
|
|
63
|
+
- `Runner`: Main CLI runner that handles command dispatch
|
|
64
|
+
- `CommandRegistry`: Manages available commands and their configurations
|
|
65
|
+
- `OptionBuilder`: Builds command-line option parsers
|
|
66
|
+
- `CommandOptionsConfigurator`: Handles command-specific option configuration
|
|
67
|
+
- **Core Classes**: Individual operation classes for EPUB manipulation
|
|
68
|
+
- `XHTMLExtractor`: Extracts XHTML files from EPUB archives
|
|
69
|
+
- `SplitChapters`: Splits XHTML files into separate chapters
|
|
70
|
+
- `EpubInitializer`: Creates new EPUB directory structure (uses configuration pattern)
|
|
71
|
+
- `AddChapters`: Adds chapter files to existing EPUB
|
|
72
|
+
- `PackEbook`: Packages EPUB directories into .epub files
|
|
73
|
+
- `UnpackEbook`: Unpacks .epub files into directories
|
|
74
|
+
- **Workflow Classes**: Orchestrators built on a shared base class
|
|
75
|
+
- `BookBuilder`: Base class with template method pattern (extract → split → validate → add → pack)
|
|
76
|
+
- `CompileBook`: Creates a new EPUB from source EPUBs (inherits BookBuilder)
|
|
77
|
+
- `AppendBook`: Appends chapters from source EPUBs to an existing EPUB (inherits BookBuilder)
|
|
78
|
+
- **Supporting Classes**: SOLID-designed helper classes
|
|
79
|
+
- `CompileWorkspace`: Manages build directories for book-building workflows
|
|
80
|
+
- `ChapterValidator`: Validates chapter sequence completeness
|
|
81
|
+
- `ChapterMarkerDetector`: Detects chapter boundary markers (Chapter N, Chapter N (continued), Prologue)
|
|
82
|
+
- `EpubConfiguration`: Configuration object for EPUB initialization
|
|
83
|
+
- `XhtmlGenerator`: Generates XHTML templates for EPUB content
|
|
84
|
+
- `EpubMetadataBuilder`: Builds OPF metadata content
|
|
85
|
+
- `EpubFileWriter`: Handles EPUB file writing operations
|
|
86
|
+
|
|
87
|
+
### CLI Architecture
|
|
88
|
+
|
|
89
|
+
The CLI uses a registry-based system where:
|
|
90
|
+
1. Commands are registered in `cli.rb` with their class, required parameters, and defaults
|
|
91
|
+
2. The `Runner` dispatches to the appropriate command class
|
|
92
|
+
3. The `CommandOptionsConfigurator` handles command-specific option setup
|
|
93
|
+
4. Each command class implements a `run` method and uses the `Loggable` mixin for verbose output
|
|
94
|
+
|
|
95
|
+
### Dependencies
|
|
96
|
+
|
|
97
|
+
- **nokogiri**: XML/HTML parsing for EPUB content
|
|
98
|
+
- **rubyzip**: ZIP file manipulation for EPUB packaging
|
|
99
|
+
- **rake**: Build tasks and testing
|
|
100
|
+
- **minitest**: Testing framework
|
|
101
|
+
- **rubocop**: Code linting with custom configuration
|
|
102
|
+
- **simplecov**: Test coverage reporting
|
|
103
|
+
|
|
104
|
+
### File Structure
|
|
105
|
+
|
|
106
|
+
- `bin/epub-tools`: Executable CLI entry point
|
|
107
|
+
- `lib/epub_tools/`: Main library code
|
|
108
|
+
- `test/`: Minitest-based test suite
|
|
109
|
+
- `.rubocop.yml`: RuboCop configuration (metrics cops are excluded for test files only)
|
|
110
|
+
- `epub_tools.gemspec`: Gem specification
|
|
111
|
+
- `Gemfile`: Dependency management
|
|
112
|
+
|
|
113
|
+
### Testing Patterns
|
|
114
|
+
|
|
115
|
+
Tests use Minitest with:
|
|
116
|
+
- `test_helper.rb` sets up SimpleCov coverage
|
|
117
|
+
- Tests in `test/` directory follow `*_test.rb` naming
|
|
118
|
+
- CLI tests verify command registration and option parsing
|
|
119
|
+
- Individual component tests verify core functionality
|
|
120
|
+
|
|
121
|
+
### Code Quality
|
|
122
|
+
|
|
123
|
+
The codebase follows SOLID principles with:
|
|
124
|
+
- **Single Responsibility**: Classes have focused, well-defined purposes
|
|
125
|
+
- **Open/Closed**: Extensible design through composition and dependency injection
|
|
126
|
+
- **Dependency Inversion**: Configuration objects and factory patterns
|
|
127
|
+
|
|
128
|
+
RuboCop configuration excludes test files from metrics cops while maintaining strict standards for production code.
|
data/Gemfile
CHANGED
|
@@ -6,18 +6,18 @@ source 'https://rubygems.org'
|
|
|
6
6
|
|
|
7
7
|
gem 'nokogiri', '~> 1.18'
|
|
8
8
|
gem 'rake', '~> 13.2'
|
|
9
|
-
gem 'rubyzip', '~> 2
|
|
9
|
+
gem 'rubyzip', '~> 3.2'
|
|
10
10
|
|
|
11
11
|
group :test, :development do
|
|
12
|
-
gem 'minitest', '~>
|
|
12
|
+
gem 'minitest', '~> 6.0'
|
|
13
13
|
gem 'rubocop', '~> 1.75', require: false
|
|
14
|
-
gem 'rubocop-minitest', '~> 0.
|
|
14
|
+
gem 'rubocop-minitest', '~> 0.39.0', require: false
|
|
15
15
|
gem 'rubocop-rake', '~> 0.7.1', require: false
|
|
16
16
|
gem 'simplecov', require: false
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
group :doc do
|
|
20
|
-
gem 'rdoc', '~>
|
|
20
|
+
gem 'rdoc', '~> 7.2'
|
|
21
21
|
gem 'webrick', '~> 1.9'
|
|
22
22
|
gem 'yard', '~> 0.9.37'
|
|
23
23
|
end
|
data/Gemfile.lock
CHANGED
|
@@ -2,45 +2,49 @@ GEM
|
|
|
2
2
|
remote: https://rubygems.org/
|
|
3
3
|
specs:
|
|
4
4
|
ast (2.4.3)
|
|
5
|
-
date (3.
|
|
5
|
+
date (3.5.1)
|
|
6
6
|
docile (1.4.1)
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
drb (2.2.3)
|
|
8
|
+
erb (6.0.2)
|
|
9
|
+
json (2.19.2)
|
|
9
10
|
language_server-protocol (3.17.0.5)
|
|
10
11
|
lint_roller (1.1.0)
|
|
11
|
-
minitest (
|
|
12
|
-
|
|
12
|
+
minitest (6.0.2)
|
|
13
|
+
drb (~> 2.0)
|
|
14
|
+
prism (~> 1.5)
|
|
15
|
+
nokogiri (1.19.1-aarch64-linux-gnu)
|
|
13
16
|
racc (~> 1.4)
|
|
14
|
-
nokogiri (1.
|
|
17
|
+
nokogiri (1.19.1-aarch64-linux-musl)
|
|
15
18
|
racc (~> 1.4)
|
|
16
|
-
nokogiri (1.
|
|
19
|
+
nokogiri (1.19.1-arm-linux-gnu)
|
|
17
20
|
racc (~> 1.4)
|
|
18
|
-
nokogiri (1.
|
|
21
|
+
nokogiri (1.19.1-arm-linux-musl)
|
|
19
22
|
racc (~> 1.4)
|
|
20
|
-
nokogiri (1.
|
|
23
|
+
nokogiri (1.19.1-arm64-darwin)
|
|
21
24
|
racc (~> 1.4)
|
|
22
|
-
nokogiri (1.
|
|
25
|
+
nokogiri (1.19.1-x86_64-darwin)
|
|
23
26
|
racc (~> 1.4)
|
|
24
|
-
nokogiri (1.
|
|
27
|
+
nokogiri (1.19.1-x86_64-linux-gnu)
|
|
25
28
|
racc (~> 1.4)
|
|
26
|
-
nokogiri (1.
|
|
29
|
+
nokogiri (1.19.1-x86_64-linux-musl)
|
|
27
30
|
racc (~> 1.4)
|
|
28
31
|
parallel (1.27.0)
|
|
29
|
-
parser (3.3.
|
|
32
|
+
parser (3.3.10.2)
|
|
30
33
|
ast (~> 2.4.1)
|
|
31
34
|
racc
|
|
32
|
-
prism (1.
|
|
33
|
-
psych (5.
|
|
35
|
+
prism (1.9.0)
|
|
36
|
+
psych (5.3.1)
|
|
34
37
|
date
|
|
35
38
|
stringio
|
|
36
39
|
racc (1.8.1)
|
|
37
40
|
rainbow (3.1.1)
|
|
38
|
-
rake (13.3.
|
|
39
|
-
rdoc (
|
|
41
|
+
rake (13.3.1)
|
|
42
|
+
rdoc (7.2.0)
|
|
40
43
|
erb
|
|
41
44
|
psych (>= 4.0.0)
|
|
42
|
-
|
|
43
|
-
|
|
45
|
+
tsort
|
|
46
|
+
regexp_parser (2.11.3)
|
|
47
|
+
rubocop (1.84.2)
|
|
44
48
|
json (~> 2.3)
|
|
45
49
|
language_server-protocol (~> 3.17.0.2)
|
|
46
50
|
lint_roller (~> 1.1.0)
|
|
@@ -48,13 +52,13 @@ GEM
|
|
|
48
52
|
parser (>= 3.3.0.2)
|
|
49
53
|
rainbow (>= 2.2.2, < 4.0)
|
|
50
54
|
regexp_parser (>= 2.9.3, < 3.0)
|
|
51
|
-
rubocop-ast (>= 1.
|
|
55
|
+
rubocop-ast (>= 1.49.0, < 2.0)
|
|
52
56
|
ruby-progressbar (~> 1.7)
|
|
53
57
|
unicode-display_width (>= 2.4.0, < 4.0)
|
|
54
|
-
rubocop-ast (1.
|
|
58
|
+
rubocop-ast (1.49.0)
|
|
55
59
|
parser (>= 3.3.7.2)
|
|
56
|
-
prism (~> 1.
|
|
57
|
-
rubocop-minitest (0.
|
|
60
|
+
prism (~> 1.7)
|
|
61
|
+
rubocop-minitest (0.39.1)
|
|
58
62
|
lint_roller (~> 1.1)
|
|
59
63
|
rubocop (>= 1.75.0, < 2.0)
|
|
60
64
|
rubocop-ast (>= 1.38.0, < 2.0)
|
|
@@ -62,19 +66,20 @@ GEM
|
|
|
62
66
|
lint_roller (~> 1.1)
|
|
63
67
|
rubocop (>= 1.72.1)
|
|
64
68
|
ruby-progressbar (1.13.0)
|
|
65
|
-
rubyzip (2.
|
|
69
|
+
rubyzip (3.2.2)
|
|
66
70
|
simplecov (0.22.0)
|
|
67
71
|
docile (~> 1.1)
|
|
68
72
|
simplecov-html (~> 0.11)
|
|
69
73
|
simplecov_json_formatter (~> 0.1)
|
|
70
74
|
simplecov-html (0.13.2)
|
|
71
75
|
simplecov_json_formatter (0.1.4)
|
|
72
|
-
stringio (3.
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
76
|
+
stringio (3.2.0)
|
|
77
|
+
tsort (0.2.0)
|
|
78
|
+
unicode-display_width (3.2.0)
|
|
79
|
+
unicode-emoji (~> 4.1)
|
|
80
|
+
unicode-emoji (4.2.0)
|
|
81
|
+
webrick (1.9.2)
|
|
82
|
+
yard (0.9.38)
|
|
78
83
|
|
|
79
84
|
PLATFORMS
|
|
80
85
|
aarch64-linux-gnu
|
|
@@ -87,14 +92,14 @@ PLATFORMS
|
|
|
87
92
|
x86_64-linux-musl
|
|
88
93
|
|
|
89
94
|
DEPENDENCIES
|
|
90
|
-
minitest (~>
|
|
95
|
+
minitest (~> 6.0)
|
|
91
96
|
nokogiri (~> 1.18)
|
|
92
97
|
rake (~> 13.2)
|
|
93
|
-
rdoc (~>
|
|
98
|
+
rdoc (~> 7.2)
|
|
94
99
|
rubocop (~> 1.75)
|
|
95
|
-
rubocop-minitest (~> 0.
|
|
100
|
+
rubocop-minitest (~> 0.39.0)
|
|
96
101
|
rubocop-rake (~> 0.7.1)
|
|
97
|
-
rubyzip (~> 2
|
|
102
|
+
rubyzip (~> 3.2)
|
|
98
103
|
simplecov
|
|
99
104
|
webrick (~> 1.9)
|
|
100
105
|
yard (~> 0.9.37)
|
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
[Build Status](https://github.com/jaimerodas/epub_tools/actions) [License](LICENSE) [Gem Version](https://badge.fury.io/rb/epub_tools)
|
|
4
4
|
|
|
5
|
-
**TL;DR:** A Ruby gem and CLI for working with EPUB files: extract, split, initialize, add chapters, pack, and
|
|
5
|
+
**TL;DR:** A Ruby gem and CLI for working with EPUB files: extract, split, initialize, add chapters, pack, unpack, compile, and append to EPUB books.
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
@@ -36,6 +36,7 @@ Commands:
|
|
|
36
36
|
- `pack` Package an EPUB directory into a `.epub` file
|
|
37
37
|
- `unpack` Unpack a `.epub` file into a directory
|
|
38
38
|
- `compile` Takes EPUBs in a dir and splits, cleans, and compiles into a single EPUB
|
|
39
|
+
- `append` Extracts and splits EPUBs from a dir and appends them to an existing EPUB
|
|
39
40
|
|
|
40
41
|
Run `epub-tools COMMAND --help` for details on options.
|
|
41
42
|
|
|
@@ -61,46 +62,58 @@ epub-tools unpack -i MyBook.epub -o unpacked_dir
|
|
|
61
62
|
|
|
62
63
|
# Full compile workflow: extract, split, initialize, add, and pack into one EPUB
|
|
63
64
|
epub-tools compile -t "My Book" -a "Author Name" -s source_epubs -c cover.jpg -o MyBook.epub
|
|
64
|
-
```
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
# Append chapters from new EPUBs to an existing book
|
|
67
|
+
epub-tools append -s new_epubs -t MyBook.epub
|
|
68
|
+
```
|
|
67
69
|
|
|
68
70
|
## Library Usage
|
|
69
71
|
Use the library directly in Ruby:
|
|
70
72
|
```ruby
|
|
71
73
|
require 'epub_tools'
|
|
72
74
|
|
|
73
|
-
#
|
|
75
|
+
# Full compile workflow: extract, split, and compile into a new EPUB
|
|
76
|
+
EpubTools::CompileBook.new(
|
|
77
|
+
title: 'My Book', author: 'Author Name',
|
|
78
|
+
source_dir: 'source_epubs', cover_image: 'cover.jpg',
|
|
79
|
+
output_file: 'MyBook.epub'
|
|
80
|
+
).run
|
|
81
|
+
|
|
82
|
+
# Append chapters from new EPUBs to an existing book
|
|
83
|
+
EpubTools::AppendBook.new(
|
|
84
|
+
source_dir: 'new_epubs',
|
|
85
|
+
target_epub: 'MyBook.epub'
|
|
86
|
+
).run
|
|
87
|
+
|
|
88
|
+
# Individual steps can also be used standalone:
|
|
89
|
+
|
|
90
|
+
# Extract XHTML files from EPUBs
|
|
74
91
|
EpubTools::XHTMLExtractor.new(
|
|
75
|
-
source_dir: 'source_epubs',
|
|
76
|
-
|
|
77
|
-
verbose: true
|
|
78
|
-
).extract_all
|
|
92
|
+
source_dir: 'source_epubs', target_dir: 'xhtml_output'
|
|
93
|
+
).run
|
|
79
94
|
|
|
80
|
-
# Split
|
|
95
|
+
# Split a multi-chapter XHTML into individual chapter files
|
|
81
96
|
EpubTools::SplitChapters.new(
|
|
82
|
-
'xhtml_output/chapter1.xhtml',
|
|
83
|
-
'
|
|
84
|
-
'chapters',
|
|
85
|
-
'chapter'
|
|
97
|
+
input_file: 'xhtml_output/chapter1.xhtml', book_title: 'My Book',
|
|
98
|
+
output_dir: 'chapters', output_prefix: 'chapter'
|
|
86
99
|
).run
|
|
87
100
|
|
|
88
|
-
# Initialize EPUB
|
|
101
|
+
# Initialize a new EPUB directory structure
|
|
89
102
|
EpubTools::EpubInitializer.new(
|
|
90
|
-
'My Book',
|
|
91
|
-
'
|
|
92
|
-
'epub_dir',
|
|
93
|
-
'cover.jpg'
|
|
103
|
+
title: 'My Book', author: 'Author Name',
|
|
104
|
+
destination: 'epub_dir', cover_image: 'cover.jpg'
|
|
94
105
|
).run
|
|
95
106
|
|
|
96
|
-
# Add
|
|
97
|
-
EpubTools::AddChapters.new(
|
|
107
|
+
# Add chapter files into an EPUB
|
|
108
|
+
EpubTools::AddChapters.new(
|
|
109
|
+
chapters_dir: 'chapters', oebps_dir: 'epub_dir/OEBPS'
|
|
110
|
+
).run
|
|
98
111
|
|
|
99
|
-
#
|
|
100
|
-
EpubTools::PackEbook.new('epub_dir', 'MyBook.epub').run
|
|
112
|
+
# Package an EPUB directory into a .epub file
|
|
113
|
+
EpubTools::PackEbook.new(input_dir: 'epub_dir', output_file: 'MyBook.epub').run
|
|
101
114
|
|
|
102
|
-
# Unpack
|
|
103
|
-
EpubTools::UnpackEbook.new('MyBook.epub', 'unpacked_dir').run
|
|
115
|
+
# Unpack a .epub file into a directory
|
|
116
|
+
EpubTools::UnpackEbook.new(epub_file: 'MyBook.epub', output_dir: 'unpacked_dir').run
|
|
104
117
|
```
|
|
105
118
|
## Development & Testing
|
|
106
119
|
Clone the repo and install dependencies:
|
data/Rakefile
CHANGED
data/bin/epub-tools
CHANGED
data/epub_tools.gemspec
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require_relative 'lib/epub_tools/version'
|
|
2
4
|
|
|
3
5
|
Gem::Specification.new do |spec|
|
|
@@ -20,5 +22,5 @@ Gem::Specification.new do |spec|
|
|
|
20
22
|
|
|
21
23
|
spec.add_dependency 'nokogiri', '~> 1.18'
|
|
22
24
|
spec.add_dependency 'rake', '~> 13.2'
|
|
23
|
-
spec.add_dependency 'rubyzip', '~> 2
|
|
25
|
+
spec.add_dependency 'rubyzip', '~> 3.2'
|
|
24
26
|
end
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
2
4
|
require 'nokogiri'
|
|
3
5
|
require 'fileutils'
|
|
4
6
|
require_relative 'loggable'
|
|
@@ -7,6 +9,7 @@ module EpubTools
|
|
|
7
9
|
# Moves new chapters into an unpacked EPUB
|
|
8
10
|
class AddChapters
|
|
9
11
|
include Loggable
|
|
12
|
+
|
|
10
13
|
# Initializes the class
|
|
11
14
|
# @param options [Hash] Configuration options
|
|
12
15
|
# @option options [String] :chapters_dir Directory from which to move the xhtml chapters.
|
|
@@ -56,10 +59,8 @@ module EpubTools
|
|
|
56
59
|
end
|
|
57
60
|
|
|
58
61
|
def move_chapters
|
|
59
|
-
# Sort by chapter number (numeric)
|
|
60
62
|
chapter_files = Dir.glob(File.join(@chapters_dir, '*.xhtml')).sort_by do |path|
|
|
61
|
-
|
|
62
|
-
File.basename(path)[/\d+/].to_i
|
|
63
|
+
chapter_sort_key(File.basename(path))
|
|
63
64
|
end
|
|
64
65
|
|
|
65
66
|
raise ArgumentError, "No .xhtml files found in '#{@chapters_dir}'" if chapter_files.empty?
|
|
@@ -70,8 +71,17 @@ module EpubTools
|
|
|
70
71
|
chapter_files.map { |f| File.basename(f) }
|
|
71
72
|
end
|
|
72
73
|
|
|
74
|
+
def chapter_sort_key(filename)
|
|
75
|
+
basename = File.basename(filename, '.xhtml')
|
|
76
|
+
if (m = basename.match(/_(\d+)_5\z/))
|
|
77
|
+
m[1].to_f + 0.5
|
|
78
|
+
else
|
|
79
|
+
basename[/\d+/].to_f
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
73
83
|
def chapter_id(filename)
|
|
74
|
-
match = filename.match(/chapter_(\d+)\.xhtml/)
|
|
84
|
+
match = filename.match(/chapter_(\d+(?:_5)?)\.xhtml/)
|
|
75
85
|
match ? "chap#{match[1]}" : File.basename(filename, '.xhtml')
|
|
76
86
|
end
|
|
77
87
|
|
|
@@ -80,24 +90,7 @@ module EpubTools
|
|
|
80
90
|
manifest = doc.at_xpath('//xmlns:manifest')
|
|
81
91
|
spine = doc.at_xpath('//xmlns:spine')
|
|
82
92
|
|
|
83
|
-
filenames.each
|
|
84
|
-
id = chapter_id(filename)
|
|
85
|
-
# Add <item> to the manifest if missing
|
|
86
|
-
unless doc.at_xpath("//xmlns:item[@href='#{filename}']")
|
|
87
|
-
item = Nokogiri::XML::Node.new('item', doc)
|
|
88
|
-
item['id'] = id
|
|
89
|
-
item['href'] = filename
|
|
90
|
-
item['media-type'] = 'application/xhtml+xml'
|
|
91
|
-
manifest.add_child(item)
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
# Add <itemref> to the spine if missing
|
|
95
|
-
next if doc.at_xpath("//xmlns:itemref[@idref='#{id}']")
|
|
96
|
-
|
|
97
|
-
itemref = Nokogiri::XML::Node.new('itemref', doc)
|
|
98
|
-
itemref['idref'] = id
|
|
99
|
-
spine.add_child(itemref)
|
|
100
|
-
end
|
|
93
|
+
filenames.each { |filename| update_opf_for_file(doc, manifest, spine, filename) }
|
|
101
94
|
|
|
102
95
|
File.write(@opf_file, doc.to_xml(indent: 2))
|
|
103
96
|
end
|
|
@@ -106,19 +99,57 @@ module EpubTools
|
|
|
106
99
|
doc = Nokogiri::XML(File.read(@nav_file)) { |config| config.default_xml.noblanks }
|
|
107
100
|
nav = doc.at_xpath('//xmlns:nav[@epub:type="toc"]/xmlns:ol')
|
|
108
101
|
|
|
109
|
-
filenames.each
|
|
110
|
-
# Create a new <li><a href="...">Label</a></li> element
|
|
111
|
-
label = File.basename(filename, '.xhtml').gsub('_', ' ').capitalize
|
|
112
|
-
label = 'Prologue' if label == 'Chapter 0'
|
|
113
|
-
li = Nokogiri::XML::Node.new('li', doc)
|
|
114
|
-
a = Nokogiri::XML::Node.new('a', doc)
|
|
115
|
-
a['href'] = filename
|
|
116
|
-
a.content = label
|
|
117
|
-
li.add_child(a)
|
|
118
|
-
nav.add_child(li)
|
|
119
|
-
end
|
|
102
|
+
filenames.each { |filename| nav.add_child(create_nav_link(doc, filename)) }
|
|
120
103
|
|
|
121
104
|
File.write(@nav_file, doc.to_xml(indent: 2))
|
|
122
105
|
end
|
|
106
|
+
|
|
107
|
+
def create_nav_link(doc, filename)
|
|
108
|
+
li = Nokogiri::XML::Node.new('li', doc)
|
|
109
|
+
a = Nokogiri::XML::Node.new('a', doc)
|
|
110
|
+
a['href'] = filename
|
|
111
|
+
a.content = format_chapter_label(filename)
|
|
112
|
+
li.add_child(a)
|
|
113
|
+
li
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
def format_chapter_label(filename)
|
|
117
|
+
basename = File.basename(filename, '.xhtml')
|
|
118
|
+
return 'Prologue' if basename == 'chapter_0'
|
|
119
|
+
|
|
120
|
+
if (m = basename.match(/chapter_(\d+)_5/))
|
|
121
|
+
"Chapter #{m[1]}.5"
|
|
122
|
+
else
|
|
123
|
+
basename.gsub('_', ' ').capitalize
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def update_opf_for_file(doc, manifest, spine, filename)
|
|
128
|
+
id = chapter_id(filename)
|
|
129
|
+
add_manifest_item(doc, manifest, filename, id) unless manifest_item_exists?(doc, filename)
|
|
130
|
+
add_spine_itemref(doc, spine, id) unless spine_itemref_exists?(doc, id)
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def manifest_item_exists?(doc, filename)
|
|
134
|
+
doc.at_xpath("//xmlns:item[@href='#{filename}']")
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def spine_itemref_exists?(doc, id)
|
|
138
|
+
doc.at_xpath("//xmlns:itemref[@idref='#{id}']")
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def add_manifest_item(doc, manifest, filename, id)
|
|
142
|
+
item = Nokogiri::XML::Node.new('item', doc)
|
|
143
|
+
item['id'] = id
|
|
144
|
+
item['href'] = filename
|
|
145
|
+
item['media-type'] = 'application/xhtml+xml'
|
|
146
|
+
manifest.add_child(item)
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def add_spine_itemref(doc, spine, id)
|
|
150
|
+
itemref = Nokogiri::XML::Node.new('itemref', doc)
|
|
151
|
+
itemref['idref'] = id
|
|
152
|
+
spine.add_child(itemref)
|
|
153
|
+
end
|
|
123
154
|
end
|
|
124
155
|
end
|