epub_tools 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.document +2 -0
- data/.github/workflows/ci.yml +9 -8
- data/.gitignore +2 -0
- data/Gemfile +6 -1
- data/Gemfile.lock +2 -0
- data/LICENSE +21 -0
- data/README.md +5 -3
- data/bin/epub-tools +1 -1
- data/epub_tools.gemspec +1 -1
- data/lib/epub_tools/{add_chapters_to_epub.rb → add_chapters.rb} +16 -1
- data/lib/epub_tools/compile_book.rb +18 -7
- data/lib/epub_tools/epub_initializer.rb +14 -2
- data/lib/epub_tools/pack_ebook.rb +4 -3
- data/lib/epub_tools/split_chapters.rb +15 -6
- data/lib/epub_tools/text_style_class_finder.rb +7 -0
- data/lib/epub_tools/unpack_ebook.rb +11 -9
- data/lib/epub_tools/version.rb +2 -1
- data/lib/epub_tools/xhtml_cleaner.rb +16 -0
- data/lib/epub_tools/xhtml_extractor.rb +6 -2
- data/lib/epub_tools.rb +2 -1
- data/test/{add_chapters_to_epub_test.rb → add_chapters_test.rb} +3 -3
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 815d8970acc779f70f7d86395a3658a296536675539dfcf3e7f9a0e0f2d4153a
|
4
|
+
data.tar.gz: da9131f5868b8e8c438e9f41c526d62a2d9f70a320e8d20fa14ffcf4a487f22c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7dc1015fc79f4c72edec8fb672622122a3540ef3b8923683777a178d397fff5af4e0b0765f237e748ca7a103b9e4c2c0c0c0902d180fcd9cb87e90c613c862f
|
7
|
+
data.tar.gz: fe60fa02c6a7db816b7aab2f5fe368de1a463fdaaa5aa1892146281362ea2cda49d904da541b67e34b2329225e01d9fcc7cd9a3a4c15503cb06cf3d6a10e26ec
|
data/.document
ADDED
data/.github/workflows/ci.yml
CHANGED
@@ -2,20 +2,21 @@ name: CI
|
|
2
2
|
|
3
3
|
on:
|
4
4
|
push:
|
5
|
-
branches: [
|
5
|
+
branches: [ main ]
|
6
6
|
pull_request:
|
7
|
-
branches: [
|
7
|
+
branches: [ main ]
|
8
8
|
|
9
9
|
jobs:
|
10
10
|
test:
|
11
11
|
runs-on: ubuntu-latest
|
12
|
+
strategy:
|
13
|
+
matrix:
|
14
|
+
ruby: ['3.2', '3.3', '3.4']
|
12
15
|
steps:
|
13
16
|
- uses: actions/checkout@v3
|
14
17
|
- uses: ruby/setup-ruby@v1
|
15
18
|
with:
|
16
|
-
ruby-version:
|
17
|
-
|
18
|
-
-
|
19
|
-
|
20
|
-
- name: Run tests
|
21
|
-
run: bundle exec rake test
|
19
|
+
ruby-version: ${{ matrix.ruby }}
|
20
|
+
- run: gem install bundler
|
21
|
+
- run: bundle install --jobs 4 --retry 3
|
22
|
+
- run: bundle exec rake test
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
ruby '3.
|
3
|
+
ruby '>= 3.2'
|
4
4
|
|
5
5
|
source "https://rubygems.org"
|
6
6
|
|
@@ -12,3 +12,8 @@ group :test do
|
|
12
12
|
gem "minitest", "~> 5.25"
|
13
13
|
gem "simplecov", require: false
|
14
14
|
end
|
15
|
+
|
16
|
+
group :doc do
|
17
|
+
gem "yard", "~> 0.9.37"
|
18
|
+
end
|
19
|
+
|
data/Gemfile.lock
CHANGED
@@ -28,6 +28,7 @@ GEM
|
|
28
28
|
simplecov_json_formatter (~> 0.1)
|
29
29
|
simplecov-html (0.13.1)
|
30
30
|
simplecov_json_formatter (0.1.4)
|
31
|
+
yard (0.9.37)
|
31
32
|
|
32
33
|
PLATFORMS
|
33
34
|
aarch64-linux-gnu
|
@@ -45,6 +46,7 @@ DEPENDENCIES
|
|
45
46
|
rake (~> 13.2)
|
46
47
|
rubyzip (~> 2.4)
|
47
48
|
simplecov
|
49
|
+
yard (~> 0.9.37)
|
48
50
|
|
49
51
|
RUBY VERSION
|
50
52
|
ruby 3.4.3p32
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Jaime Rodas
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
9
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
10
|
+
so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# EPUB Tools
|
2
2
|
|
3
|
-
|
3
|
+
[](https://github.com/jaimerodas/epub_tools/actions) [](LICENSE)
|
4
|
+
|
5
|
+
**TL;DR:** A Ruby gem and CLI for working with EPUB files: extract, split, initialize, add chapters, pack, and unpack EPUB books.
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
Install the gem via RubyGems:
|
@@ -52,7 +54,7 @@ epub-tools pack -i epub_dir -o MyBook.epub
|
|
52
54
|
|
53
55
|
# Unpack EPUB
|
54
56
|
epub-tools unpack -i MyBook.epub -o unpacked_dir
|
55
|
-
|
57
|
+
|
56
58
|
# Full compile workflow: extract, split, initialize, add, and pack into one EPUB
|
57
59
|
epub-tools compile -t "My Book" -a "Author Name" -s source_epubs -c cover.jpg -o MyBook.epub
|
58
60
|
```
|
@@ -88,7 +90,7 @@ EpubTools::EpubInitializer.new(
|
|
88
90
|
).run
|
89
91
|
|
90
92
|
# Add chapters
|
91
|
-
EpubTools::
|
93
|
+
EpubTools::AddChapters.new('chapters', 'epub_dir/OEBPS').run
|
92
94
|
|
93
95
|
# Pack EPUB
|
94
96
|
EpubTools::PackEbook.new('epub_dir', 'MyBook.epub').run
|
data/bin/epub-tools
CHANGED
@@ -36,7 +36,7 @@ EpubTools::CLIHelper.parse(options, [:chapters_dir, :epub_oebps_dir]) do |opts,
|
|
36
36
|
opts.on('-e DIR', '--epub-oebps-dir DIR', 'EPUB OEBPS directory (required)') { |v| o[:epub_oebps_dir] = v }
|
37
37
|
end
|
38
38
|
|
39
|
-
EpubTools::
|
39
|
+
EpubTools::AddChapters.new(options[:chapters_dir], options[:epub_oebps_dir]).run
|
40
40
|
|
41
41
|
when 'extract'
|
42
42
|
options = { verbose: true }
|
data/epub_tools.gemspec
CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.files = `git ls-files`.split("\n")
|
12
12
|
spec.require_paths = ['lib']
|
13
13
|
spec.executables = ['epub-tools']
|
14
|
-
spec.required_ruby_version = ">= 3.
|
14
|
+
spec.required_ruby_version = ">= 3.2"
|
15
15
|
spec.metadata = {
|
16
16
|
"source_code_uri" => "https://github.com/jaimerodas/epub_tools/tree/main",
|
17
17
|
"homepage_uri" => "https://github.com/jaimerodas/epub_tools"
|
@@ -3,7 +3,16 @@ require 'nokogiri'
|
|
3
3
|
require 'fileutils'
|
4
4
|
|
5
5
|
module EpubTools
|
6
|
-
|
6
|
+
# Moves new chapters into an unpacked EPUB
|
7
|
+
class AddChapters
|
8
|
+
# :args: chapters_dir, epub_dir, verbose:
|
9
|
+
# [chapters_dir] Directory from which to move the xhtml chapters. It assumes the
|
10
|
+
# directory will contain one or more files named +chapter_XX.xhtml+,
|
11
|
+
# where +XX+ is a number. Defaults to +./chapters+.
|
12
|
+
# [epub_dir] Unpacked EPUB directory to move the chapters to. It should be the same
|
13
|
+
# directory that contains the +package.opf+ and +nav.xhtml+ files. Defaults
|
14
|
+
# to +./epub/OEBPS+.
|
15
|
+
# [verbose:] Whether to log progress to +STDOUT+ or not. Defaults to +false+.
|
7
16
|
def initialize(chapters_dir = './chapters', epub_dir = './epub/OEBPS', verbose = false)
|
8
17
|
@chapters_dir = chapters_dir
|
9
18
|
@epub_dir = epub_dir
|
@@ -12,6 +21,12 @@ module EpubTools
|
|
12
21
|
@verbose = verbose
|
13
22
|
end
|
14
23
|
|
24
|
+
# It works like this:
|
25
|
+
# - First, the *.xhtml files are moved from +chapters_dir+ over to +epub_dir+
|
26
|
+
# - Then, new entries will be added to the manifest and spine of the EPUB's +package.opf+ file.
|
27
|
+
# It will sort the files by extracting the chapter number.
|
28
|
+
# - Finally, it will update the +nav.xhtml+ file with the new chapters. Note that if there's a
|
29
|
+
# file named +chapter_0.xhtml+, it will be added to the +nav.xhtml+ as the Prologue.
|
15
30
|
def run
|
16
31
|
moved_files = move_chapters
|
17
32
|
update_package_opf(moved_files)
|
@@ -3,17 +3,28 @@ require 'fileutils'
|
|
3
3
|
require_relative 'xhtml_extractor'
|
4
4
|
require_relative 'split_chapters'
|
5
5
|
require_relative 'epub_initializer'
|
6
|
-
require_relative '
|
6
|
+
require_relative 'add_chapters'
|
7
7
|
require_relative 'pack_ebook'
|
8
8
|
|
9
9
|
module EpubTools
|
10
10
|
# Orchestrates extraction, splitting, validation, and packaging of book EPUBs
|
11
11
|
class CompileBook
|
12
|
-
|
13
|
-
|
14
|
-
#
|
15
|
-
|
16
|
-
#
|
12
|
+
# Book title
|
13
|
+
attr_reader :title
|
14
|
+
# Book author
|
15
|
+
attr_reader :author
|
16
|
+
# Path of the input epubs
|
17
|
+
attr_reader :source_dir
|
18
|
+
# Optional path to the cover image
|
19
|
+
attr_reader :cover_image
|
20
|
+
# Filename for the final epub
|
21
|
+
attr_reader :output_file
|
22
|
+
# Optional working directory for intermediate files
|
23
|
+
attr_reader :build_dir
|
24
|
+
# Whether to print progress to STDOUT
|
25
|
+
attr_reader :verbose
|
26
|
+
|
27
|
+
# Initializes the class
|
17
28
|
def initialize(title:, author:, source_dir:, cover_image: nil, output_file: nil, build_dir: nil, verbose: false)
|
18
29
|
@title = title
|
19
30
|
@author = author
|
@@ -113,7 +124,7 @@ module EpubTools
|
|
113
124
|
|
114
125
|
def add_chapters
|
115
126
|
log "Adding chapters to EPUB..."
|
116
|
-
|
127
|
+
AddChapters.new(chapters_dir, File.join(epub_dir, 'OEBPS'), verbose).run
|
117
128
|
end
|
118
129
|
|
119
130
|
def pack_epub
|
@@ -4,9 +4,19 @@ require 'time'
|
|
4
4
|
require 'securerandom'
|
5
5
|
|
6
6
|
module EpubTools
|
7
|
+
# Sets up a basic empty EPUB directory structure with the basic files created:
|
8
|
+
# - +mimetype+
|
9
|
+
# - +container.xml+
|
10
|
+
# - +title.xhtml+ as a title page
|
11
|
+
# - +package.opf+
|
12
|
+
# - +nav.xhtml+ as a table of contents
|
13
|
+
# - +style.css+ a basic style inherited from the repo
|
14
|
+
# - cover image (optionally)
|
7
15
|
class EpubInitializer
|
8
|
-
# title
|
9
|
-
#
|
16
|
+
# [title] Book title
|
17
|
+
# [author] Book Author
|
18
|
+
# [destination] Target directory
|
19
|
+
# [cover_image] Optional image path to use as a cover for the book
|
10
20
|
def initialize(title, author, destination, cover_image = nil)
|
11
21
|
@title = title
|
12
22
|
@author = author
|
@@ -18,6 +28,7 @@ module EpubTools
|
|
18
28
|
@cover_image_media_type = nil
|
19
29
|
end
|
20
30
|
|
31
|
+
# Creates the empty ebook and returns the directory
|
21
32
|
def run
|
22
33
|
create_structure
|
23
34
|
write_mimetype
|
@@ -27,6 +38,7 @@ module EpubTools
|
|
27
38
|
write_package_opf
|
28
39
|
write_nav
|
29
40
|
write_style
|
41
|
+
@destination
|
30
42
|
end
|
31
43
|
|
32
44
|
private
|
@@ -5,8 +5,8 @@ require 'pathname'
|
|
5
5
|
module EpubTools
|
6
6
|
# Packages an EPUB directory into a .epub file
|
7
7
|
class PackEbook
|
8
|
-
# input_dir
|
9
|
-
# output_file
|
8
|
+
# [input_dir] Path to the EPUB directory (containing mimetype, META-INF, OEBPS)
|
9
|
+
# [output_file] Path to resulting .epub file; if +nil+, defaults to <tt><input_dir>.epub</tt>
|
10
10
|
def initialize(input_dir, output_file = nil, verbose: false)
|
11
11
|
@input_dir = File.expand_path(input_dir)
|
12
12
|
default_name = "#{File.basename(@input_dir)}.epub"
|
@@ -18,7 +18,7 @@ module EpubTools
|
|
18
18
|
@verbose = verbose
|
19
19
|
end
|
20
20
|
|
21
|
-
#
|
21
|
+
# Runs the packaging process and returns the resulting file path
|
22
22
|
def run
|
23
23
|
validate_input!
|
24
24
|
Dir.chdir(@input_dir) do
|
@@ -38,6 +38,7 @@ module EpubTools
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
puts "EPUB created: #{@output_file}" if @verbose
|
41
|
+
@output_file
|
41
42
|
end
|
42
43
|
|
43
44
|
private
|
@@ -5,11 +5,19 @@ require_relative 'text_style_class_finder'
|
|
5
5
|
require_relative 'xhtml_cleaner'
|
6
6
|
|
7
7
|
module EpubTools
|
8
|
+
# Takes a Google Docs generated, already extracted from their EPUB, XHTML files with multiple
|
9
|
+
# chapters and it:
|
10
|
+
# - Extracts classes using {TextStyleClassFinder}[rdoc-ref:EpubTools::TextStyleClassFinder]
|
11
|
+
# - Looks for tags that say something like Chapter XX or Prologue and splits the text there
|
12
|
+
# - Creates new chapter_XX.xhtml files that are cleaned using
|
13
|
+
# {XHTMLCleaner}[rdoc-ref:EpubTools::XHTMLCleaner]
|
14
|
+
# - Saves those files to +output_dir+
|
8
15
|
class SplitChapters
|
9
|
-
# input_file
|
10
|
-
# book_title
|
11
|
-
# output_dir
|
12
|
-
# output_prefix
|
16
|
+
# [input_file] path to the source XHTML
|
17
|
+
# [book_title] title to use in HTML <title> tags
|
18
|
+
# [output_dir] where to write chapter files
|
19
|
+
# [output_prefix] filename prefix. Defaults to 'chapter' and you should never need to change it
|
20
|
+
# [verbose] whether to print progress to STDOUT.
|
13
21
|
def initialize(input_file, book_title, output_dir = './chapters', output_prefix = 'chapter', verbose = false)
|
14
22
|
@input_file = input_file
|
15
23
|
@book_title = book_title
|
@@ -18,12 +26,13 @@ module EpubTools
|
|
18
26
|
@verbose = verbose
|
19
27
|
end
|
20
28
|
|
29
|
+
# Runs the splitter
|
21
30
|
def run
|
22
31
|
# Prepare output dir
|
23
32
|
Dir.mkdir(@output_dir) unless Dir.exist?(@output_dir)
|
24
33
|
|
25
34
|
# Read the doc
|
26
|
-
raw_content =
|
35
|
+
raw_content = read_and_strip_problematic_tags
|
27
36
|
doc = Nokogiri::HTML(raw_content)
|
28
37
|
|
29
38
|
# Find Style Classes
|
@@ -35,7 +44,7 @@ module EpubTools
|
|
35
44
|
|
36
45
|
private
|
37
46
|
|
38
|
-
def
|
47
|
+
def read_and_strip_problematic_tags
|
39
48
|
File.read(@input_file).gsub(/<hr\b[^>]*\/?>/i, '').gsub(/<br\b[^>]*\/?>/i, '')
|
40
49
|
end
|
41
50
|
|
@@ -3,7 +3,13 @@ require 'nokogiri'
|
|
3
3
|
require 'yaml'
|
4
4
|
|
5
5
|
module EpubTools
|
6
|
+
# Finds css classes for bold and italic texts in Google Docs-generated EPUBs. Used by
|
7
|
+
# {XHTMLCleaner}[rdoc-ref:EpubTools::XHTMLCleaner] and
|
8
|
+
# {SplitChapters}[rdoc-ref:EpubTools::SplitChapters].
|
6
9
|
class TextStyleClassFinder
|
10
|
+
# [file_path] XHTML file to be analyzed.
|
11
|
+
# [output_path] Defaults to +text_style_classes.yaml+. You should never need to change this.
|
12
|
+
# [verbose] Whether to print progress or not
|
7
13
|
def initialize(file_path, output_path = 'text_style_classes.yaml', verbose: false)
|
8
14
|
@file_path = file_path
|
9
15
|
@output_path = output_path
|
@@ -11,6 +17,7 @@ module EpubTools
|
|
11
17
|
raise ArgumentError, "File does not exist: #{@file_path}" unless File.exist?(@file_path)
|
12
18
|
end
|
13
19
|
|
20
|
+
# Runs the finder
|
14
21
|
def call
|
15
22
|
doc = Nokogiri::HTML(File.read(@file_path))
|
16
23
|
style_blocks = doc.xpath('//style').map(&:text).join("\n")
|
@@ -4,20 +4,17 @@ require 'fileutils'
|
|
4
4
|
module EpubTools
|
5
5
|
# Unpacks an EPUB (.epub file) into a directory
|
6
6
|
class UnpackEbook
|
7
|
-
# epub_file
|
8
|
-
# output_dir
|
7
|
+
# [epub_file] path to the .epub file
|
8
|
+
# [output_dir] Directory to extract into; defaults to basename of epub_file without .epub
|
9
|
+
# [verbose] Whether to log things to $stdout while the class runs or not
|
9
10
|
def initialize(epub_file, output_dir = nil, verbose: false)
|
10
11
|
@epub_file = File.expand_path(epub_file)
|
11
|
-
|
12
|
-
@output_dir = if output_dir.nil? || output_dir.empty?
|
13
|
-
default_dir
|
14
|
-
else
|
15
|
-
output_dir
|
16
|
-
end
|
12
|
+
@output_dir = (output_dir.nil? || output_dir.empty?) ? default_dir: output_dir
|
17
13
|
@verbose = verbose
|
18
14
|
end
|
19
15
|
|
20
|
-
# Extracts all entries from the EPUB into the output directory
|
16
|
+
# Extracts all entries from the EPUB into the output directory. Returns the output
|
17
|
+
# directory.
|
21
18
|
def run
|
22
19
|
validate!
|
23
20
|
FileUtils.mkdir_p(@output_dir)
|
@@ -33,10 +30,15 @@ module EpubTools
|
|
33
30
|
end
|
34
31
|
end
|
35
32
|
puts "Unpacked #{File.basename(@epub_file)} to #{@output_dir}" if @verbose
|
33
|
+
@output_dir
|
36
34
|
end
|
37
35
|
|
38
36
|
private
|
39
37
|
|
38
|
+
def default_dir
|
39
|
+
[File.dirname(@epub_file), File.basename(@epub_file, '.epub')].join("/")
|
40
|
+
end
|
41
|
+
|
40
42
|
def validate!
|
41
43
|
unless File.file?(@epub_file)
|
42
44
|
raise ArgumentError, "EPUB file '#{@epub_file}' does not exist"
|
data/lib/epub_tools/version.rb
CHANGED
@@ -4,12 +4,28 @@ require 'nokogiri'
|
|
4
4
|
require 'yaml'
|
5
5
|
|
6
6
|
module EpubTools
|
7
|
+
# Cleans Google Docs XHTMLs
|
8
|
+
|
9
|
+
# Google Docs makes a mess out of EPUBs and creates html without proper tag names and just uses
|
10
|
+
# classes for _everything_. This class does the following to clean invalid xhtml:
|
11
|
+
#
|
12
|
+
# - Removes any <tt><br /></tt> or <tt><hr /></tt> tags.
|
13
|
+
# - Removes empty <tt><p></tt> tags.
|
14
|
+
# - Using the +class_config+, it removes <tt><span></tt> tags that are used for bold or italics and
|
15
|
+
# replaces them with <tt><b></tt> or <tt><i></tt> tags.
|
16
|
+
# - Unwraps any <tt><span></tt> tags that have no classes assigned.
|
17
|
+
# - Outputs everything to a cleanly formatted +.xhtml+
|
7
18
|
class XHTMLCleaner
|
19
|
+
# [filename] The path to the xhtml to clean
|
20
|
+
# [class_config] A YAML containing the bold and italic classes to check. It defaults to
|
21
|
+
# +text_style_classes.yaml+ since that's the one that
|
22
|
+
# {TextStyleClassFinder}[rdoc-ref:EpubTools::TextStyleClassFinder] uses.
|
8
23
|
def initialize(filename, class_config = 'text_style_classes.yaml')
|
9
24
|
@filename = filename
|
10
25
|
@classes = YAML.load_file(class_config).transform_keys(&:to_sym)
|
11
26
|
end
|
12
27
|
|
28
|
+
# Calls the service class
|
13
29
|
def call
|
14
30
|
raw_content = read_and_strip_problematic_hr
|
15
31
|
doc = parse_xml(raw_content)
|
@@ -2,8 +2,11 @@ require 'zip'
|
|
2
2
|
require 'fileutils'
|
3
3
|
|
4
4
|
module EpubTools
|
5
|
-
# Extracts .xhtml files from EPUB archives, excluding nav.xhtml
|
5
|
+
# Extracts text .xhtml files from EPUB archives, excluding nav.xhtml
|
6
6
|
class XHTMLExtractor
|
7
|
+
# [source_dir] Directory that has source .epub files
|
8
|
+
# [target_dir] Directory where the extracted .xhtml files will be copied to
|
9
|
+
# [verbose] Whether to print progress to +STDOUT+ or not
|
7
10
|
def initialize(source_dir:, target_dir:, verbose: false)
|
8
11
|
@source_dir = File.expand_path(source_dir)
|
9
12
|
@target_dir = File.expand_path(target_dir)
|
@@ -11,6 +14,7 @@ module EpubTools
|
|
11
14
|
FileUtils.mkdir_p(@target_dir)
|
12
15
|
end
|
13
16
|
|
17
|
+
# Runs the extraction process
|
14
18
|
def extract_all
|
15
19
|
epub_files.each do |epub_path|
|
16
20
|
extract_xhtmls_from(epub_path)
|
@@ -43,4 +47,4 @@ module EpubTools
|
|
43
47
|
warn "⚠️ Failed to process #{epub_path}: #{e.message}"
|
44
48
|
end
|
45
49
|
end
|
46
|
-
end
|
50
|
+
end
|
data/lib/epub_tools.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require_relative 'epub_tools/version'
|
2
|
-
require_relative 'epub_tools/
|
2
|
+
require_relative 'epub_tools/add_chapters'
|
3
3
|
require_relative 'epub_tools/cli_helper'
|
4
4
|
require_relative 'epub_tools/epub_initializer'
|
5
5
|
require_relative 'epub_tools/split_chapters'
|
@@ -9,5 +9,6 @@ require_relative 'epub_tools/pack_ebook'
|
|
9
9
|
require_relative 'epub_tools/unpack_ebook'
|
10
10
|
require_relative 'epub_tools/compile_book'
|
11
11
|
|
12
|
+
# Wrapper for all the other classes
|
12
13
|
module EpubTools
|
13
14
|
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require_relative 'test_helper'
|
2
|
-
require_relative '../lib/epub_tools/
|
2
|
+
require_relative '../lib/epub_tools/add_chapters'
|
3
3
|
require 'nokogiri'
|
4
4
|
|
5
|
-
class
|
5
|
+
class AddChaptersTest < Minitest::Test
|
6
6
|
def setup
|
7
7
|
@tmp = Dir.mktmpdir
|
8
8
|
# Directories for chapters and EPUB OEBPS
|
@@ -54,7 +54,7 @@ class AddChaptersToEpubTest < Minitest::Test
|
|
54
54
|
|
55
55
|
def test_run_moves_files_and_updates_opf_and_nav
|
56
56
|
# Run the add chapters task
|
57
|
-
EpubTools::
|
57
|
+
EpubTools::AddChapters.new(@chapters_dir, @epub_dir).run
|
58
58
|
|
59
59
|
# Original chapter files should be moved
|
60
60
|
assert_empty Dir.glob(File.join(@chapters_dir, '*.xhtml'))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Rodas
|
@@ -86,18 +86,20 @@ executables:
|
|
86
86
|
extensions: []
|
87
87
|
extra_rdoc_files: []
|
88
88
|
files:
|
89
|
+
- ".document"
|
89
90
|
- ".github/workflows/ci.yml"
|
90
91
|
- ".gitignore"
|
91
92
|
- ".nova/Configuration.json"
|
92
93
|
- ".ruby-version"
|
93
94
|
- Gemfile
|
94
95
|
- Gemfile.lock
|
96
|
+
- LICENSE
|
95
97
|
- README.md
|
96
98
|
- Rakefile
|
97
99
|
- bin/epub-tools
|
98
100
|
- epub_tools.gemspec
|
99
101
|
- lib/epub_tools.rb
|
100
|
-
- lib/epub_tools/
|
102
|
+
- lib/epub_tools/add_chapters.rb
|
101
103
|
- lib/epub_tools/cli_helper.rb
|
102
104
|
- lib/epub_tools/compile_book.rb
|
103
105
|
- lib/epub_tools/epub_initializer.rb
|
@@ -109,7 +111,7 @@ files:
|
|
109
111
|
- lib/epub_tools/xhtml_cleaner.rb
|
110
112
|
- lib/epub_tools/xhtml_extractor.rb
|
111
113
|
- style.css
|
112
|
-
- test/
|
114
|
+
- test/add_chapters_test.rb
|
113
115
|
- test/cli_version_test.rb
|
114
116
|
- test/compile_book_test.rb
|
115
117
|
- test/epub_initializer_test.rb
|
@@ -133,7 +135,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
133
135
|
requirements:
|
134
136
|
- - ">="
|
135
137
|
- !ruby/object:Gem::Version
|
136
|
-
version: '3.
|
138
|
+
version: '3.2'
|
137
139
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
140
|
requirements:
|
139
141
|
- - ">="
|