epub_tools 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.document +2 -0
  3. data/.github/workflows/ci.yml +9 -8
  4. data/.gitignore +4 -0
  5. data/.rubocop.yml +41 -0
  6. data/Gemfile +17 -8
  7. data/Gemfile.lock +51 -0
  8. data/LICENSE +21 -0
  9. data/README.md +21 -3
  10. data/bin/epub-tools +3 -109
  11. data/epub_tools.gemspec +6 -8
  12. data/lib/epub_tools/add_chapters.rb +124 -0
  13. data/lib/epub_tools/cli/command_registry.rb +47 -0
  14. data/lib/epub_tools/cli/option_builder.rb +164 -0
  15. data/lib/epub_tools/cli/runner.rb +164 -0
  16. data/lib/epub_tools/cli.rb +45 -0
  17. data/lib/epub_tools/compile_book.rb +77 -34
  18. data/lib/epub_tools/epub_initializer.rb +48 -26
  19. data/lib/epub_tools/loggable.rb +11 -0
  20. data/lib/epub_tools/pack_ebook.rb +20 -13
  21. data/lib/epub_tools/split_chapters.rb +40 -21
  22. data/lib/epub_tools/style_finder.rb +58 -0
  23. data/lib/epub_tools/unpack_ebook.rb +23 -16
  24. data/lib/epub_tools/version.rb +2 -1
  25. data/lib/epub_tools/xhtml_cleaner.rb +28 -8
  26. data/lib/epub_tools/xhtml_extractor.rb +23 -10
  27. data/lib/epub_tools.rb +4 -2
  28. data/test/{add_chapters_to_epub_test.rb → add_chapters_test.rb} +14 -7
  29. data/test/cli/command_registry_test.rb +66 -0
  30. data/test/cli/option_builder_test.rb +173 -0
  31. data/test/cli/runner_test.rb +91 -0
  32. data/test/cli_commands_test.rb +100 -0
  33. data/test/cli_test.rb +4 -0
  34. data/test/cli_version_test.rb +5 -3
  35. data/test/compile_book_test.rb +11 -2
  36. data/test/epub_initializer_test.rb +51 -31
  37. data/test/pack_ebook_test.rb +14 -8
  38. data/test/split_chapters_test.rb +22 -1
  39. data/test/{text_style_class_finder_test.rb → style_finder_test.rb} +7 -6
  40. data/test/test_helper.rb +4 -5
  41. data/test/unpack_ebook_test.rb +21 -5
  42. data/test/xhtml_cleaner_test.rb +13 -7
  43. data/test/xhtml_extractor_test.rb +17 -1
  44. metadata +24 -39
  45. data/lib/epub_tools/add_chapters_to_epub.rb +0 -87
  46. data/lib/epub_tools/cli_helper.rb +0 -31
  47. data/lib/epub_tools/text_style_class_finder.rb +0 -47
@@ -27,26 +27,47 @@ class SplitChaptersTest < Minitest::Test
27
27
  end
28
28
 
29
29
  def test_run_generates_chapter_files
30
- EpubTools::SplitChapters.new(@input, 'BookTitle', @out, 'chap').run
30
+ result = EpubTools::SplitChapters.new(input_file: @input, book_title: 'BookTitle', output_dir: @out,
31
+ output_prefix: 'chap').run
32
+
33
+ # Check return value is an array of chapter file paths
34
+ assert_instance_of Array, result
35
+ assert_equal 3, result.size
36
+
37
+ expected_paths = [
38
+ File.join(@out, 'chap_0.xhtml'),
39
+ File.join(@out, 'chap_1.xhtml'),
40
+ File.join(@out, 'chap_2.xhtml')
41
+ ]
42
+
43
+ expected_paths.each do |path|
44
+ assert_includes result, path
45
+ assert_path_exists path, "Expected file #{path} to exist"
46
+ end
47
+
31
48
  files = Dir.children(@out)
49
+
32
50
  assert_includes files, 'chap_0.xhtml'
33
51
  assert_includes files, 'chap_1.xhtml'
34
52
  assert_includes files, 'chap_2.xhtml'
35
53
 
36
54
  # Prologue
37
55
  prologue = File.read(File.join(@out, 'chap_0.xhtml'))
56
+
38
57
  assert_includes prologue, '<h1>Prologue</h1>'
39
58
  assert_includes prologue, 'Intro text'
40
59
  refute_includes prologue, 'Chapter 1'
41
60
 
42
61
  # Chapter 1
43
62
  ch1 = File.read(File.join(@out, 'chap_1.xhtml'))
63
+
44
64
  assert_includes ch1, '<h1>Chapter 1</h1>'
45
65
  assert_includes ch1, 'First paragraph'
46
66
  refute_includes ch1, 'Chapter 2'
47
67
 
48
68
  # Chapter 2
49
69
  ch2 = File.read(File.join(@out, 'chap_2.xhtml'))
70
+
50
71
  assert_includes ch2, '<h1>Chapter 2</h1>'
51
72
  assert_includes ch2, 'Second paragraph'
52
73
  end
@@ -1,8 +1,8 @@
1
1
  require 'yaml'
2
2
  require_relative 'test_helper'
3
- require_relative '../lib/epub_tools/text_style_class_finder'
3
+ require_relative '../lib/epub_tools/style_finder'
4
4
 
5
- class TextStyleClassFinderTest < Minitest::Test
5
+ class StyleFinder < Minitest::Test
6
6
  def setup
7
7
  @tmp = Dir.mktmpdir
8
8
  @xhtml = File.join(@tmp, 'doc.xhtml')
@@ -22,19 +22,20 @@ class TextStyleClassFinderTest < Minitest::Test
22
22
  end
23
23
 
24
24
  def test_finds_italic_and_bold_classes
25
- EpubTools::TextStyleClassFinder.new(@xhtml, @yaml).call
25
+ EpubTools::StyleFinder.new(file_path: @xhtml, output_path: @yaml).run
26
26
  data = YAML.load_file(@yaml)
27
+
27
28
  assert_equal ['c1'], data['italics']
28
29
  assert_equal ['c2'], data['bolds']
29
30
  end
30
31
 
31
32
  def test_verbose_mode
32
33
  text = <<~OUTPUT
33
- Classes with font-style: italic: c1
34
- Classes with font-weight: 700: c2
34
+ Classes with font-style: italic: c1
35
+ Classes with font-weight: 700: c2
35
36
  OUTPUT
36
37
  assert_output(text) do
37
- EpubTools::TextStyleClassFinder.new(@xhtml, @yaml, verbose: true).call
38
+ EpubTools::StyleFinder.new(file_path: @xhtml, output_path: @yaml, verbose: true).run
38
39
  end
39
40
  end
40
41
  end
data/test/test_helper.rb CHANGED
@@ -1,9 +1,8 @@
1
- if ENV['COVERAGE']
2
- require 'simplecov'
3
- SimpleCov.start do
4
- add_filter '/test/'
5
- end
1
+ require 'simplecov'
2
+ SimpleCov.start do
3
+ add_filter '/test/'
6
4
  end
5
+
7
6
  require 'minitest/autorun'
8
7
  require 'tmpdir'
9
8
  require 'fileutils'
@@ -24,6 +24,7 @@ class UnpackEbookTest < Minitest::Test
24
24
  Dir.glob(File.join(@build_dir, '**', '*'), File::FNM_DOTMATCH).sort.each do |src_path|
25
25
  rel_path = src_path.sub(%r{^#{Regexp.escape(@build_dir)}/?}, '')
26
26
  next if rel_path.empty? || rel_path == 'mimetype'
27
+
27
28
  if File.directory?(src_path)
28
29
  zip.mkdir(rel_path)
29
30
  else
@@ -40,19 +41,34 @@ class UnpackEbookTest < Minitest::Test
40
41
  end
41
42
 
42
43
  def test_run_extracts_all_entries
43
- EpubTools::UnpackEbook.new(@epub_file, @dest_dir).run
44
+ result = EpubTools::UnpackEbook.new(epub_file: @epub_file, output_dir: @dest_dir).run
45
+
46
+ # Check return value is the output directory path
47
+ assert_equal @dest_dir, result
48
+
44
49
  # Check extracted files
45
50
  assert Dir.exist?(@dest_dir)
46
51
  assert_equal 'application/epub+zip', File.read(File.join(@dest_dir, 'mimetype'))
47
- assert File.exist?(File.join(@dest_dir, 'META-INF', 'container.xml'))
48
- assert File.exist?(File.join(@dest_dir, 'OEBPS', 'title.xhtml'))
52
+ assert_path_exists File.join(@dest_dir, 'META-INF', 'container.xml')
53
+ assert_path_exists File.join(@dest_dir, 'OEBPS', 'title.xhtml')
54
+ end
55
+
56
+ def test_run_with_default_output_dir
57
+ # Create the test with default output directory
58
+ result = EpubTools::UnpackEbook.new(epub_file: @epub_file).run
59
+
60
+ expected_dir = File.join(File.dirname(@epub_file), File.basename(@epub_file, '.epub'))
61
+
62
+ assert_equal expected_dir, result
63
+ assert Dir.exist?(expected_dir)
64
+ assert_path_exists File.join(expected_dir, 'mimetype')
49
65
  end
50
66
 
51
67
  def test_missing_epub_raises_error
52
68
  missing = File.join(@tmp, 'nope.epub')
53
69
  error = assert_raises(ArgumentError) do
54
- EpubTools::UnpackEbook.new(missing, @dest_dir).run
70
+ EpubTools::UnpackEbook.new(epub_file: missing, output_dir: @dest_dir).run
55
71
  end
56
- assert_includes error.message, "does not exist"
72
+ assert_includes error.message, 'does not exist'
57
73
  end
58
74
  end
@@ -28,12 +28,18 @@ class XHTMLCleanerTest < Minitest::Test
28
28
  end
29
29
 
30
30
  def test_cleaner_removes_and_transforms_tags
31
- EpubTools::XHTMLCleaner.new(@file, @config).call
32
- result = File.read(@file)
33
- assert_includes result, '<i>ItalicsOnly</i>'
34
- assert_includes result, 'KeepThis'
35
- refute_includes result, '<span'
36
- refute_includes result, '<hr'
37
- refute_includes result, 'RemoveMe'
31
+ result = EpubTools::XHTMLCleaner.new(filename: @file, class_config: @config).run
32
+
33
+ # Check return value is the filename that was cleaned
34
+ assert_equal @file, result
35
+ assert_path_exists @file
36
+
37
+ content = File.read(@file)
38
+
39
+ assert_includes content, '<i>ItalicsOnly</i>'
40
+ assert_includes content, 'KeepThis'
41
+ refute_includes content, '<span'
42
+ refute_includes content, '<hr'
43
+ refute_includes content, 'RemoveMe'
38
44
  end
39
45
  end
@@ -22,8 +22,24 @@ class XHTMLExtractorTest < Minitest::Test
22
22
  end
23
23
 
24
24
  def test_extracts_xhtml_excluding_nav
25
- @extractor.extract_all
25
+ result = @extractor.run
26
+
27
+ # Check return value is an array of extracted file paths
28
+ assert_instance_of Array, result
29
+ assert_equal 2, result.size
30
+
31
+ expected_paths = [
32
+ File.join(@tgt, 'sample_chapter1.xhtml'),
33
+ File.join(@tgt, 'sample_ch2.xhtml')
34
+ ]
35
+
36
+ expected_paths.each do |path|
37
+ assert_includes result, path
38
+ assert_path_exists path
39
+ end
40
+
26
41
  files = Dir.children(@tgt)
42
+
27
43
  assert_includes files, 'sample_chapter1.xhtml'
28
44
  assert_includes files, 'sample_ch2.xhtml'
29
45
  refute_includes files, 'nav.xhtml'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: epub_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Rodas
@@ -23,20 +23,6 @@ dependencies:
23
23
  - - "~>"
24
24
  - !ruby/object:Gem::Version
25
25
  version: '1.18'
26
- - !ruby/object:Gem::Dependency
27
- name: rubyzip
28
- requirement: !ruby/object:Gem::Requirement
29
- requirements:
30
- - - "~>"
31
- - !ruby/object:Gem::Version
32
- version: '2.4'
33
- type: :runtime
34
- prerelease: false
35
- version_requirements: !ruby/object:Gem::Requirement
36
- requirements:
37
- - - "~>"
38
- - !ruby/object:Gem::Version
39
- version: '2.4'
40
26
  - !ruby/object:Gem::Dependency
41
27
  name: rake
42
28
  requirement: !ruby/object:Gem::Requirement
@@ -52,33 +38,19 @@ dependencies:
52
38
  - !ruby/object:Gem::Version
53
39
  version: '13.2'
54
40
  - !ruby/object:Gem::Dependency
55
- name: minitest
56
- requirement: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - "~>"
59
- - !ruby/object:Gem::Version
60
- version: '5.25'
61
- type: :development
62
- prerelease: false
63
- version_requirements: !ruby/object:Gem::Requirement
64
- requirements:
65
- - - "~>"
66
- - !ruby/object:Gem::Version
67
- version: '5.25'
68
- - !ruby/object:Gem::Dependency
69
- name: simplecov
41
+ name: rubyzip
70
42
  requirement: !ruby/object:Gem::Requirement
71
43
  requirements:
72
44
  - - "~>"
73
45
  - !ruby/object:Gem::Version
74
- version: '0'
75
- type: :development
46
+ version: '2.4'
47
+ type: :runtime
76
48
  prerelease: false
77
49
  version_requirements: !ruby/object:Gem::Requirement
78
50
  requirements:
79
51
  - - "~>"
80
52
  - !ruby/object:Gem::Version
81
- version: '0'
53
+ version: '2.4'
82
54
  email:
83
55
  - rodas@hey.com
84
56
  executables:
@@ -86,37 +58,49 @@ executables:
86
58
  extensions: []
87
59
  extra_rdoc_files: []
88
60
  files:
61
+ - ".document"
89
62
  - ".github/workflows/ci.yml"
90
63
  - ".gitignore"
91
64
  - ".nova/Configuration.json"
65
+ - ".rubocop.yml"
92
66
  - ".ruby-version"
93
67
  - Gemfile
94
68
  - Gemfile.lock
69
+ - LICENSE
95
70
  - README.md
96
71
  - Rakefile
97
72
  - bin/epub-tools
98
73
  - epub_tools.gemspec
99
74
  - lib/epub_tools.rb
100
- - lib/epub_tools/add_chapters_to_epub.rb
101
- - lib/epub_tools/cli_helper.rb
75
+ - lib/epub_tools/add_chapters.rb
76
+ - lib/epub_tools/cli.rb
77
+ - lib/epub_tools/cli/command_registry.rb
78
+ - lib/epub_tools/cli/option_builder.rb
79
+ - lib/epub_tools/cli/runner.rb
102
80
  - lib/epub_tools/compile_book.rb
103
81
  - lib/epub_tools/epub_initializer.rb
82
+ - lib/epub_tools/loggable.rb
104
83
  - lib/epub_tools/pack_ebook.rb
105
84
  - lib/epub_tools/split_chapters.rb
106
- - lib/epub_tools/text_style_class_finder.rb
85
+ - lib/epub_tools/style_finder.rb
107
86
  - lib/epub_tools/unpack_ebook.rb
108
87
  - lib/epub_tools/version.rb
109
88
  - lib/epub_tools/xhtml_cleaner.rb
110
89
  - lib/epub_tools/xhtml_extractor.rb
111
90
  - style.css
112
- - test/add_chapters_to_epub_test.rb
91
+ - test/add_chapters_test.rb
92
+ - test/cli/command_registry_test.rb
93
+ - test/cli/option_builder_test.rb
94
+ - test/cli/runner_test.rb
95
+ - test/cli_commands_test.rb
96
+ - test/cli_test.rb
113
97
  - test/cli_version_test.rb
114
98
  - test/compile_book_test.rb
115
99
  - test/epub_initializer_test.rb
116
100
  - test/pack_ebook_test.rb
117
101
  - test/split_chapters_test.rb
102
+ - test/style_finder_test.rb
118
103
  - test/test_helper.rb
119
- - test/text_style_class_finder_test.rb
120
104
  - test/unpack_ebook_test.rb
121
105
  - test/xhtml_cleaner_test.rb
122
106
  - test/xhtml_extractor_test.rb
@@ -126,6 +110,7 @@ licenses:
126
110
  metadata:
127
111
  source_code_uri: https://github.com/jaimerodas/epub_tools/tree/main
128
112
  homepage_uri: https://github.com/jaimerodas/epub_tools
113
+ rubygems_mfa_required: 'true'
129
114
  rdoc_options: []
130
115
  require_paths:
131
116
  - lib
@@ -133,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
133
118
  requirements:
134
119
  - - ">="
135
120
  - !ruby/object:Gem::Version
136
- version: '3.0'
121
+ version: '3.2'
137
122
  required_rubygems_version: !ruby/object:Gem::Requirement
138
123
  requirements:
139
124
  - - ">="
@@ -1,87 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'nokogiri'
3
- require 'fileutils'
4
-
5
- module EpubTools
6
- class AddChaptersToEpub
7
- def initialize(chapters_dir = './chapters', epub_dir = './epub/OEBPS', verbose = false)
8
- @chapters_dir = chapters_dir
9
- @epub_dir = epub_dir
10
- @opf_file = File.join(@epub_dir, 'package.opf')
11
- @nav_file = File.join(@epub_dir, 'nav.xhtml')
12
- @verbose = verbose
13
- end
14
-
15
- def run
16
- moved_files = move_chapters
17
- update_package_opf(moved_files)
18
- update_nav_xhtml(moved_files)
19
- @verbose ? moved_files.each {|f| puts "Moved: #{f}"} : moved_files
20
- end
21
-
22
- private
23
-
24
- def move_chapters
25
- # Sort by chapter number (numeric)
26
- chapter_files = Dir.glob(File.join(@chapters_dir, '*.xhtml')).sort_by do |path|
27
- # extract first integer from filename (e.g. chapter_10.xhtml -> 10)
28
- File.basename(path)[/\d+/].to_i
29
- end
30
- chapter_files.each do |file|
31
- FileUtils.mv(file, @epub_dir)
32
- end
33
- chapter_files.map { |f| File.basename(f) }
34
- end
35
-
36
- def chapter_id(filename)
37
- match = filename.match(/chapter_(\d+)\.xhtml/)
38
- match ? "chap#{match[1]}" : File.basename(filename, '.xhtml')
39
- end
40
-
41
- def update_package_opf(filenames)
42
- doc = Nokogiri::XML(File.read(@opf_file)) { |config| config.default_xml.noblanks }
43
- manifest = doc.at_xpath('//xmlns:manifest')
44
- spine = doc.at_xpath('//xmlns:spine')
45
-
46
- filenames.each do |filename|
47
- id = chapter_id(filename)
48
- # Add <item> to the manifest if missing
49
- unless doc.at_xpath("//xmlns:item[@href='#{filename}']")
50
- item = Nokogiri::XML::Node.new('item', doc)
51
- item['id'] = id
52
- item['href'] = filename
53
- item['media-type'] = 'application/xhtml+xml'
54
- manifest.add_child(item)
55
- end
56
-
57
- # Add <itemref> to the spine if missing
58
- unless doc.at_xpath("//xmlns:itemref[@idref='#{id}']")
59
- itemref = Nokogiri::XML::Node.new('itemref', doc)
60
- itemref['idref'] = id
61
- spine.add_child(itemref)
62
- end
63
- end
64
-
65
- File.write(@opf_file, doc.to_xml(indent: 2))
66
- end
67
-
68
- def update_nav_xhtml(filenames)
69
- doc = Nokogiri::XML(File.read(@nav_file)) { |config| config.default_xml.noblanks }
70
- nav = doc.at_xpath('//xmlns:nav[@epub:type="toc"]/xmlns:ol')
71
-
72
- filenames.each do |filename|
73
- # Create a new <li><a href="...">Label</a></li> element
74
- label = File.basename(filename, '.xhtml').gsub('_', ' ').capitalize
75
- label = "Prologue" if label == "Chapter 0"
76
- li = Nokogiri::XML::Node.new('li', doc)
77
- a = Nokogiri::XML::Node.new('a', doc)
78
- a['href'] = filename
79
- a.content = label
80
- li.add_child(a)
81
- nav.add_child(li)
82
- end
83
-
84
- File.write(@nav_file, doc.to_xml(indent: 2))
85
- end
86
- end
87
- end
@@ -1,31 +0,0 @@
1
- require 'optparse'
2
-
3
- module EpubTools
4
- # A simple helper to DRY CLI OptionParser usage across commands
5
- class CLIHelper
6
- # Parses ARGV into options hash, enforces required keys, and displays help/errors.
7
- # options: hash of defaults; required_keys: array of symbols required
8
- def self.parse(options = {}, required_keys = [], &block)
9
- parser = OptionParser.new do |opts|
10
- block.call(opts, options)
11
- opts.on('-h', '--help', 'Prints this help') { puts opts; exit }
12
- end
13
- begin
14
- parser.parse!
15
- unless required_keys.empty?
16
- missing = required_keys.select { |k| options[k].nil? }
17
- unless missing.empty?
18
- STDERR.puts "Missing required options: #{missing.map { |k| "--#{k.to_s.gsub('_','-')}" }.join(', ')}"
19
- STDERR.puts parser
20
- exit 1
21
- end
22
- end
23
- rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
24
- STDERR.puts e.message
25
- STDERR.puts parser
26
- exit 1
27
- end
28
- options
29
- end
30
- end
31
- end
@@ -1,47 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'nokogiri'
3
- require 'yaml'
4
-
5
- module EpubTools
6
- class TextStyleClassFinder
7
- def initialize(file_path, output_path = 'text_style_classes.yaml', verbose: false)
8
- @file_path = file_path
9
- @output_path = output_path
10
- @verbose = verbose
11
- raise ArgumentError, "File does not exist: #{@file_path}" unless File.exist?(@file_path)
12
- end
13
-
14
- def call
15
- doc = Nokogiri::HTML(File.read(@file_path))
16
- style_blocks = doc.xpath('//style').map(&:text).join("\n")
17
-
18
- italics = extract_classes(style_blocks, /font-style\s*:\s*italic/)
19
- bolds = extract_classes(style_blocks, /font-weight\s*:\s*700/)
20
-
21
- print_summary(italics, bolds) if @verbose
22
-
23
- data = {
24
- "italics" => italics,
25
- "bolds" => bolds
26
- }
27
- File.write(@output_path, data.to_yaml)
28
- end
29
-
30
- private
31
-
32
- def extract_classes(style_text, pattern)
33
- regex = /\.([\w-]+)\s*{[^}]*#{pattern.source}[^}]*}/i
34
- style_text.scan(regex).flatten.uniq
35
- end
36
-
37
- def print_summary(italics, bolds)
38
- unless italics.empty?
39
- puts "Classes with font-style: italic: #{italics.join(", ")}"
40
- end
41
-
42
- unless bolds.empty?
43
- puts "Classes with font-weight: 700: #{bolds.join(", ")}"
44
- end
45
- end
46
- end
47
- end