epub_tools 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.document +2 -0
- data/.github/workflows/ci.yml +9 -8
- data/.gitignore +4 -0
- data/.rubocop.yml +41 -0
- data/Gemfile +17 -8
- data/Gemfile.lock +51 -0
- data/LICENSE +21 -0
- data/README.md +21 -3
- data/bin/epub-tools +3 -109
- data/epub_tools.gemspec +6 -8
- data/lib/epub_tools/add_chapters.rb +124 -0
- data/lib/epub_tools/cli/command_registry.rb +47 -0
- data/lib/epub_tools/cli/option_builder.rb +164 -0
- data/lib/epub_tools/cli/runner.rb +164 -0
- data/lib/epub_tools/cli.rb +45 -0
- data/lib/epub_tools/compile_book.rb +77 -34
- data/lib/epub_tools/epub_initializer.rb +48 -26
- data/lib/epub_tools/loggable.rb +11 -0
- data/lib/epub_tools/pack_ebook.rb +20 -13
- data/lib/epub_tools/split_chapters.rb +40 -21
- data/lib/epub_tools/style_finder.rb +58 -0
- data/lib/epub_tools/unpack_ebook.rb +23 -16
- data/lib/epub_tools/version.rb +2 -1
- data/lib/epub_tools/xhtml_cleaner.rb +28 -8
- data/lib/epub_tools/xhtml_extractor.rb +23 -10
- data/lib/epub_tools.rb +4 -2
- data/test/{add_chapters_to_epub_test.rb → add_chapters_test.rb} +14 -7
- data/test/cli/command_registry_test.rb +66 -0
- data/test/cli/option_builder_test.rb +173 -0
- data/test/cli/runner_test.rb +91 -0
- data/test/cli_commands_test.rb +100 -0
- data/test/cli_test.rb +4 -0
- data/test/cli_version_test.rb +5 -3
- data/test/compile_book_test.rb +11 -2
- data/test/epub_initializer_test.rb +51 -31
- data/test/pack_ebook_test.rb +14 -8
- data/test/split_chapters_test.rb +22 -1
- data/test/{text_style_class_finder_test.rb → style_finder_test.rb} +7 -6
- data/test/test_helper.rb +4 -5
- data/test/unpack_ebook_test.rb +21 -5
- data/test/xhtml_cleaner_test.rb +13 -7
- data/test/xhtml_extractor_test.rb +17 -1
- metadata +24 -39
- data/lib/epub_tools/add_chapters_to_epub.rb +0 -87
- data/lib/epub_tools/cli_helper.rb +0 -31
- data/lib/epub_tools/text_style_class_finder.rb +0 -47
data/test/split_chapters_test.rb
CHANGED
@@ -27,26 +27,47 @@ class SplitChaptersTest < Minitest::Test
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def test_run_generates_chapter_files
|
30
|
-
EpubTools::SplitChapters.new(@input, 'BookTitle', @out,
|
30
|
+
result = EpubTools::SplitChapters.new(input_file: @input, book_title: 'BookTitle', output_dir: @out,
|
31
|
+
output_prefix: 'chap').run
|
32
|
+
|
33
|
+
# Check return value is an array of chapter file paths
|
34
|
+
assert_instance_of Array, result
|
35
|
+
assert_equal 3, result.size
|
36
|
+
|
37
|
+
expected_paths = [
|
38
|
+
File.join(@out, 'chap_0.xhtml'),
|
39
|
+
File.join(@out, 'chap_1.xhtml'),
|
40
|
+
File.join(@out, 'chap_2.xhtml')
|
41
|
+
]
|
42
|
+
|
43
|
+
expected_paths.each do |path|
|
44
|
+
assert_includes result, path
|
45
|
+
assert_path_exists path, "Expected file #{path} to exist"
|
46
|
+
end
|
47
|
+
|
31
48
|
files = Dir.children(@out)
|
49
|
+
|
32
50
|
assert_includes files, 'chap_0.xhtml'
|
33
51
|
assert_includes files, 'chap_1.xhtml'
|
34
52
|
assert_includes files, 'chap_2.xhtml'
|
35
53
|
|
36
54
|
# Prologue
|
37
55
|
prologue = File.read(File.join(@out, 'chap_0.xhtml'))
|
56
|
+
|
38
57
|
assert_includes prologue, '<h1>Prologue</h1>'
|
39
58
|
assert_includes prologue, 'Intro text'
|
40
59
|
refute_includes prologue, 'Chapter 1'
|
41
60
|
|
42
61
|
# Chapter 1
|
43
62
|
ch1 = File.read(File.join(@out, 'chap_1.xhtml'))
|
63
|
+
|
44
64
|
assert_includes ch1, '<h1>Chapter 1</h1>'
|
45
65
|
assert_includes ch1, 'First paragraph'
|
46
66
|
refute_includes ch1, 'Chapter 2'
|
47
67
|
|
48
68
|
# Chapter 2
|
49
69
|
ch2 = File.read(File.join(@out, 'chap_2.xhtml'))
|
70
|
+
|
50
71
|
assert_includes ch2, '<h1>Chapter 2</h1>'
|
51
72
|
assert_includes ch2, 'Second paragraph'
|
52
73
|
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require_relative 'test_helper'
|
3
|
-
require_relative '../lib/epub_tools/text_style_class_finder'
|
3
|
+
require_relative '../lib/epub_tools/style_finder'
|
4
4
|
|
5
|
-
class TextStyleClassFinderTest < Minitest::Test
|
5
|
+
class StyleFinder < Minitest::Test
|
6
6
|
def setup
|
7
7
|
@tmp = Dir.mktmpdir
|
8
8
|
@xhtml = File.join(@tmp, 'doc.xhtml')
|
@@ -22,19 +22,20 @@ class TextStyleClassFinderTest < Minitest::Test
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def test_finds_italic_and_bold_classes
|
25
|
-
EpubTools::TextStyleClassFinder.new(@xhtml, @yaml).call
|
25
|
+
EpubTools::StyleFinder.new(file_path: @xhtml, output_path: @yaml).run
|
26
26
|
data = YAML.load_file(@yaml)
|
27
|
+
|
27
28
|
assert_equal ['c1'], data['italics']
|
28
29
|
assert_equal ['c2'], data['bolds']
|
29
30
|
end
|
30
31
|
|
31
32
|
def test_verbose_mode
|
32
33
|
text = <<~OUTPUT
|
33
|
-
|
34
|
-
|
34
|
+
Classes with font-style: italic: c1
|
35
|
+
Classes with font-weight: 700: c2
|
35
36
|
OUTPUT
|
36
37
|
assert_output(text) do
|
37
|
-
EpubTools::TextStyleClassFinder.new(@xhtml, @yaml, verbose: true).call
|
38
|
+
EpubTools::StyleFinder.new(file_path: @xhtml, output_path: @yaml, verbose: true).run
|
38
39
|
end
|
39
40
|
end
|
40
41
|
end
|
data/test/test_helper.rb
CHANGED
data/test/unpack_ebook_test.rb
CHANGED
@@ -24,6 +24,7 @@ class UnpackEbookTest < Minitest::Test
|
|
24
24
|
Dir.glob(File.join(@build_dir, '**', '*'), File::FNM_DOTMATCH).sort.each do |src_path|
|
25
25
|
rel_path = src_path.sub(%r{^#{Regexp.escape(@build_dir)}/?}, '')
|
26
26
|
next if rel_path.empty? || rel_path == 'mimetype'
|
27
|
+
|
27
28
|
if File.directory?(src_path)
|
28
29
|
zip.mkdir(rel_path)
|
29
30
|
else
|
@@ -40,19 +41,34 @@ class UnpackEbookTest < Minitest::Test
|
|
40
41
|
end
|
41
42
|
|
42
43
|
def test_run_extracts_all_entries
|
43
|
-
EpubTools::UnpackEbook.new(@epub_file, @dest_dir).run
|
44
|
+
result = EpubTools::UnpackEbook.new(epub_file: @epub_file, output_dir: @dest_dir).run
|
45
|
+
|
46
|
+
# Check return value is the output directory path
|
47
|
+
assert_equal @dest_dir, result
|
48
|
+
|
44
49
|
# Check extracted files
|
45
50
|
assert Dir.exist?(@dest_dir)
|
46
51
|
assert_equal 'application/epub+zip', File.read(File.join(@dest_dir, 'mimetype'))
|
47
|
-
|
48
|
-
|
52
|
+
assert_path_exists File.join(@dest_dir, 'META-INF', 'container.xml')
|
53
|
+
assert_path_exists File.join(@dest_dir, 'OEBPS', 'title.xhtml')
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_run_with_default_output_dir
|
57
|
+
# Create the test with default output directory
|
58
|
+
result = EpubTools::UnpackEbook.new(epub_file: @epub_file).run
|
59
|
+
|
60
|
+
expected_dir = File.join(File.dirname(@epub_file), File.basename(@epub_file, '.epub'))
|
61
|
+
|
62
|
+
assert_equal expected_dir, result
|
63
|
+
assert Dir.exist?(expected_dir)
|
64
|
+
assert_path_exists File.join(expected_dir, 'mimetype')
|
49
65
|
end
|
50
66
|
|
51
67
|
def test_missing_epub_raises_error
|
52
68
|
missing = File.join(@tmp, 'nope.epub')
|
53
69
|
error = assert_raises(ArgumentError) do
|
54
|
-
EpubTools::UnpackEbook.new(missing, @dest_dir).run
|
70
|
+
EpubTools::UnpackEbook.new(epub_file: missing, output_dir: @dest_dir).run
|
55
71
|
end
|
56
|
-
assert_includes error.message,
|
72
|
+
assert_includes error.message, 'does not exist'
|
57
73
|
end
|
58
74
|
end
|
data/test/xhtml_cleaner_test.rb
CHANGED
@@ -28,12 +28,18 @@ class XHTMLCleanerTest < Minitest::Test
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def test_cleaner_removes_and_transforms_tags
|
31
|
-
EpubTools::XHTMLCleaner.new(@file, @config).
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
31
|
+
result = EpubTools::XHTMLCleaner.new(filename: @file, class_config: @config).run
|
32
|
+
|
33
|
+
# Check return value is the filename that was cleaned
|
34
|
+
assert_equal @file, result
|
35
|
+
assert_path_exists @file
|
36
|
+
|
37
|
+
content = File.read(@file)
|
38
|
+
|
39
|
+
assert_includes content, '<i>ItalicsOnly</i>'
|
40
|
+
assert_includes content, 'KeepThis'
|
41
|
+
refute_includes content, '<span'
|
42
|
+
refute_includes content, '<hr'
|
43
|
+
refute_includes content, 'RemoveMe'
|
38
44
|
end
|
39
45
|
end
|
@@ -22,8 +22,24 @@ class XHTMLExtractorTest < Minitest::Test
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def test_extracts_xhtml_excluding_nav
|
25
|
-
@extractor.
|
25
|
+
result = @extractor.run
|
26
|
+
|
27
|
+
# Check return value is an array of extracted file paths
|
28
|
+
assert_instance_of Array, result
|
29
|
+
assert_equal 2, result.size
|
30
|
+
|
31
|
+
expected_paths = [
|
32
|
+
File.join(@tgt, 'sample_chapter1.xhtml'),
|
33
|
+
File.join(@tgt, 'sample_ch2.xhtml')
|
34
|
+
]
|
35
|
+
|
36
|
+
expected_paths.each do |path|
|
37
|
+
assert_includes result, path
|
38
|
+
assert_path_exists path
|
39
|
+
end
|
40
|
+
|
26
41
|
files = Dir.children(@tgt)
|
42
|
+
|
27
43
|
assert_includes files, 'sample_chapter1.xhtml'
|
28
44
|
assert_includes files, 'sample_ch2.xhtml'
|
29
45
|
refute_includes files, 'nav.xhtml'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Rodas
|
@@ -23,20 +23,6 @@ dependencies:
|
|
23
23
|
- - "~>"
|
24
24
|
- !ruby/object:Gem::Version
|
25
25
|
version: '1.18'
|
26
|
-
- !ruby/object:Gem::Dependency
|
27
|
-
name: rubyzip
|
28
|
-
requirement: !ruby/object:Gem::Requirement
|
29
|
-
requirements:
|
30
|
-
- - "~>"
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: '2.4'
|
33
|
-
type: :runtime
|
34
|
-
prerelease: false
|
35
|
-
version_requirements: !ruby/object:Gem::Requirement
|
36
|
-
requirements:
|
37
|
-
- - "~>"
|
38
|
-
- !ruby/object:Gem::Version
|
39
|
-
version: '2.4'
|
40
26
|
- !ruby/object:Gem::Dependency
|
41
27
|
name: rake
|
42
28
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,33 +38,19 @@ dependencies:
|
|
52
38
|
- !ruby/object:Gem::Version
|
53
39
|
version: '13.2'
|
54
40
|
- !ruby/object:Gem::Dependency
|
55
|
-
name:
|
56
|
-
requirement: !ruby/object:Gem::Requirement
|
57
|
-
requirements:
|
58
|
-
- - "~>"
|
59
|
-
- !ruby/object:Gem::Version
|
60
|
-
version: '5.25'
|
61
|
-
type: :development
|
62
|
-
prerelease: false
|
63
|
-
version_requirements: !ruby/object:Gem::Requirement
|
64
|
-
requirements:
|
65
|
-
- - "~>"
|
66
|
-
- !ruby/object:Gem::Version
|
67
|
-
version: '5.25'
|
68
|
-
- !ruby/object:Gem::Dependency
|
69
|
-
name: simplecov
|
41
|
+
name: rubyzip
|
70
42
|
requirement: !ruby/object:Gem::Requirement
|
71
43
|
requirements:
|
72
44
|
- - "~>"
|
73
45
|
- !ruby/object:Gem::Version
|
74
|
-
version: '
|
75
|
-
type: :
|
46
|
+
version: '2.4'
|
47
|
+
type: :runtime
|
76
48
|
prerelease: false
|
77
49
|
version_requirements: !ruby/object:Gem::Requirement
|
78
50
|
requirements:
|
79
51
|
- - "~>"
|
80
52
|
- !ruby/object:Gem::Version
|
81
|
-
version: '
|
53
|
+
version: '2.4'
|
82
54
|
email:
|
83
55
|
- rodas@hey.com
|
84
56
|
executables:
|
@@ -86,37 +58,49 @@ executables:
|
|
86
58
|
extensions: []
|
87
59
|
extra_rdoc_files: []
|
88
60
|
files:
|
61
|
+
- ".document"
|
89
62
|
- ".github/workflows/ci.yml"
|
90
63
|
- ".gitignore"
|
91
64
|
- ".nova/Configuration.json"
|
65
|
+
- ".rubocop.yml"
|
92
66
|
- ".ruby-version"
|
93
67
|
- Gemfile
|
94
68
|
- Gemfile.lock
|
69
|
+
- LICENSE
|
95
70
|
- README.md
|
96
71
|
- Rakefile
|
97
72
|
- bin/epub-tools
|
98
73
|
- epub_tools.gemspec
|
99
74
|
- lib/epub_tools.rb
|
100
|
-
- lib/epub_tools/add_chapters_to_epub.rb
|
101
|
-
- lib/epub_tools/cli_helper.rb
|
75
|
+
- lib/epub_tools/add_chapters.rb
|
76
|
+
- lib/epub_tools/cli.rb
|
77
|
+
- lib/epub_tools/cli/command_registry.rb
|
78
|
+
- lib/epub_tools/cli/option_builder.rb
|
79
|
+
- lib/epub_tools/cli/runner.rb
|
102
80
|
- lib/epub_tools/compile_book.rb
|
103
81
|
- lib/epub_tools/epub_initializer.rb
|
82
|
+
- lib/epub_tools/loggable.rb
|
104
83
|
- lib/epub_tools/pack_ebook.rb
|
105
84
|
- lib/epub_tools/split_chapters.rb
|
106
|
-
- lib/epub_tools/text_style_class_finder.rb
|
85
|
+
- lib/epub_tools/style_finder.rb
|
107
86
|
- lib/epub_tools/unpack_ebook.rb
|
108
87
|
- lib/epub_tools/version.rb
|
109
88
|
- lib/epub_tools/xhtml_cleaner.rb
|
110
89
|
- lib/epub_tools/xhtml_extractor.rb
|
111
90
|
- style.css
|
112
|
-
- test/add_chapters_to_epub_test.rb
|
91
|
+
- test/add_chapters_test.rb
|
92
|
+
- test/cli/command_registry_test.rb
|
93
|
+
- test/cli/option_builder_test.rb
|
94
|
+
- test/cli/runner_test.rb
|
95
|
+
- test/cli_commands_test.rb
|
96
|
+
- test/cli_test.rb
|
113
97
|
- test/cli_version_test.rb
|
114
98
|
- test/compile_book_test.rb
|
115
99
|
- test/epub_initializer_test.rb
|
116
100
|
- test/pack_ebook_test.rb
|
117
101
|
- test/split_chapters_test.rb
|
102
|
+
- test/style_finder_test.rb
|
118
103
|
- test/test_helper.rb
|
119
|
-
- test/text_style_class_finder_test.rb
|
120
104
|
- test/unpack_ebook_test.rb
|
121
105
|
- test/xhtml_cleaner_test.rb
|
122
106
|
- test/xhtml_extractor_test.rb
|
@@ -126,6 +110,7 @@ licenses:
|
|
126
110
|
metadata:
|
127
111
|
source_code_uri: https://github.com/jaimerodas/epub_tools/tree/main
|
128
112
|
homepage_uri: https://github.com/jaimerodas/epub_tools
|
113
|
+
rubygems_mfa_required: 'true'
|
129
114
|
rdoc_options: []
|
130
115
|
require_paths:
|
131
116
|
- lib
|
@@ -133,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
133
118
|
requirements:
|
134
119
|
- - ">="
|
135
120
|
- !ruby/object:Gem::Version
|
136
|
-
version: '3.
|
121
|
+
version: '3.2'
|
137
122
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
123
|
requirements:
|
139
124
|
- - ">="
|
@@ -1,87 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'nokogiri'
|
3
|
-
require 'fileutils'
|
4
|
-
|
5
|
-
module EpubTools
|
6
|
-
class AddChaptersToEpub
|
7
|
-
def initialize(chapters_dir = './chapters', epub_dir = './epub/OEBPS', verbose = false)
|
8
|
-
@chapters_dir = chapters_dir
|
9
|
-
@epub_dir = epub_dir
|
10
|
-
@opf_file = File.join(@epub_dir, 'package.opf')
|
11
|
-
@nav_file = File.join(@epub_dir, 'nav.xhtml')
|
12
|
-
@verbose = verbose
|
13
|
-
end
|
14
|
-
|
15
|
-
def run
|
16
|
-
moved_files = move_chapters
|
17
|
-
update_package_opf(moved_files)
|
18
|
-
update_nav_xhtml(moved_files)
|
19
|
-
@verbose ? moved_files.each {|f| puts "Moved: #{f}"} : moved_files
|
20
|
-
end
|
21
|
-
|
22
|
-
private
|
23
|
-
|
24
|
-
def move_chapters
|
25
|
-
# Sort by chapter number (numeric)
|
26
|
-
chapter_files = Dir.glob(File.join(@chapters_dir, '*.xhtml')).sort_by do |path|
|
27
|
-
# extract first integer from filename (e.g. chapter_10.xhtml -> 10)
|
28
|
-
File.basename(path)[/\d+/].to_i
|
29
|
-
end
|
30
|
-
chapter_files.each do |file|
|
31
|
-
FileUtils.mv(file, @epub_dir)
|
32
|
-
end
|
33
|
-
chapter_files.map { |f| File.basename(f) }
|
34
|
-
end
|
35
|
-
|
36
|
-
def chapter_id(filename)
|
37
|
-
match = filename.match(/chapter_(\d+)\.xhtml/)
|
38
|
-
match ? "chap#{match[1]}" : File.basename(filename, '.xhtml')
|
39
|
-
end
|
40
|
-
|
41
|
-
def update_package_opf(filenames)
|
42
|
-
doc = Nokogiri::XML(File.read(@opf_file)) { |config| config.default_xml.noblanks }
|
43
|
-
manifest = doc.at_xpath('//xmlns:manifest')
|
44
|
-
spine = doc.at_xpath('//xmlns:spine')
|
45
|
-
|
46
|
-
filenames.each do |filename|
|
47
|
-
id = chapter_id(filename)
|
48
|
-
# Add <item> to the manifest if missing
|
49
|
-
unless doc.at_xpath("//xmlns:item[@href='#{filename}']")
|
50
|
-
item = Nokogiri::XML::Node.new('item', doc)
|
51
|
-
item['id'] = id
|
52
|
-
item['href'] = filename
|
53
|
-
item['media-type'] = 'application/xhtml+xml'
|
54
|
-
manifest.add_child(item)
|
55
|
-
end
|
56
|
-
|
57
|
-
# Add <itemref> to the spine if missing
|
58
|
-
unless doc.at_xpath("//xmlns:itemref[@idref='#{id}']")
|
59
|
-
itemref = Nokogiri::XML::Node.new('itemref', doc)
|
60
|
-
itemref['idref'] = id
|
61
|
-
spine.add_child(itemref)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
File.write(@opf_file, doc.to_xml(indent: 2))
|
66
|
-
end
|
67
|
-
|
68
|
-
def update_nav_xhtml(filenames)
|
69
|
-
doc = Nokogiri::XML(File.read(@nav_file)) { |config| config.default_xml.noblanks }
|
70
|
-
nav = doc.at_xpath('//xmlns:nav[@epub:type="toc"]/xmlns:ol')
|
71
|
-
|
72
|
-
filenames.each do |filename|
|
73
|
-
# Create a new <li><a href="...">Label</a></li> element
|
74
|
-
label = File.basename(filename, '.xhtml').gsub('_', ' ').capitalize
|
75
|
-
label = "Prologue" if label == "Chapter 0"
|
76
|
-
li = Nokogiri::XML::Node.new('li', doc)
|
77
|
-
a = Nokogiri::XML::Node.new('a', doc)
|
78
|
-
a['href'] = filename
|
79
|
-
a.content = label
|
80
|
-
li.add_child(a)
|
81
|
-
nav.add_child(li)
|
82
|
-
end
|
83
|
-
|
84
|
-
File.write(@nav_file, doc.to_xml(indent: 2))
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
@@ -1,31 +0,0 @@
|
|
1
|
-
require 'optparse'
|
2
|
-
|
3
|
-
module EpubTools
|
4
|
-
# A simple helper to DRY CLI OptionParser usage across commands
|
5
|
-
class CLIHelper
|
6
|
-
# Parses ARGV into options hash, enforces required keys, and displays help/errors.
|
7
|
-
# options: hash of defaults; required_keys: array of symbols required
|
8
|
-
def self.parse(options = {}, required_keys = [], &block)
|
9
|
-
parser = OptionParser.new do |opts|
|
10
|
-
block.call(opts, options)
|
11
|
-
opts.on('-h', '--help', 'Prints this help') { puts opts; exit }
|
12
|
-
end
|
13
|
-
begin
|
14
|
-
parser.parse!
|
15
|
-
unless required_keys.empty?
|
16
|
-
missing = required_keys.select { |k| options[k].nil? }
|
17
|
-
unless missing.empty?
|
18
|
-
STDERR.puts "Missing required options: #{missing.map { |k| "--#{k.to_s.gsub('_','-')}" }.join(', ')}"
|
19
|
-
STDERR.puts parser
|
20
|
-
exit 1
|
21
|
-
end
|
22
|
-
end
|
23
|
-
rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
|
24
|
-
STDERR.puts e.message
|
25
|
-
STDERR.puts parser
|
26
|
-
exit 1
|
27
|
-
end
|
28
|
-
options
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
@@ -1,47 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'nokogiri'
|
3
|
-
require 'yaml'
|
4
|
-
|
5
|
-
module EpubTools
|
6
|
-
class TextStyleClassFinder
|
7
|
-
def initialize(file_path, output_path = 'text_style_classes.yaml', verbose: false)
|
8
|
-
@file_path = file_path
|
9
|
-
@output_path = output_path
|
10
|
-
@verbose = verbose
|
11
|
-
raise ArgumentError, "File does not exist: #{@file_path}" unless File.exist?(@file_path)
|
12
|
-
end
|
13
|
-
|
14
|
-
def call
|
15
|
-
doc = Nokogiri::HTML(File.read(@file_path))
|
16
|
-
style_blocks = doc.xpath('//style').map(&:text).join("\n")
|
17
|
-
|
18
|
-
italics = extract_classes(style_blocks, /font-style\s*:\s*italic/)
|
19
|
-
bolds = extract_classes(style_blocks, /font-weight\s*:\s*700/)
|
20
|
-
|
21
|
-
print_summary(italics, bolds) if @verbose
|
22
|
-
|
23
|
-
data = {
|
24
|
-
"italics" => italics,
|
25
|
-
"bolds" => bolds
|
26
|
-
}
|
27
|
-
File.write(@output_path, data.to_yaml)
|
28
|
-
end
|
29
|
-
|
30
|
-
private
|
31
|
-
|
32
|
-
def extract_classes(style_text, pattern)
|
33
|
-
regex = /\.([\w-]+)\s*{[^}]*#{pattern.source}[^}]*}/i
|
34
|
-
style_text.scan(regex).flatten.uniq
|
35
|
-
end
|
36
|
-
|
37
|
-
def print_summary(italics, bolds)
|
38
|
-
unless italics.empty?
|
39
|
-
puts "Classes with font-style: italic: #{italics.join(", ")}"
|
40
|
-
end
|
41
|
-
|
42
|
-
unless bolds.empty?
|
43
|
-
puts "Classes with font-weight: 700: #{bolds.join(", ")}"
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|