epub_tools 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +3 -0
  3. data/.rubocop.yml +10 -17
  4. data/CLAUDE.md +124 -0
  5. data/Gemfile +4 -4
  6. data/Gemfile.lock +39 -34
  7. data/Rakefile +2 -0
  8. data/bin/epub-tools +2 -0
  9. data/epub_tools.gemspec +3 -1
  10. data/lib/epub_tools/add_chapters.rb +47 -29
  11. data/lib/epub_tools/chapter_validator.rb +40 -0
  12. data/lib/epub_tools/cli/command_options_configurator.rb +115 -0
  13. data/lib/epub_tools/cli/command_registry.rb +2 -0
  14. data/lib/epub_tools/cli/option_builder.rb +5 -3
  15. data/lib/epub_tools/cli/runner.rb +59 -110
  16. data/lib/epub_tools/cli.rb +16 -29
  17. data/lib/epub_tools/compile_book.rb +48 -65
  18. data/lib/epub_tools/compile_workspace.rb +40 -0
  19. data/lib/epub_tools/epub_configuration.rb +33 -0
  20. data/lib/epub_tools/epub_file_writer.rb +57 -0
  21. data/lib/epub_tools/epub_initializer.rb +83 -162
  22. data/lib/epub_tools/epub_metadata_builder.rb +92 -0
  23. data/lib/epub_tools/loggable.rb +2 -0
  24. data/lib/epub_tools/pack_ebook.rb +28 -14
  25. data/lib/epub_tools/split_chapters.rb +42 -17
  26. data/lib/epub_tools/style_finder.rb +17 -6
  27. data/lib/epub_tools/unpack_ebook.rb +20 -10
  28. data/lib/epub_tools/version.rb +3 -1
  29. data/lib/epub_tools/xhtml_cleaner.rb +1 -0
  30. data/lib/epub_tools/xhtml_extractor.rb +20 -10
  31. data/lib/epub_tools/xhtml_generator.rb +71 -0
  32. data/lib/epub_tools.rb +2 -0
  33. data/test/add_chapters_test.rb +49 -25
  34. data/test/chapter_validator_test.rb +47 -0
  35. data/test/cli/command_registry_test.rb +2 -0
  36. data/test/cli/option_builder_test.rb +24 -14
  37. data/test/cli/runner_test.rb +15 -15
  38. data/test/cli_commands_test.rb +2 -0
  39. data/test/cli_test.rb +2 -0
  40. data/test/cli_version_test.rb +2 -0
  41. data/test/compile_book_test.rb +17 -102
  42. data/test/compile_workspace_test.rb +55 -0
  43. data/test/epub_initializer_test.rb +55 -27
  44. data/test/pack_ebook_test.rb +33 -9
  45. data/test/split_chapters_test.rb +27 -7
  46. data/test/style_finder_test.rb +2 -0
  47. data/test/test_helper.rb +2 -0
  48. data/test/unpack_ebook_test.rb +45 -20
  49. data/test/xhtml_cleaner_test.rb +2 -0
  50. data/test/xhtml_extractor_test.rb +3 -1
  51. metadata +13 -3
@@ -1,8 +1,12 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require 'fileutils'
3
- require 'time'
4
- require 'securerandom'
5
5
  require_relative 'loggable'
6
+ require_relative 'xhtml_generator'
7
+ require_relative 'epub_metadata_builder'
8
+ require_relative 'epub_file_writer'
9
+ require_relative 'epub_configuration'
6
10
 
7
11
  module EpubTools
8
12
  # Sets up a basic empty EPUB directory structure with the basic files created:
@@ -15,6 +19,7 @@ module EpubTools
15
19
  # - cover image (optionally)
16
20
  class EpubInitializer
17
21
  include Loggable
22
+
18
23
  # Initializes the class
19
24
  # @param options [Hash] Configuration options
20
25
  # @option options [String] :title Book title (required)
@@ -23,197 +28,113 @@ module EpubTools
23
28
  # @option options [String] :cover_image Optional path to the cover image
24
29
  # @option options [Boolean] :verbose Whether to print progress to STDOUT (default: false)
25
30
  def initialize(options = {})
26
- @title = options.fetch(:title)
27
- @author = options.fetch(:author)
28
- @destination = File.expand_path(options.fetch(:destination))
29
- @uuid = "urn:uuid:#{SecureRandom.uuid}"
30
- @modified = Time.now.utc.iso8601
31
- @cover_image_path = options[:cover_image]
32
- @cover_image_fname = nil
33
- @cover_image_media_type = nil
34
- @verbose = options[:verbose] || false
31
+ @config = EpubConfiguration.new(options)
32
+ @verbose = @config.verbose
33
+ @xhtml_generator = create_xhtml_generator
34
+ @metadata_builder = create_metadata_builder
35
+ @file_writer = create_file_writer
35
36
  end
36
37
 
38
+ private
39
+
40
+ def create_xhtml_generator
41
+ XhtmlGenerator.new(title: @config.title, author: @config.author)
42
+ end
43
+
44
+ def create_metadata_builder
45
+ EpubMetadataBuilder.new(@config)
46
+ end
47
+
48
+ def create_file_writer
49
+ EpubFileWriter.new(@config.destination)
50
+ end
51
+
52
+ public
53
+
37
54
  # Creates the empty ebook and returns the directory
38
55
  def run
39
- create_structure
40
- write_mimetype
56
+ @file_writer.create_structure
57
+ @file_writer.write_mimetype
41
58
  write_title_page
42
- write_container
43
- write_cover if @cover_image_path
59
+ @file_writer.write_container
60
+ write_cover if @config.cover_image_path
44
61
  write_package_opf
45
62
  write_nav
46
- write_style
47
- log "Created empty ebook structure at: #{@destination}"
48
- @destination
63
+ @file_writer.write_style
64
+ log "Created empty ebook structure at: #{@config.destination}"
65
+ @config.destination
49
66
  end
50
67
 
51
68
  private
52
69
 
53
- def create_structure
54
- FileUtils.mkdir_p("#{@destination}/META-INF")
55
- FileUtils.mkdir_p("#{@destination}/OEBPS")
70
+ def write_title_page
71
+ content = @xhtml_generator.build_title_page
72
+ @file_writer.write_xhtml('title.xhtml', content)
56
73
  end
57
74
 
58
- def write_mimetype
59
- File.write("#{@destination}/mimetype", 'application/epub+zip')
60
- end
75
+ # Copies the cover image into the EPUB structure and creates a cover.xhtml page
76
+ def write_cover
77
+ return unless cover_image_exists?
61
78
 
62
- def write_title_page
63
- content = <<~XHTML
64
- <?xml version="1.0" encoding="UTF-8"?>
65
- <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
66
- <head>
67
- <meta charset="UTF-8" />
68
- <title>#{@title}</title>
69
- <link rel="stylesheet" type="text/css" href="style.css"/>
70
- </head>
71
- <body>
72
- <h1 class="title">#{@title}</h1>
73
- <p class="author">by #{@author}</p>
74
- </body>
75
- </html>
76
- XHTML
77
-
78
- File.write("#{@destination}/OEBPS/title.xhtml", content)
79
+ ext = File.extname(@config.cover_image_path).downcase
80
+ media_type = determine_media_type(ext)
81
+ return unless media_type
82
+
83
+ fname = "cover#{ext}"
84
+ @config.update_cover_info(fname, media_type)
85
+ @xhtml_generator.cover_image_fname = fname
86
+ update_metadata_builder_with_cover_info
87
+
88
+ copy_cover_image(ext)
89
+ write_cover_page
79
90
  end
80
91
 
81
- def write_container
82
- content = <<~XML
83
- <?xml version="1.0" encoding="UTF-8"?>
84
- <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
85
- <rootfiles>
86
- <rootfile full-path="OEBPS/package.opf" media-type="application/oebps-package+xml"/>
87
- </rootfiles>
88
- </container>
89
- XML
90
- File.write("#{@destination}/META-INF/container.xml", content)
92
+ def cover_image_exists?
93
+ return true if File.exist?(@config.cover_image_path)
94
+
95
+ warn "Warning: cover image '#{@config.cover_image_path}' not found; skipping cover support."
96
+ false
91
97
  end
92
98
 
93
- # Copies the cover image into the EPUB structure and creates a cover.xhtml page
94
- def write_cover
95
- path = @cover_image_path
96
- unless File.exist?(path)
97
- warn "Warning: cover image '#{path}' not found; skipping cover support."
98
- return
99
+ def determine_media_type(ext)
100
+ case ext
101
+ when '.jpg', '.jpeg' then 'image/jpeg'
102
+ when '.png' then 'image/png'
103
+ when '.gif' then 'image/gif'
104
+ when '.svg' then 'image/svg+xml'
105
+ else
106
+ warn "Warning: unsupported cover image type '#{ext}'; skipping cover support."
107
+ nil
99
108
  end
100
- ext = File.extname(path).downcase
101
- @cover_image_media_type = case ext
102
- when '.jpg', '.jpeg' then 'image/jpeg'
103
- when '.png' then 'image/png'
104
- when '.gif' then 'image/gif'
105
- when '.svg' then 'image/svg+xml'
106
- else
107
- warn "Warning: unsupported cover image type '#{ext}'; skipping cover support."
108
- return
109
- end
110
- @cover_image_fname = "cover#{ext}"
111
- dest = File.join(@destination, 'OEBPS', @cover_image_fname)
112
- FileUtils.cp(path, dest)
113
- write_cover_page
114
109
  end
115
110
 
116
- # Generates a cover.xhtml file displaying the cover image
117
- def write_cover_page
118
- content = <<~XHTML
119
- <?xml version="1.0" encoding="UTF-8"?>
120
- <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
121
- <head>
122
- <meta charset="UTF-8" />
123
- <title>Cover</title>
124
- <link rel="stylesheet" type="text/css" href="style.css"/>
125
- </head>
126
- <body>
127
- <div class="cover-image">
128
- <img src="#{@cover_image_fname}" alt="Cover"/>
129
- </div>
130
- </body>
131
- </html>
132
- XHTML
133
- File.write(File.join(@destination, 'OEBPS', 'cover.xhtml'), content)
111
+ def update_metadata_builder_with_cover_info
112
+ @metadata_builder = EpubMetadataBuilder.new(@config)
134
113
  end
135
114
 
136
- def mitem(id, href, type, properties = nil)
137
- xml = "<item id=\"#{id}\" href=\"#{href}\" media-type=\"#{type}\""
138
- xml += " properties=\"#{properties}\"" if properties
139
- "#{xml}/>"
115
+ def copy_cover_image(_ext)
116
+ dest = File.join(@config.destination, 'OEBPS', @config.cover_image_fname)
117
+ FileUtils.cp(@config.cover_image_path, dest)
118
+ end
119
+
120
+ # Generates a cover.xhtml file displaying the cover image
121
+ def write_cover_page
122
+ content = @xhtml_generator.build_cover_page
123
+ @file_writer.write_xhtml('cover.xhtml', content)
140
124
  end
141
125
 
142
126
  # Generates the package.opf with optional cover image entries
143
127
  def write_package_opf
144
- manifest_items = []
145
- spine_items = []
146
- manifest_items << mitem('style', 'style.css', 'text/css')
147
- manifest_items << mitem('nav', 'nav.xhtml', 'application/xhtml+xml', 'nav')
148
-
149
- if @cover_image_fname
150
- manifest_items << mitem('cover-image', @cover_image_fname, @cover_image_media_type, 'cover-image')
151
- manifest_items << mitem('cover-page', 'cover.xhtml', 'application/xhtml+xml')
152
- spine_items << '<itemref idref="cover-page"/>'
153
- end
154
-
155
- manifest_items << mitem('title', 'title.xhtml', 'application/xhtml+xml')
156
- spine_items << '<itemref idref="title"/>'
157
-
158
- metadata = []
159
- metadata << %(<dc:identifier id="pub-id">#{@uuid}</dc:identifier>)
160
- metadata << %(<dc:title>#{@title}</dc:title>)
161
- metadata << %(<dc:creator>#{@author}</dc:creator>)
162
- metadata << '<dc:language>en</dc:language>'
163
- metadata << %(<meta property="dcterms:modified">#{@modified}</meta>)
164
- metadata << %(<meta property="schema:accessMode">textual</meta>)
165
- metadata << %(<meta property="schema:accessibilityFeature">unknown</meta>)
166
- metadata << %(<meta property="schema:accessibilityHazard">none</meta>)
167
- metadata << %(<meta property="schema:accessModeSufficient">textual</meta>)
168
- metadata << %(<meta name="cover" content="cover-image"/>) if @cover_image_fname
169
-
170
- content = <<~XML
171
- <?xml version="1.0" encoding="utf-8"?>
172
- <package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="pub-id" xml:lang="en">
173
- <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
174
- #{metadata.map { |line| " #{line}" }.join("\n")}
175
- </metadata>
176
- <manifest>
177
- #{manifest_items.map { |line| " #{line}" }.join("\n")}
178
- </manifest>
179
- <spine>
180
- #{spine_items.map { |line| " #{line}" }.join("\n")}
181
- </spine>
182
- </package>
183
- XML
184
-
185
- File.write(File.join(@destination, 'OEBPS', 'package.opf'), content)
128
+ manifest_items, spine_items = @metadata_builder.build_manifest_and_spine
129
+ metadata = @metadata_builder.build_metadata
130
+ content = @metadata_builder.build_opf_xml(metadata, manifest_items, spine_items)
131
+ @file_writer.write_package_opf(content)
186
132
  end
187
133
 
188
134
  # Generates the initial navigation document (Table of Contents)
189
135
  def write_nav
190
- content = <<~XHTML
191
- <?xml version="1.0" encoding="utf-8"?>
192
- <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en">
193
- <head>
194
- <title>Table of Contents</title>
195
- </head>
196
- <body>
197
- <nav epub:type="toc" id="toc">
198
- <h1>Table of Contents</h1>
199
- <ol>
200
- <li><a href="title.xhtml">Title Page</a></li>
201
- </ol>
202
- </nav>
203
- </body>
204
- </html>
205
- XHTML
206
- File.write(File.join(@destination, 'OEBPS', 'nav.xhtml'), content)
207
- end
208
-
209
- def write_style
210
- src = File.join(Dir.pwd, 'style.css')
211
- dest = File.join(@destination, 'OEBPS', 'style.css')
212
- unless File.exist?(src)
213
- warn "Warning: style.css not found in project root (#{src}), skipping copy."
214
- return
215
- end
216
- FileUtils.cp(src, dest)
136
+ content = @xhtml_generator.build_nav_page
137
+ @file_writer.write_xhtml('nav.xhtml', content)
217
138
  end
218
139
  end
219
140
  end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ module EpubTools
4
+ # Builds metadata content for EPUB package.opf files
5
+ class EpubMetadataBuilder
6
+ def initialize(config)
7
+ @config = config
8
+ end
9
+
10
+ # Builds complete metadata array
11
+ def build_metadata
12
+ metadata = []
13
+ add_dublin_core_metadata(metadata)
14
+ add_schema_metadata(metadata)
15
+ add_cover_metadata(metadata) if @config.cover_image_fname
16
+ metadata
17
+ end
18
+
19
+ # Builds manifest and spine items
20
+ def build_manifest_and_spine
21
+ manifest_items = []
22
+ spine_items = []
23
+
24
+ add_base_manifest_items(manifest_items)
25
+ add_cover_items(manifest_items, spine_items) if @config.cover_image_fname
26
+ add_title_items(manifest_items, spine_items)
27
+
28
+ [manifest_items, spine_items]
29
+ end
30
+
31
+ # Builds complete OPF XML content
32
+ def build_opf_xml(metadata, manifest_items, spine_items)
33
+ <<~XML
34
+ <?xml version="1.0" encoding="utf-8"?>
35
+ <package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="pub-id" xml:lang="en">
36
+ <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
37
+ #{metadata.map { |line| " #{line}" }.join("\n")}
38
+ </metadata>
39
+ <manifest>
40
+ #{manifest_items.map { |line| " #{line}" }.join("\n")}
41
+ </manifest>
42
+ <spine>
43
+ #{spine_items.map { |line| " #{line}" }.join("\n")}
44
+ </spine>
45
+ </package>
46
+ XML
47
+ end
48
+
49
+ private
50
+
51
+ def add_dublin_core_metadata(metadata)
52
+ metadata << %(<dc:identifier id="pub-id">#{@config.uuid}</dc:identifier>)
53
+ metadata << %(<dc:title>#{@config.title}</dc:title>)
54
+ metadata << %(<dc:creator>#{@config.author}</dc:creator>)
55
+ metadata << '<dc:language>en</dc:language>'
56
+ metadata << %(<meta property="dcterms:modified">#{@config.modified}</meta>)
57
+ end
58
+
59
+ def add_schema_metadata(metadata)
60
+ metadata << %(<meta property="schema:accessMode">textual</meta>)
61
+ metadata << %(<meta property="schema:accessibilityFeature">unknown</meta>)
62
+ metadata << %(<meta property="schema:accessibilityHazard">none</meta>)
63
+ metadata << %(<meta property="schema:accessModeSufficient">textual</meta>)
64
+ end
65
+
66
+ def add_cover_metadata(metadata)
67
+ metadata << %(<meta name="cover" content="cover-image"/>)
68
+ end
69
+
70
+ def add_base_manifest_items(manifest_items)
71
+ manifest_items << mitem('style', 'style.css', 'text/css')
72
+ manifest_items << mitem('nav', 'nav.xhtml', 'application/xhtml+xml', 'nav')
73
+ end
74
+
75
+ def add_cover_items(manifest_items, spine_items)
76
+ manifest_items << mitem('cover-image', @config.cover_image_fname, @config.cover_image_media_type, 'cover-image')
77
+ manifest_items << mitem('cover-page', 'cover.xhtml', 'application/xhtml+xml')
78
+ spine_items << '<itemref idref="cover-page"/>'
79
+ end
80
+
81
+ def add_title_items(manifest_items, spine_items)
82
+ manifest_items << mitem('title', 'title.xhtml', 'application/xhtml+xml')
83
+ spine_items << '<itemref idref="title"/>'
84
+ end
85
+
86
+ def mitem(id, href, type, properties = nil)
87
+ xml = "<item id=\"#{id}\" href=\"#{href}\" media-type=\"#{type}\""
88
+ xml += " properties=\"#{properties}\"" if properties
89
+ "#{xml}/>"
90
+ end
91
+ end
92
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module EpubTools
2
4
  # Provides logging capability to classes that include it
3
5
  module Loggable
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'zip'
2
4
  require 'fileutils'
3
5
  require 'pathname'
@@ -7,6 +9,7 @@ module EpubTools
7
9
  # Packages an EPUB directory into a .epub file
8
10
  class PackEbook
9
11
  include Loggable
12
+
10
13
  # Initializes the class
11
14
  # @param options [Hash] Configuration options
12
15
  # @option options [String] :input_dir Path to the EPUB directory (containing mimetype, META-INF, OEBPS) (required)
@@ -28,21 +31,9 @@ module EpubTools
28
31
  def run
29
32
  validate_input!
30
33
  Dir.chdir(@input_dir) do
31
- # determine the output path: absolute stays as-is, otherwise sibling to input_dir
32
- target = Pathname.new(@output_file).absolute? ? @output_file : File.join('..', @output_file)
34
+ target = determine_output_path
33
35
  FileUtils.rm_f(target)
34
- Zip::File.open(target, Zip::File::CREATE) do |zip|
35
- # Add mimetype first and uncompressed
36
- add_mimetype(zip)
37
-
38
- # Add all other files with compression, preserving paths
39
- Dir.glob('**/*', File::FNM_DOTMATCH).sort.each do |entry|
40
- next if ['.', '..', 'mimetype'].include?(entry)
41
- next if File.directory?(entry)
42
-
43
- zip.add(entry, entry)
44
- end
45
- end
36
+ create_zip_file(target)
46
37
  end
47
38
  log "EPUB created: #{@output_file}"
48
39
  @output_file
@@ -50,6 +41,29 @@ module EpubTools
50
41
 
51
42
  private
52
43
 
44
+ def determine_output_path
45
+ # determine the output path: absolute stays as-is, otherwise sibling to input_dir
46
+ Pathname.new(@output_file).absolute? ? @output_file : File.join('..', @output_file)
47
+ end
48
+
49
+ def create_zip_file(target)
50
+ Zip::File.open(target, create: true) do |zip|
51
+ # Add mimetype first and uncompressed
52
+ add_mimetype(zip)
53
+ add_content_files(zip)
54
+ end
55
+ end
56
+
57
+ def add_content_files(zip)
58
+ # Add all other files with compression, preserving paths
59
+ Dir.glob('**/*', File::FNM_DOTMATCH).sort.each do |entry|
60
+ next if ['.', '..', 'mimetype'].include?(entry)
61
+ next if File.directory?(entry)
62
+
63
+ zip.add(entry, entry)
64
+ end
65
+ end
66
+
53
67
  def validate_input!
54
68
  raise ArgumentError, "Directory '#{@input_dir}' does not exist." unless Dir.exist?(@input_dir)
55
69
 
@@ -1,4 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require 'nokogiri'
3
5
  require 'yaml'
4
6
  require 'fileutils'
@@ -16,6 +18,7 @@ module EpubTools
16
18
  # - Saves those files to +output_dir+
17
19
  class SplitChapters
18
20
  include Loggable
21
+
19
22
  # Initializes the class
20
23
  # @param options [Hash] Configuration options
21
24
  # @option options [String] :input_file Path to the source XHTML (required)
@@ -60,21 +63,39 @@ module EpubTools
60
63
  current_fragment = nil
61
64
 
62
65
  doc.at('body').children.each do |node|
63
- if (m = node.text.match(/Chapter\s+(\d+)/i)) && %w[p span h2 h3 h4].include?(node.name)
64
- # start a new chapter (skip the marker node so title isn't duplicated)
65
- chapters[current_number] = current_fragment.to_html if current_number
66
- current_number = m[1].to_i
67
- current_fragment = Nokogiri::HTML::DocumentFragment.parse('')
68
- elsif prologue_marker?(node)
69
- # start the prologue (skip the marker node)
70
- chapters[current_number] = current_fragment.to_html if current_number
71
- current_number = 0
72
- current_fragment = Nokogiri::HTML::DocumentFragment.parse('')
73
- else
74
- current_fragment&.add_child(node.dup)
75
- end
66
+ current_number, current_fragment = process_node(node, chapters, current_number, current_fragment)
67
+ end
68
+
69
+ finalize_chapters(chapters, current_number, current_fragment)
70
+ end
71
+
72
+ def process_node(node, chapters, current_number, current_fragment)
73
+ if chapter_marker?(node)
74
+ start_new_chapter(chapters, node, current_number, current_fragment)
75
+ elsif prologue_marker?(node)
76
+ start_prologue(chapters, current_number, current_fragment)
77
+ else
78
+ current_fragment&.add_child(node.dup)
79
+ [current_number, current_fragment]
76
80
  end
81
+ end
82
+
83
+ def chapter_marker?(node)
84
+ node.text.match?(/Chapter\s+\d+/i) && %w[p span h2 h3 h4].include?(node.name)
85
+ end
86
+
87
+ def start_new_chapter(chapters, node, current_number, current_fragment)
88
+ chapters[current_number] = current_fragment.to_html if current_number
89
+ chapter_number = node.text.match(/Chapter\s+(\d+)/i)[1].to_i
90
+ [chapter_number, Nokogiri::HTML::DocumentFragment.parse('')]
91
+ end
77
92
 
93
+ def start_prologue(chapters, current_number, current_fragment)
94
+ chapters[current_number] = current_fragment.to_html if current_number
95
+ [0, Nokogiri::HTML::DocumentFragment.parse('')]
96
+ end
97
+
98
+ def finalize_chapters(chapters, current_number, current_fragment)
78
99
  chapters[current_number] = current_fragment.to_html if current_number
79
100
  chapters
80
101
  end
@@ -91,7 +112,14 @@ module EpubTools
91
112
  def write_chapter_file(label, content)
92
113
  display_label = display_label(label)
93
114
  filename = File.join(@output_dir, "#{@output_prefix}_#{label}.xhtml")
94
- File.write(filename, <<~HTML)
115
+ File.write(filename, build_xhtml_template(display_label, content))
116
+ XHTMLCleaner.new({ filename: filename }).run
117
+ log("Extracted: #{filename}")
118
+ filename
119
+ end
120
+
121
+ def build_xhtml_template(display_label, content)
122
+ <<~HTML
95
123
  <?xml version="1.0" encoding="UTF-8"?>
96
124
  <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
97
125
  <head>
@@ -104,9 +132,6 @@ module EpubTools
104
132
  </body>
105
133
  </html>
106
134
  HTML
107
- XHTMLCleaner.new({ filename: filename }).run
108
- log("Extracted: #{filename}")
109
- filename
110
135
  end
111
136
 
112
137
  def display_label(label)
@@ -1,4 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require 'nokogiri'
3
5
  require 'yaml'
4
6
  require_relative 'loggable'
@@ -9,6 +11,7 @@ module EpubTools
9
11
  # {SplitChapters}[rdoc-ref:EpubTools::SplitChapters].
10
12
  class StyleFinder
11
13
  include Loggable
14
+
12
15
  # Initializes the class
13
16
  # @param options [Hash] Configuration options
14
17
  # @option options [String] :file_path XHTML file to be analyzed (required)
@@ -24,18 +27,26 @@ module EpubTools
24
27
  # Runs the finder
25
28
  # @return [Hash] Data containing the extracted style classes (italics and bolds)
26
29
  def run
30
+ style_blocks = extract_style_blocks
31
+ italics, bolds = extract_style_classes(style_blocks)
32
+ generate_output(italics, bolds)
33
+ end
34
+
35
+ def extract_style_blocks
27
36
  doc = Nokogiri::HTML(File.read(@file_path))
28
- style_blocks = doc.xpath('//style').map(&:text).join("\n")
37
+ doc.xpath('//style').map(&:text).join("\n")
38
+ end
29
39
 
40
+ def extract_style_classes(style_blocks)
30
41
  italics = extract_classes(style_blocks, /font-style\s*:\s*italic/)
31
- bolds = extract_classes(style_blocks, /font-weight\s*:\s*700/)
42
+ bolds = extract_classes(style_blocks, /font-weight\s*:\s*700/)
43
+ [italics, bolds]
44
+ end
32
45
 
46
+ def generate_output(italics, bolds)
33
47
  print_summary(italics, bolds) if @verbose
34
48
 
35
- data = {
36
- 'italics' => italics,
37
- 'bolds' => bolds
38
- }
49
+ data = { 'italics' => italics, 'bolds' => bolds }
39
50
  File.write(@output_path, data.to_yaml)
40
51
  data
41
52
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'zip'
2
4
  require 'fileutils'
3
5
  require_relative 'loggable'
@@ -6,6 +8,7 @@ module EpubTools
6
8
  # Unpacks an EPUB (.epub file) into a directory
7
9
  class UnpackEbook
8
10
  include Loggable
11
+
9
12
  # Initializes the class
10
13
  # @param options [Hash] Configuration options
11
14
  # @option options [String] :epub_file Path to the .epub file to unpack (required)
@@ -23,22 +26,29 @@ module EpubTools
23
26
  def run
24
27
  validate!
25
28
  FileUtils.mkdir_p(@output_dir)
29
+ extract_entries
30
+ log "Unpacked #{File.basename(@epub_file)} to #{@output_dir}"
31
+ @output_dir
32
+ end
33
+
34
+ private
35
+
36
+ def extract_entries
26
37
  Zip::File.open(@epub_file) do |zip|
27
38
  zip.each do |entry|
28
- dest_path = File.join(@output_dir, entry.name)
29
- if entry.directory?
30
- FileUtils.mkdir_p(dest_path)
31
- else
32
- FileUtils.mkdir_p(File.dirname(dest_path))
33
- entry.extract(dest_path) { true }
34
- end
39
+ extract_entry(entry)
35
40
  end
36
41
  end
37
- log "Unpacked #{File.basename(@epub_file)} to #{@output_dir}"
38
- @output_dir
39
42
  end
40
43
 
41
- private
44
+ def extract_entry(entry)
45
+ if entry.directory?
46
+ FileUtils.mkdir_p(File.join(@output_dir, entry.name))
47
+ else
48
+ FileUtils.mkdir_p(File.join(@output_dir, File.dirname(entry.name)))
49
+ entry.extract(destination_directory: @output_dir) { true }
50
+ end
51
+ end
42
52
 
43
53
  def default_dir
44
54
  [File.dirname(@epub_file), File.basename(@epub_file, '.epub')].join('/')
@@ -1,4 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module EpubTools
2
4
  # Ruby Gem version number
3
- VERSION = '0.4.1'.freeze
5
+ VERSION = '0.5.0'
4
6
  end
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'nokogiri'
4
5
  require 'yaml'