RubyGems - EPUBChop - Versions diffs - 0.0.7 → 0.0.10 - Mend

EPUBChop 0.0.7 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

checksums.yaml CHANGED Viewed

@@ -1,15 +1,7 @@
 ---
-!binary "U0hBMQ==":
-  metadata.gz: !binary |-
-    ZDdjMWZlMTIyOTJmMjJiN2I0OGQxYTUzMjA5MTlkNzZhZjcwMDQ3MQ==
-  data.tar.gz: !binary |-
-    ZjdjY2JiNzkwY2RiZTIxMDMyM2M4YTU1MjY0OGMwOTZiYjI4NmY4Mw==
+SHA1:
+  metadata.gz: 2ff6270a266184b41507e29e43994de705c56357
+  data.tar.gz: a257fd9d69ba6eb453c936626b509cc40596adf5
 SHA512:
-  metadata.gz: !binary |-
-    ZjJkYTdmNDI4OGFkNWUxZDRlZjlkZWQ1NDZkYTA3MWE2MmQ3YWMwYmZjZjkz
-    NDZiZGVjMGEwNWY5ZjgwZjA2ZjQ4OTM4MDBhMGRlNWQ3ODk5Njc1ODhlY2U2
-    ZmVjNmEwNGYwYTQ1ZGZiNDc0Nzc4ODliZWI0N2Y1MjcxNDQzMGY=
-  data.tar.gz: !binary |-
-    ZTNlY2E2OTA4NTNkM2I0YWRjMDc1YTkxMWU5YjgyMDY4YWM0ODE2MGVjZjQ5
-    OGI2YmNjNWFmZjA1ZTI1OGViZjI0N2UyNjJiOTQ3MmRhOTUyOTYwZDIwNDc2
-    NGQ0ZWU1N2RhZDBmNGMyOGZlZWM1NzIwMDgzMGM3Y2FkZjdhYWI=
+  metadata.gz: 5b8d5d76d9aabb81e9f0afe92d2c0b3aa058b6be46ce9eb3dc6c61f15eceaaaa49a27a696a2063b6bc9158ddb4992be0020ba1f7072ff9b90c2cbdf73b286527
+  data.tar.gz: 47742a21f5173c931b35f82df4c4fa5ea2f23e2b040073a9f575f38cab819c34a100b3bf3f1dcdd6e05d88ebe3c44445c2dfadb5e378d6f509332371caa00577

data/bin/epubchop CHANGED Viewed

@@ -19,6 +19,7 @@ BANNER
   opt :line1, "Text that is shown on line 1 of the chopped pages", :type => :string, :default => 'Continue reading?'
   opt :line2, "Text that is shown on line 2 of the chopped pages", :type => :string, :default => 'Go to your local library or buy the book.'
   opt :chop, "Follow the SPINE or the NCX of the ePub", :type => :string, :default => 'spine'
+  opt :verbose, "more loging true/false", :type => :boolean, :default => false
 end
 Trollop::die "need an EPUB file name" if ARGV.empty?
@@ -32,9 +33,10 @@ begin
     text << options[:line1] if options.has_key?(:line1)
     text << options[:line2] if options.has_key?(:line2)
     chop_by = options[:chop]
+    verbose = options[:verbose]
-    puts 'loading EPUB'
-    b=EPUBChop.get(filename, :chop_by => chop_by.to_sym)
+    puts "loading EPUB #{filename}"
+    b=EPUBChop.get(filename, :chop_by => chop_by.to_sym, :verbose => verbose)
     puts 'chopping EPUB'
     c=b.chop({:base => base.to_s, :words => words, :text => text})
     puts 'rebuilding EPUB'

data/lib/EPUBChop/chop.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+#encoding: UTF-8
 require 'nokogiri'
 require 'epubinfo'
 require 'tempfile'
@@ -38,12 +39,10 @@ module EPUBChop
       return rebuild_epub_from_tmp_dir(extract_dir)
     rescue Zip::ZipError => e
-      raise RuntimeError, "Error processing EPUB. #{e.message}"
+      raise RuntimeError, "Error processing EPUB #{@book.table_of_contents.parser.path}.\n #{e.message}", e.backtrace
     rescue Exception => e
-      puts "Chopping went wrong. #{e.message}"
-      puts e.backtrace
-      return nil
+      puts e.backtrace.join("\n")
+      raise RuntimeError, "Chopping went wrong for #{@book.table_of_contents.parser.path}.\n #{e.message}", e.backtrace
     ensure
       FileUtils.remove_entry_secure(extract_dir)
     end
@@ -80,50 +79,18 @@ module EPUBChop
           else
             #noinspection RubyResolve
-            resource = Nokogiri::XML(@book.table_of_contents.resources[filename]) do |config|
+            resource = Nokogiri::HTML(@book.table_of_contents.resources[filename]) do |config|
+            #resource = Nokogiri::HTML.parse(@book.table_of_contents.resources[filename], 'UTF-8') do |config|
               config.noblanks.nonet
             end
-            resource.css('script').remove
-            resource.css('style').remove
-            resource_text = resource.at_css('body').text.split[0..processed_file_size]
-            #resource_text_length = resource_text.length
-            # get a string that can be found
-            data = nil
-            window_begin = default_window_begin = 5
-            window_end = 0
-            while data.nil?
-              look_for = resource_text[(processed_file_size - window_begin)..(processed_file_size - window_end)]
-              if look_for.nil?
-                window_begin = default_window_begin += 5
-                window_end = 0
-              else
-                data = resource.at_css("*:contains('#{look_for.join(' ')}')")
-                window_begin -= 1
-                window_end += 1
-                if window_begin == window_end
-                  window_begin = default_window_begin += 5
-                  window_end = 0
-                end
-              end
-            end
+            resource.encoding = 'UTF-8'
-            #limit on found string
-            if data
-              next_data = data.next_element
-              while next_data
-                in_resource = resource.css(next_data.css_path)
-                in_resource.remove
-                next_data = data.nil? || data.next_element.to_s.length == 1 ? nil : data.next_element
-              end
-            end
+            resource = chop_file(resource, processed_file_size)
             #persist page
-            File.open("#{extract_dir}/#{filename}", 'w') do |f|
-              f.puts resource.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
+            File.open("#{extract_dir}/#{filename}", 'w:UTF-8') do |f|
+              #  f.puts resource.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
+              f.puts resource.serialize(:encoding => 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
             end
           end
@@ -131,6 +98,61 @@ module EPUBChop
       end
     end
+    def chop_file(resource, processed_file_size)
+      #TODO: get a better algorithm to determine where to chop
+      return resource if resource.nil?
+      resource.css('script').remove
+      resource.css('style').remove
+      resource_text = resource.at_css('body').text.split[0..processed_file_size]
+      # get a string that can be found
+      data = nil
+      window_begin = default_window_begin = 5
+      window_end = 0
+      while data.nil?
+        puts "data window:#{(processed_file_size - window_begin)}..#{(processed_file_size - window_end)}" if @verbose
+        processed_window_begin = processed_file_size - window_begin
+        processed_window_end   = processed_file_size - window_end
+        processed_window_begin = 0 if processed_window_begin < 0
+        processed_window_end   = processed_file_size
+        look_for = resource_text[processed_window_begin..processed_window_end]
+        if look_for.nil?
+          window_begin = default_window_begin += 5
+          window_end = 0
+        else
+          look_for.map! {|m| m.gsub("'", "\'")}
+          data = resource.at_css("p:contains(\"#{look_for.join(' ')}\")")
+          data = resource.at_css("body:contains(\"#{look_for.join(' ')}\")") if data.nil?
+          window_begin -= 1
+          window_end += 1
+          if window_begin == window_end
+            window_begin = default_window_begin += 5
+            window_end = 0
+          end
+        end
+      end
+      #limit on found string
+      if data
+        next_data = data.next_element
+        while next_data
+          in_resource = resource.css(next_data.css_path)
+          in_resource.remove
+          next_data = data.nil? || data.next_element.to_s.length == 1 ? nil : data.next_element
+        end
+      end
+      resource
+    end
     def rebuild_epub_from_tmp_dir(extract_dir)
       #zip new ebook
       new_ebook_name = Tempfile.new(['epub', '.epub'], Dir.tmpdir)
@@ -143,7 +165,7 @@ module EPUBChop
       #minetype should be the first entry and should not be zipped. Else FIDO will not know that this is an EPUB
       mimetype = epub_files.delete("#{extract_dir}/mimetype")
-      mimetype_entry = Zip::Entry.new(zipfile, mimetype.sub("#{extract_dir}/", ''), '', '', 0,0, Zip::Entry::STORED)
+      mimetype_entry = Zip::Entry.new(zipfile, mimetype.sub("#{extract_dir}/", ''), '', '', 0, 0, Zip::Entry::STORED)
       zipfile.add(mimetype_entry, mimetype) unless mimetype.nil?
       #all the other files
@@ -164,13 +186,14 @@ module EPUBChop
     #noinspection RubyInstanceMethodNamingConvention
     def remove_unused_images_from_tmp_dir(extract_dir)
-      puts 'removing unused media'
+      puts 'removing unused media' if @verbose
       not_to_be_deleted_images = []
-      all_images = @book.table_of_contents.resources.images.map {|i| i[:uri]}
+      all_images = @book.table_of_contents.resources.images.map { |i| i[:uri] }
       @book.table_of_contents.resources.html.each do |resource|
         file = Nokogiri::HTML(File.read("#{extract_dir}/#{resource[:uri]}"))
         all_images.each do |image|
+          next if image.nil?
           i = image.split('/').last
           data = file.at_css("img[src$='#{i}']")
@@ -182,7 +205,8 @@ module EPUBChop
       to_be_deleted_images = (all_images - not_to_be_deleted_images)
       to_be_deleted_images.each do |image|
-        puts "\t\tremoving #{image}"
+        next if image.nil?
+        puts "\t\tremoving #{image}" if @verbose
         File.delete("#{extract_dir}/#{image}") if File.exists?("#{extract_dir}/#{image}")
       end
@@ -202,13 +226,14 @@ module EPUBChop
       end
       @chop_by = options[:chop_by] || :spine
+      @verbose = options[:verbose] || false
     end
     def empty_file_with_cover(filename)
       number_of_subdirectories = filename.split('/').size - 1
       cover_path = ''
-      number_of_subdirectories.times{ cover_path += '../'}
+      number_of_subdirectories.times { cover_path += '../' }
       cover_path += @book.cover && @book.cover.exists? ? @book.cover.exists?.to_s : ''
@@ -233,7 +258,7 @@ module EPUBChop
       </div>
       <div style='padding-top:10px;'>
-        <h3>#{CGI.escape_html(@book.titles.first ? @book.titles.first : '' )}</h3>
+        <h3>#{CGI.escape_html(@book.titles.first ? @book.titles.first : '')}</h3>
       </div>
       <div>
@@ -259,7 +284,7 @@ DATA
       resource_word_count = {}
       if @book
         resources = @book.table_of_contents.resources.to_a
-        chop_by = @chop_by.eql?(:ncx)  ? @book.table_of_contents.resources.ncx : @book.table_of_contents.resources.spine
+        chop_by = @chop_by.eql?(:ncx) ? @book.table_of_contents.resources.ncx : @book.table_of_contents.resources.spine
         chop_by.each do |resource|
           raw = Nokogiri::HTML(@book.table_of_contents.resources[resource[:uri]]) do |config|

data/lib/EPUBChop/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module EPUBChop
-  VERSION = "0.0.7"
+  VERSION = "0.0.10"
 end

metadata CHANGED Viewed

@@ -1,99 +1,99 @@
 --- !ruby/object:Gem::Specification
 name: EPUBChop
 version: !ruby/object:Gem::Version
-  version: 0.0.7
+  version: 0.0.10
 platform: ruby
 authors:
 - Mehmet Celik
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-02-19 00:00:00.000000000 Z
+date: 2014-02-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
-  requirement: !ruby/object:Gem::Requirement
+  version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
         version: '1.3'
-  type: :development
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
         version: '1.3'
+  prerelease: false
+  type: :development
 - !ruby/object:Gem::Dependency
   name: rake
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ! '>='
+    - - '>='
       - !ruby/object:Gem::Version
         version: '0'
-  type: :development
   prerelease: false
+  type: :development
+- !ruby/object:Gem::Dependency
+  name: rspec
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ! '>='
+    - - '>='
       - !ruby/object:Gem::Version
         version: '0'
-- !ruby/object:Gem::Dependency
-  name: rspec
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ! '>='
+    - - '>='
       - !ruby/object:Gem::Version
         version: '0'
-  type: :development
   prerelease: false
+  type: :development
+- !ruby/object:Gem::Dependency
+  name: epubinfo_with_toc
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ! '>='
+    - - '>='
       - !ruby/object:Gem::Version
         version: '0'
-- !ruby/object:Gem::Dependency
-  name: epubinfo_with_toc
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ! '>='
+    - - '>='
       - !ruby/object:Gem::Version
         version: '0'
-  type: :runtime
   prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ! '>='
-      - !ruby/object:Gem::Version
-        version: '0'
+  type: :runtime
 - !ruby/object:Gem::Dependency
   name: rubyzip
-  requirement: !ruby/object:Gem::Requirement
+  version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
         version: '1.0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
         version: '1.0'
+  prerelease: false
+  type: :runtime
 - !ruby/object:Gem::Dependency
   name: nokogiri
-  requirement: !ruby/object:Gem::Requirement
+  version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ! '>='
+    - - '>='
       - !ruby/object:Gem::Version
         version: '0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ! '>='
+    - - '>='
       - !ruby/object:Gem::Version
         version: '0'
+  prerelease: false
+  type: :runtime
 description: Create EPUB previews
 email:
 - mehmet@celik.be
@@ -122,24 +122,24 @@ homepage: https://github.com/mehmetc/EPUBChop
 licenses:
 - MIT
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ! '>='
+  - - '>='
     - !ruby/object:Gem::Version
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ! '>='
+  - - '>='
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.1.10
-signing_key:
+rubyforge_project:
+rubygems_version: 2.2.2
+signing_key:
 specification_version: 4
 summary: Removes unwanted content from an EPUB
 test_files: