EPUBChop 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NTk5MTRjMjQ1ZDk2YTEzM2E3MWNkN2ViODEzYzdlYWQ4ODE0YWE4NA==
5
+ data.tar.gz: !binary |-
6
+ YzYyMTA1YmYzOWY1N2UwNWRlM2ZiOTIwMmU0NTRhNGQ2YjE3YTc3Nw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MjI4ZDgzNWY0NzI3NmQ4YzAzNzNlNTZkMGMzZDA4MjIwMmVhOTQ4Y2EwODI5
10
+ ZWZjNDNkNDRhOWI4N2YxYWMxZjc5MDU2MjFkMTIzYWQ2MTk5YmI2YTczZjEx
11
+ ZWFjMWY4YTgwN2FkYjJiOGNlYTJhNTk5ZGY3N2VlZDE5MGU0NWM=
12
+ data.tar.gz: !binary |-
13
+ YWYyZDNiMDFkOGExMWY5MGFiM2Y4MDdkYWQ3MWNlMDkxMTQ2MDkzNzIxOWE1
14
+ Mzg3ZDBhNGVmMGJiZmQ0ODgzMmQzYmFkZWU2ZmNhYmZkMjUxMzc0NDQzNzE1
15
+ YWNhOGZhMTVmMzhiODFhMTY1ODAzNGE2MDI5YmE5MzI4MGI3ZmI=
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
19
+ .idea
20
+ Gemfile.lock
21
+ .DS_Store
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 2.0.0
5
+ - jruby-19mode
data/EPUBChop.gemspec ADDED
@@ -0,0 +1,27 @@
1
# coding: utf-8
# Gem specification for EPUBChop.
#
# The library directory is put on the load path first so the version constant
# can be read without the gem being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'EPUBChop/version'

Gem::Specification.new do |spec|
  spec.name = "EPUBChop"
  spec.version = EPUBChop::VERSION
  spec.authors = ["Mehmet Celik"]
  spec.email = ["mehmet@celik.be"]
  spec.description = %q{Create EPUB previews}
  spec.summary = %q{Removes unwanted content from an EPUB}
  # Repository slug taken from the CI badge in the README.
  spec.homepage = "https://github.com/mehmetc/EPUBChop"
  spec.license = "MIT"

  # Package every file tracked by git; executables and tests are derived
  # from that list by directory.
  spec.files = `git ls-files`.split($/)
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
  spec.add_runtime_dependency "epubinfo_with_toc"
  spec.add_runtime_dependency "rubyzip", "~> 1.0"
  spec.add_runtime_dependency "nokogiri"
  # bin/epubchop requires trollop for option parsing, so it must be installed
  # together with the gem.
  spec.add_runtime_dependency "trollop"
end
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Gems are resolved against the public RubyGems index.
source "https://rubygems.org"

# The dependency list itself lives in EPUBChop.gemspec.
gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Mehmet Celik
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ EPUBChop [![Continuous Integration](https://travis-ci.org/mehmetc/EPUBChop.png?branch=master)](http://travis-ci.org/mehmetc/EPUBChop)
2
+ ========
3
+
4
+ Creates EPUB previews
5
+
6
+ ```
7
+ $ ./bin/epubchop --help
8
+ EPUBChop will create a preview version of an EPUB file.
9
+
10
+ Usage:
11
+ epubchop [options] <filename>
12
+
13
+ where [options] are:
14
+ --words, -w <i>: the amount of words to put in the preview (default: 10)
15
+ --base, -b <s>: How to interpret the --words value. Possible value: percentage (default: percentage)
16
+ --line1, -l <s>: Text that is shown on line 1 of the chopped-off pages (default: Continue reading?)
17
+ --line2, -i <s>: Text that is shown on line 2 of the chopped-off pages (default: Go to your local library or buy the book.)
18
+ --help, -h: Show this message
19
+ ```
20
+
21
+ ### Example:
22
+ ```bash
23
+ epubchop --words 10 --base percentage --line1 "Want to read more?" --line2 "Buy the book!" my.epub
24
+ ```
25
+
26
+ ## Contributing to EPUBChop
27
+ * Fork the project.
28
+ * Create a new branch to implement your bugfixes or features
29
+ * Commit and push until you are happy with your contribution.
30
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
31
+
32
+ ## Copyright
33
+
34
+ Copyright (c) 2013 LIBIS/KULeuven, Mehmet Celik. See LICENSE for further details.
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
# Build/release tasks provided by Bundler, plus the RSpec task class.
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# Registers a rake task named "spec".
RSpec::Core::RakeTask.new('spec')

# Running plain `rake` executes the spec task.
desc 'run tests'
task :default => [:spec]
data/bin/epubchop ADDED
@@ -0,0 +1,44 @@
1
#!/usr/bin/env ruby
# Command line front end for EPUBChop: loads an EPUB, chops it down to a
# preview and writes the result to "chopped_<original name>" in the current
# working directory. Exits with status 1 when anything goes wrong.

# Resolve lib/ relative to this script so it also works when it is not
# started from the project root.
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
require 'fileutils'
require 'EPUBChop'
require 'trollop'

options = Trollop::options do
  # `version` is a parser method; writing `version = ...` would only create
  # a local variable and leave --version unregistered.
  version "EPUBChop #{EPUBChop::VERSION} (c) 2013 LIBIS/KULeuven, Mehmet Celik"
  banner <<-BANNER
EPUBChop will create a preview version of an EPUB file.

Usage:
epubchop [options] <filename>

where [options] are:
  BANNER

  opt :words, "the amount of words to put in the preview", :type => :int, :default => 10
  opt :base, "How to interprete the --words options... Possible value: percentage", :type => :string, :default => 'percentage'
  opt :line1, "Text that is shown on line 1 of the chopped pages", :type => :string, :default => 'Continue reading?'
  opt :line2, "Text that is shown on line 2 of the chopped pages", :type => :string, :default => 'Go to your local library or buy the book.'
end

Trollop::die "need an EPUB file name" if ARGV.empty?

begin
  filename = File.expand_path(ARGV[0])
  words = options[:words]
  base = options[:base]
  text = [:line1, :line2].select { |key| options.has_key?(key) }.map { |key| options[key] }

  puts 'loading EPUB'
  book = EPUBChop.get(filename)
  puts 'chopping EPUB'
  chopped = book.chop({:base => base.to_s, :words => words, :text => text})
  # chop reports its own failure and returns nil; stop here instead of
  # handing nil to FileUtils.move.
  raise 'the EPUB could not be chopped' if chopped.nil?
  puts 'rebuilding EPUB'
  FileUtils.move(chopped, "chopped_#{File.basename(filename)}")
rescue StandardError => e
  # StandardError only: Ctrl-C and exit must not be reported as errors.
  puts "An error occured\n#{e.message}"
  exit 1
end
@@ -0,0 +1,209 @@
1
+ require 'nokogiri'
2
+ require 'epubinfo'
3
+ require 'tempfile'
4
+ require 'zip'
5
+
6
module EPUBChop
  # Builds a shortened "preview" copy of an EPUB.
  #
  # The words of every spine resource are counted on construction. #chop then
  # keeps resources until the word budget is used up, cuts the resource in
  # which the budget runs out, and replaces every later resource with a
  # placeholder page showing text1/text2.
  class Chop
    attr_reader :book, :words, :base, :resource_word_count, :text1, :text2

    # input   - path of the EPUB file; must not be nil.
    # options - :words (Integer, default 10),
    #           :base  ('percentage' makes :words a percentage of the total
    #                   word count, anything else an absolute number of words),
    #           :text  (Array of two lines, or a single String).
    #
    # Raises RuntimeError when input is nil.
    def initialize(input, options = {})
      set_defaults(options)

      raise 'Please supply an input file name' if input.nil?

      # count the number of words in each spine resource
      @resource_word_count = count_words(input)
    end

    # Total number of words over all spine resources.
    def total_words
      @resource_word_count.values.inject(0) { |sum, count| sum + count }
    end

    # Hash of resource name => number of words that resource may keep under
    # the current words/base settings. Memoised until the options change.
    def resource_allowed_word_count
      @resource_allowed_word_count ||= files_allowed(allowed_words(@words, @base))
    end

    # Writes the chopped EPUB to a new file in the temp directory.
    #
    # options - same keys as #initialize; keys that are left out keep the
    #           value they already have.
    #
    # Returns the path of the new EPUB, or nil when chopping failed (the
    # error is printed). Zip errors are re-raised as RuntimeError.
    def chop(options = {})
      set_defaults(options)

      extract_dir = Dir.mktmpdir('epub_extract')
      extract_book(@book.table_of_contents.parser.zip_file, extract_dir)

      # fix spine files
      @resource_word_count.each do |filename, original_count|
        allowed_count = resource_allowed_word_count[filename]
        next if allowed_count == original_count

        content = allowed_count == 0 ? empty_file : truncated_resource(filename, allowed_count)
        File.open(File.join(extract_dir, filename), 'w') { |f| f.puts content }
      end
      #TODO:remove unwanted media

      build_epub(extract_dir)
    rescue Zip::ZipError => e
      raise RuntimeError, e.message
    rescue StandardError => e
      puts "Chopping went wrong. #{e.message}"
      puts e.backtrace

      nil
    ensure
      # extract_dir is still nil when the failure happened before mktmpdir
      FileUtils.remove_entry_secure(extract_dir) if extract_dir
    end

    private

    # Stores the options in instance variables. A key that is absent keeps
    # its current value, so options given to #initialize survive a later
    # #chop call without arguments; built-in defaults only apply when nothing
    # was ever set.
    def set_defaults(options)
      @words = options[:words] || @words || 10
      @base = options[:base] || @base || :percentage

      text = options[:text]
      if text.is_a?(Array)
        @text1 = text[0] || 'Continue reading?'
        @text2 = text[1] || 'Go to your local library or buy the book.'
      elsif text || @text1.nil?
        @text1 = text || 'Continue reading? Go to your local library or buy the book.'
        @text2 = ''
      end

      # the memoised budget depends on words/base, so it has to be recomputed
      @resource_allowed_word_count = nil
    end

    # Unpacks every entry of zip_file below extract_dir.
    # Raises RuntimeError for an entry whose name points outside extract_dir.
    def extract_book(zip_file, extract_dir)
      root = File.join(File.expand_path(extract_dir), '')
      zip_file.entries.each do |entry|
        target = File.join(extract_dir, entry.name)
        unless File.expand_path(target).start_with?(root)
          raise "Refusing to extract #{entry.name} outside of #{extract_dir}"
        end

        # mkdir_p also creates missing parent directories of nested entries
        FileUtils.mkdir_p(File.dirname(target))
        zip_file.extract(entry, target)
      end
    end

    # Returns the XML of resource `filename` cut after the paragraph in which
    # the word budget runs out. The cut is made on paragraph level: the
    # matching <p> stays complete and its following sibling elements go.
    # Raises RuntimeError when no cut point can be found.
    def truncated_resource(filename, allowed_count)
      resource = Nokogiri::XML(@book.table_of_contents.resources[filename]) do |config|
        config.noblanks.nonet
      end
      resource.css('script').remove
      resource.css('style').remove

      body = resource.at_css('body')
      raise "No body found in #{filename}" if body.nil?

      anchor = find_cut_paragraph(resource, body.text.split[0..allowed_count])
      raise "Could not find a paragraph to cut at in #{filename}" if anchor.nil?

      # NOTE(review): only following siblings of the paragraph are removed;
      # content that follows one of its ancestors is kept.
      while (sibling = anchor.next_element)
        sibling.remove
      end

      resource.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
    end

    # Looks for the first <p> containing the last (up to) six kept words as
    # one string, moving that window back one word at a time until a
    # paragraph matches. Returns nil when the words are exhausted.
    def find_cut_paragraph(resource, kept_words)
      paragraphs = resource.css('p')
      last = kept_words.length - 1
      while last >= 0
        first = [last - 5, 0].max
        look_for = kept_words[first..last].join(' ')
        # plain substring test, so quotes in the book text cannot break a selector
        match = paragraphs.find { |paragraph| paragraph.text.include?(look_for) }
        return match if match

        last -= 1
      end
      nil
    end

    # Zips the content of extract_dir into a new .epub file in the temp
    # directory and returns its path.
    def build_epub(extract_dir)
      placeholder = Tempfile.new(['epub', '.epub'], Dir.tmpdir)
      # Tempfile only supplies a unique name. The dashes are dropped from the
      # base name alone, so a temp directory containing '-' stays intact.
      epub_path = File.join(File.dirname(placeholder.path),
                            File.basename(placeholder.path).delete('-'))
      placeholder.close!

      # NOTE(review): the EPUB container format expects the mimetype entry
      # first and stored uncompressed; this does not enforce that - confirm
      # with an EPUB validator.
      Zip::File.open(epub_path, Zip::File::CREATE) do |zipfile|
        Dir[File.join(extract_dir, '**', '**')].each do |file|
          zipfile.add(file.sub("#{extract_dir}/", ''), file)
        end
      end

      epub_path
    end

    # Placeholder page written in place of every resource that is dropped.
    # text1 and text2 are inserted as given, without escaping.
    def empty_file
      <<-DATA
<?xml version="1.0" encoding="utf-8" standalone="no"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Read more</title>
</head>
<body>
<center>
<div style='width:100%;border:1px solid black;margin-top:20px;padding:5px'>
<div><h2>#{@text1}</h2></div>
<div><h2>#{@text2}</h2></div>
</div>
</center>
</body>
</html>
      DATA
    end

    # Loads the book and returns a Hash of spine resource uri => word count.
    # Scripts and styles are not counted; a resource without a body counts 0.
    def count_words(input)
      @book = EPUBInfo.get(input)
      return {} unless @book

      @book.table_of_contents.resources.spine.each_with_object({}) do |resource, counts|
        raw = Nokogiri::HTML(@book.table_of_contents.resources[resource[:uri]]) do |config|
          config.noblanks.nonet
        end
        raw.css('script').remove
        raw.css('style').remove
        body = raw.at_css('body')
        counts[resource[:uri]] = body ? body.text.split.size : 0
      end
    end

    # Translates words/base into an absolute number of words.
    def allowed_words(words, base)
      case base.to_s
      when 'percentage'
        (total_words * (words / 100.0)).to_i
      else
        words
      end
    end

    # Spreads the word budget over the resources in spine order: each one
    # gets its full count while the budget lasts, the one in which the budget
    # runs out gets what is left, every later one gets 0. A resource that
    # fits the remaining budget exactly is kept whole.
    def files_allowed(allowed_words)
      remaining = allowed_words
      @resource_word_count.each_with_object({}) do |(name, count), allowed|
        granted = [[remaining, count].min, 0].max
        allowed[name] = granted
        remaining -= granted
      end
    end

  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the EPUBChop gem.
module EPUBChop
  # Version string of the gem.
  VERSION = '0.0.1'
end
data/lib/EPUBChop.rb ADDED
@@ -0,0 +1,9 @@
1
+ require "EPUBChop/version"
2
+ require 'EPUBChop/chop'
3
+
4
+
5
# Entry point of the library.
module EPUBChop
  class << self
    # Creates an EPUBChop::Chop for the EPUB at `path`; `options` is handed
    # to the Chop constructor unchanged.
    def get(path, options = {})
      Chop.new(path, options)
    end
  end
end