EPUBChop 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NTk5MTRjMjQ1ZDk2YTEzM2E3MWNkN2ViODEzYzdlYWQ4ODE0YWE4NA==
5
+ data.tar.gz: !binary |-
6
+ YzYyMTA1YmYzOWY1N2UwNWRlM2ZiOTIwMmU0NTRhNGQ2YjE3YTc3Nw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MjI4ZDgzNWY0NzI3NmQ4YzAzNzNlNTZkMGMzZDA4MjIwMmVhOTQ4Y2EwODI5
10
+ ZWZjNDNkNDRhOWI4N2YxYWMxZjc5MDU2MjFkMTIzYWQ2MTk5YmI2YTczZjEx
11
+ ZWFjMWY4YTgwN2FkYjJiOGNlYTJhNTk5ZGY3N2VlZDE5MGU0NWM=
12
+ data.tar.gz: !binary |-
13
+ YWYyZDNiMDFkOGExMWY5MGFiM2Y4MDdkYWQ3MWNlMDkxMTQ2MDkzNzIxOWE1
14
+ Mzg3ZDBhNGVmMGJiZmQ0ODgzMmQzYmFkZWU2ZmNhYmZkMjUxMzc0NDQzNzE1
15
+ YWNhOGZhMTVmMzhiODFhMTY1ODAzNGE2MDI5YmE5MzI4MGI3ZmI=
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
19
+ .idea
20
+ Gemfile.lock
21
+ .DS_Store
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 2.0.0
5
+ - jruby-19mode
data/EPUBChop.gemspec ADDED
@@ -0,0 +1,27 @@
1
# coding: utf-8
# Gem specification for EPUBChop.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'EPUBChop/version'

Gem::Specification.new do |spec|
  spec.name          = "EPUBChop"
  spec.version       = EPUBChop::VERSION
  spec.authors       = ["Mehmet Celik"]
  spec.email         = ["mehmet@celik.be"]
  spec.description   = %q{Create EPUB previews}
  spec.summary       = %q{Removes unwanted content from an EPUB}
  # NOTE(review): URL taken from the CI badge in README.md - confirm it is the
  # canonical project page. An empty homepage makes `gem build` emit a warning.
  spec.homepage      = "https://github.com/mehmetc/EPUBChop"
  spec.license       = "MIT"

  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"

  spec.add_runtime_dependency "epubinfo_with_toc"
  spec.add_runtime_dependency "rubyzip", "~> 1.0"
  spec.add_runtime_dependency "nokogiri"
  # bin/epubchop requires trollop for option parsing; without this declaration
  # the installed executable fails with a LoadError.
  spec.add_runtime_dependency "trollop"
end
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in EPUBChop.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Mehmet Celik
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ EPUBChop [![Continuous Integration](https://travis-ci.org/mehmetc/EPUBChop.png?branch=master)](http://travis-ci.org/mehmetc/EPUBChop)
2
+ ========
3
+
4
+ Creates EPUB previews
5
+
6
+ ```
7
+ $ ./bin/epubchop --help
8
+ EPUBChop will create a preview version of an EPUB file.
9
+
10
+ Usage:
11
+ epubchop [options] <filename>
12
+
13
+ where [options] are:
14
+ --words, -w <i>: the amount of words to put in the preview (default: 10)
15
+ --base, -b <s>: how the --words value is interpreted. Possible values: percentage (default: percentage)
16
+ --line1, -l <s>: Text that is shown on line 1 of the chopped-off pages (default: Continue reading?)
17
+ --line2, -i <s>: Text that is shown on line 2 of the chopped-off pages (default: Go to your local library or buy the book.)
18
+ --help, -h: Show this message
19
+ ```
20
+
21
+ ### Example:
22
+ ```ruby
23
+ epubchop --words 10 --base percentage --line1 "Want to read more?" --line2 "Buy the book!" my.epub
24
+ ```
25
+
26
+ ## Contributing to EPUBChop
27
+ * Fork the project.
28
+ * Create a new branch to implement your bugfixes or features
29
+ * Commit and push until you are happy with your contribution.
30
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
31
+
32
+ ## Copyright
33
+
34
+ Copyright (c) 2013 LIBIS/KULeuven, Mehmet Celik. See LICENSE for further details.
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new('spec')
5
+
6
+ desc "run tests"
7
+ task :default => :spec
data/bin/epubchop ADDED
@@ -0,0 +1,44 @@
1
#!/usr/bin/env ruby
# Command-line front end for EPUBChop.
#
# Parses the options, chops the EPUB named on the command line and writes the
# preview to "chopped_<original file name>" in the current directory.
# Exits with status 1 when anything goes wrong.

# Resolve lib/ relative to this script so the tool also works when it is not
# started from the project root.
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
require 'fileutils' # FileUtils.move is used below
require 'EPUBChop'
require 'trollop'

options = Trollop::options do
  # `version` must be called as a parser method; assigning to a local named
  # `version` leaves the parser without a version string.
  version "EPUBChop #{EPUBChop::VERSION} (c) 2013 LIBIS/KULeuven, Mehmet Celik"
  banner <<-BANNER
EPUBChop will create a preview version of an EPUB file.

Usage:
epubchop [options] <filename>

where [options] are:
BANNER

  opt :words, "the amount of words to put in the preview", :type => :int, :default => 10
  opt :base, "How to interprete the --words options... Possible value: percentage", :type => :string, :default => 'percentage'
  opt :line1, "Text that is shown on line 1 of the chopped pages", :type => :string, :default => 'Continue reading?'
  opt :line2, "Text that is shown on line 2 of the chopped pages", :type => :string, :default => 'Go to your local library or buy the book.'
end

Trollop::die "need an EPUB file name" if ARGV.empty?

begin
  filename = File.expand_path(ARGV[0])
  # Both options carry defaults, so this normally yields two lines of text.
  text = [options[:line1], options[:line2]].compact

  puts 'loading EPUB'
  book = EPUBChop.get(filename)

  puts 'chopping EPUB'
  chopped = book.chop({:base => options[:base].to_s, :words => options[:words], :text => text})
  # Chop#chop reports its own failure and returns nil; stop here instead of
  # handing nil to FileUtils.move.
  raise 'the EPUB could not be chopped' if chopped.nil?

  puts 'rebuilding EPUB'
  FileUtils.move(chopped, "chopped_#{File.basename(filename)}")
rescue StandardError => e
  # StandardError rather than Exception, so Ctrl-C and `exit` are not swallowed.
  puts "An error occured\n#{e.message}"
  exit 1
end
@@ -0,0 +1,209 @@
1
+ require 'nokogiri'
2
+ require 'epubinfo'
3
+ require 'tempfile'
4
+ require 'zip'
5
+
6
module EPUBChop
  # Builds a preview copy of an EPUB.
  #
  # The words of every spine resource are counted when the object is created.
  # #chop then keeps resources until the word budget is used up, truncates the
  # resource in which the budget runs out and replaces every later resource
  # with a short "read more" placeholder page.
  class Chop
    # Characters that must be escaped before text is embedded in XHTML.
    XML_ESCAPES = {
      '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;', "'" => '&#39;'
    }.freeze

    # Number of words in front of the cut-off word that are used, together
    # with that word, to locate the paragraph in which the text is cut.
    ANCHOR_WINDOW = 5

    attr_reader :book, :words, :base, :resource_word_count, :text1, :text2

    # @param input [String] path of the EPUB file
    # @param options [Hash] :words (default 10), :base (default :percentage),
    #   :text (String, or Array holding line 1 and line 2 of the placeholder)
    # @raise [RuntimeError] when +input+ is nil
    def initialize(input, options = {})
      raise 'Please supply an input file name' if input.nil?

      set_defaults(options)
      # count the number of words of every spine resource
      @resource_word_count = count_words(input)
    end

    # @return [Integer] number of words in all spine resources together
    def total_words
      @resource_word_count.values.inject(0) { |sum, count| sum + count }
    end

    # @return [Hash] resource name => number of words the preview may keep
    def resource_allowed_word_count
      @resource_allowed_word_count ||= files_allowed(allowed_words(@words, @base))
    end

    # Writes the preview EPUB to a new file in the temporary directory.
    #
    # @param options [Hash] same keys as #initialize; keys that are left out
    #   keep the value they already have
    # @return [String, nil] path of the new EPUB, or nil when chopping failed
    #   (the reason is printed to standard output)
    # @raise [RuntimeError] when the zip library reports an error
    def chop(options = {})
      extract_dir = nil
      set_defaults(options)

      extract_dir = Dir.mktmpdir('epub_extract')
      extract_epub(@book.table_of_contents.parser.zip_file, extract_dir)

      # fix spine files
      @resource_word_count.each do |filename, original_count|
        allowed_count = resource_allowed_word_count[filename]
        next if original_count == allowed_count

        page = allowed_count == 0 ? empty_file : truncated_page(filename, allowed_count)
        File.open(File.join(extract_dir, filename), 'w') { |f| f.puts page }
      end
      #TODO:remove unwanted media

      build_epub(extract_dir)
    rescue Zip::ZipError => e
      raise RuntimeError, "Unable to build the chopped EPUB: #{e.message}"
    rescue StandardError => e
      puts "Chopping went wrong. #{e.message}"
      puts e.backtrace

      nil
    ensure
      # extract_dir is still nil when the failure happened before it was made
      FileUtils.remove_entry_secure(extract_dir) if extract_dir && File.exist?(extract_dir)
    end

    private

    # Stores the options. A key that is missing keeps its current value, so a
    # bare #chop call does not undo what was passed to the constructor.
    def set_defaults(options)
      @words = options[:words] || @words || 10
      @base = options[:base] || @base || :percentage

      text = options[:text]
      if text.is_a?(Array)
        @text1 = text[0] || 'Continue reading?'
        @text2 = text[1] || 'Go to your local library or buy the book.'
      elsif text || @text1.nil?
        @text1 = text || 'Continue reading? Go to your local library or buy the book.'
        @text2 = ''
      end

      # the memoised budgets depend on @words and @base
      @allowed_words = nil
      @resource_allowed_word_count = nil
    end

    # Unpacks every entry of +zip_file+ below +extract_dir+.
    def extract_epub(zip_file, extract_dir)
      root = File.expand_path(extract_dir)
      zip_file.entries.each do |entry|
        destination = File.expand_path(entry.name, root)
        # an entry name such as "../../x" must not leave the extraction dir
        unless destination.start_with?(root + File::SEPARATOR)
          raise "Refusing to extract '#{entry.name}' outside of #{root}"
        end

        # mkdir_p also copes with directories nested more than one level deep
        FileUtils.mkdir_p(File.dirname(destination))
        zip_file.extract(entry, destination)
      end
    end

    # Returns the XML of resource +filename+ with everything removed that
    # follows the paragraph in which word number +allowed_count+ occurs.
    # Falls back to the placeholder page when no such paragraph is found.
    def truncated_page(filename, allowed_count)
      resource = Nokogiri::XML(@book.table_of_contents.resources[filename]) do |config|
        config.noblanks.nonet
      end
      resource.css('script').remove
      resource.css('style').remove

      body = resource.at_css('body')
      # first(n) keeps exactly n words; the range 0..n would keep n + 1
      kept_words = body ? body.text.split.first(allowed_count) : []
      anchor = find_anchor_paragraph(resource, kept_words)
      return empty_file if anchor.nil?

      # limit on found paragraph: drop every element that follows it
      while (sibling = anchor.next_element)
        sibling.remove
      end

      resource.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
    end

    # Finds the first <p> that contains the last words of +kept_words+. The
    # window of words slides towards the start of the text until a paragraph
    # matches. Matching is done in Ruby on whitespace-normalised text, so
    # quotes in the text cannot break a CSS selector.
    #
    # @return [Nokogiri::XML::Node, nil] nil when no paragraph matches
    def find_anchor_paragraph(resource, kept_words)
      paragraphs = resource.css('p').map { |node| [node, node.text.split.join(' ')] }

      last = kept_words.length - 1
      while last >= 0
        first = [last - ANCHOR_WINDOW, 0].max
        phrase = kept_words[first..last].join(' ')
        match = paragraphs.find { |_node, text| text.include?(phrase) }
        return match.first if match

        last -= 1
      end

      nil
    end

    # Zips the content of +extract_dir+ into a new EPUB in the temp directory.
    #
    # @return [String] path of the new EPUB
    def build_epub(extract_dir)
      # Only a unique, not yet existing name is needed; Zip::File creates it.
      epub_path = Dir::Tmpname.create(['epub', '.epub']) { |_path| }

      files = Dir[File.join(extract_dir, '**', '**')]
      # The EPUB container format expects "mimetype" to be the first entry.
      # NOTE(review): it should also be stored uncompressed - not handled here.
      mimetype, others = files.partition { |file| file == File.join(extract_dir, 'mimetype') }

      # block form closes the archive even when adding an entry fails
      Zip::File.open(epub_path, Zip::File::CREATE) do |zipfile|
        (mimetype + others).each do |file|
          zipfile.add(file.sub("#{extract_dir}/", ''), file)
        end
      end

      epub_path
    end

    # @return [String] XHTML placeholder page showing @text1 and @text2
    def empty_file
      <<DATA
<?xml version="1.0" encoding="utf-8" standalone="no"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Read more</title>
</head>
<body>
<center>
<div style='width:100%;border:1px solid black;margin-top:20px;padding:5px'>
<div><h2>#{escape_xml(@text1)}</h2></div>
<div><h2>#{escape_xml(@text2)}</h2></div>
</div>
</center>
</body>
</html>
DATA
    end

    # Escapes +text+ so that characters such as "&" keep the page well-formed.
    def escape_xml(text)
      text.to_s.gsub(/[&<>"']/, XML_ESCAPES)
    end

    # Loads the book and counts the words of every spine resource.
    #
    # @return [Hash] resource uri => word count; empty when no book was loaded
    def count_words(input)
      @book = EPUBInfo.get(input)
      return {} unless @book

      resources = @book.table_of_contents.resources
      resources.spine.each_with_object({}) do |resource, counts|
        raw = Nokogiri::HTML(resources[resource[:uri]]) do |config|
          config.noblanks.nonet
        end
        raw.css('script').remove
        raw.css('style').remove

        body = raw.at_css('body')
        # a resource without a body simply has no words
        counts[resource[:uri]] = body ? body.text.split.size : 0
      end
    end

    # Translates +words+ into an absolute number of words. With base
    # 'percentage' +words+ is a percentage of #total_words; any other base
    # uses +words+ as is. The result is memoised until the options change.
    def allowed_words(words, base)
      @allowed_words ||= case base.to_s
                         when 'percentage' then (total_words * (words / 100.0)).to_i
                         else words
                         end
    end

    # Spreads a budget of +allowed_words+ over the resources in spine order.
    # A resource keeps all its words while the budget lasts, the resource in
    # which the budget runs out gets what is left, every later one gets 0.
    #
    # @return [Hash] resource name => number of words that may be kept
    def files_allowed(allowed_words)
      remaining = [allowed_words.to_i, 0].max

      @resource_word_count.each_with_object({}) do |(name, count), allowance|
        granted = [count, remaining].min
        allowance[name] = granted
        remaining -= granted
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the EPUBChop gem.
module EPUBChop
  # Release number of the gem; read by the gemspec and the command-line tool.
  VERSION = '0.0.1'
end
data/lib/EPUBChop.rb ADDED
@@ -0,0 +1,9 @@
1
+ require "EPUBChop/version"
2
+ require 'EPUBChop/chop'
3
+
4
+
5
# Entry point of the EPUBChop library.
module EPUBChop
  class << self
    # Creates a chopper for the EPUB stored at +path+.
    #
    # @param path [String] location of the EPUB file
    # @param options [Hash] handed to EPUBChop::Chop.new unchanged
    # @return [EPUBChop::Chop]
    def get(path, options = {})
      Chop.new(path, options)
    end
  end
end