EPUBChop 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +21 -0
- data/.travis.yml +5 -0
- data/EPUBChop.gemspec +27 -0
- data/Gemfile +4 -0
- data/LICENSE +20 -0
- data/README.md +34 -0
- data/Rakefile +7 -0
- data/bin/epubchop +44 -0
- data/lib/EPUBChop/chop.rb +209 -0
- data/lib/EPUBChop/version.rb +3 -0
- data/lib/EPUBChop.rb +9 -0
- data/lib/trollop.rb +783 -0
- data/spec/Verne_20000_West_pg11393.epub +0 -0
- data/spec/epubchop_spec.rb +33 -0
- data/spec/spec_helper.rb +13 -0
- metadata +148 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NTk5MTRjMjQ1ZDk2YTEzM2E3MWNkN2ViODEzYzdlYWQ4ODE0YWE4NA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
YzYyMTA1YmYzOWY1N2UwNWRlM2ZiOTIwMmU0NTRhNGQ2YjE3YTc3Nw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MjI4ZDgzNWY0NzI3NmQ4YzAzNzNlNTZkMGMzZDA4MjIwMmVhOTQ4Y2EwODI5
|
10
|
+
ZWZjNDNkNDRhOWI4N2YxYWMxZjc5MDU2MjFkMTIzYWQ2MTk5YmI2YTczZjEx
|
11
|
+
ZWFjMWY4YTgwN2FkYjJiOGNlYTJhNTk5ZGY3N2VlZDE5MGU0NWM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YWYyZDNiMDFkOGExMWY5MGFiM2Y4MDdkYWQ3MWNlMDkxMTQ2MDkzNzIxOWE1
|
14
|
+
Mzg3ZDBhNGVmMGJiZmQ0ODgzMmQzYmFkZWU2ZmNhYmZkMjUxMzc0NDQzNzE1
|
15
|
+
YWNhOGZhMTVmMzhiODFhMTY1ODAzNGE2MDI5YmE5MzI4MGI3ZmI=
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/EPUBChop.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for EPUBChop.
#
# The version is read from lib/EPUBChop/version.rb, so lib/ is put on the load
# path before that file is required.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'EPUBChop/version'

Gem::Specification.new do |spec|
  spec.name          = "EPUBChop"
  spec.version       = EPUBChop::VERSION
  spec.authors       = ["Mehmet Celik"]
  spec.email         = ["mehmet@celik.be"]
  spec.description   = %q{Create EPUB previews}
  spec.summary       = %q{Removes unwanted content from an EPUB}
  # Was an empty string. NOTE(review): URL derived from the mehmetc/EPUBChop
  # slug used by the Travis link in the README - confirm it is the canonical
  # project page.
  spec.homepage      = "https://github.com/mehmetc/EPUBChop"
  spec.license       = "MIT"

  # The packaged file list is whatever git tracks; executables and test files
  # are derived from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"

  spec.add_runtime_dependency "epubinfo_with_toc"
  spec.add_runtime_dependency "rubyzip", "~> 1.0"
  spec.add_runtime_dependency "nokogiri"
end
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2013 Mehmet Celik
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
EPUBChop [](http://travis-ci.org/mehmetc/EPUBChop)
|
2
|
+
========
|
3
|
+
|
4
|
+
Creates EPUB previews
|
5
|
+
|
6
|
+
```
|
7
|
+
$ ./bin/epubchop --help
|
8
|
+
EPUBChop will create a preview version of an EPUB file.
|
9
|
+
|
10
|
+
Usage:
|
11
|
+
epubchop [options] <filename>
|
12
|
+
|
13
|
+
where [options] are:
|
14
|
+
--words, -w <i>: the amount of words to put in the preview (default: 10)
|
15
|
+
--base, -b <s>: How to interpret the --words value. Possible value: percentage (default: percentage)
|
16
|
+
--line1, -l <s>: Text that is shown on line 1 of the chopped pages (default: Continue reading?)
|
17
|
+
--line2, -i <s>: Text that is shown on line 2 of the chopped pages (default: Go to your local library or buy the book.)
|
18
|
+
--help, -h: Show this message
|
19
|
+
```
|
20
|
+
|
21
|
+
### Example:
|
22
|
+
```ruby
|
23
|
+
epubchop --words 10 --base percentage --line1 "Want to read more?" --line2 "Buy the book!" my.epub
|
24
|
+
```
|
25
|
+
|
26
|
+
## Contributing to EPUBChop
|
27
|
+
* Fork the project.
|
28
|
+
* Create a new branch to implement your bugfixes or features
|
29
|
+
* Commit and push until you are happy with your contribution.
|
30
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
31
|
+
|
32
|
+
## Copyright
|
33
|
+
|
34
|
+
Copyright (c) 2013 LIBIS/KULeuven, Mehmet Celik. See LICENSE for further details.
|
data/Rakefile
ADDED
data/bin/epubchop
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command line front end for EPUBChop.
#
# Parses the options, loads the EPUB named by the first argument, chops it and
# moves the result to "chopped_<original name>" in the current directory.
# Exits with status 1 when anything goes wrong.

# Resolve lib/ relative to this script instead of the current working
# directory, so the tool also works when it is started from another directory.
$LOAD_PATH << File.expand_path('../../lib', __FILE__)
require 'fileutils'
require 'EPUBChop'
require 'trollop'

options = Trollop::options do
  # `version` has to be called as a parser method; writing `version = ...`
  # only creates a block-local variable and never registers the version text.
  version "EPUBChop #{EPUBChop::VERSION} (c) 2013 LIBIS/KULeuven, Mehmet Celik"
  banner <<-BANNER
EPUBChop will create a preview version of an EPUB file.

Usage:
epubchop [options] <filename>

where [options] are:
BANNER

  opt :words, "the amount of words to put in the preview", :type => :int, :default => 10
  opt :base, "How to interprete the --words options... Possible value: percentage", :type => :string, :default => 'percentage'
  opt :line1, "Text that is shown on line 1 of the chopped pages", :type => :string, :default => 'Continue reading?'
  opt :line2, "Text that is shown on line 2 of the chopped pages", :type => :string, :default => 'Go to your local library or buy the book.'
end

Trollop::die "need an EPUB file name" if ARGV.empty?

begin
  filename = File.expand_path(ARGV[0])
  words    = options[:words]
  base     = options[:base]
  # Both options carry a default, so they are always present.
  text     = [options[:line1], options[:line2]]

  puts 'loading EPUB'
  b = EPUBChop.get(filename)
  puts 'chopping EPUB'
  c = b.chop({:base => base.to_s, :words => words, :text => text})
  # chop reports its own failures and returns nil; stop here instead of
  # handing nil to FileUtils.move.
  raise 'the EPUB could not be chopped' if c.nil?

  puts 'rebuilding EPUB'
  FileUtils.move(c, "chopped_#{File.basename(filename)}")
rescue StandardError => e
  # StandardError (not Exception) so Ctrl-C and exit requests are not swallowed.
  puts "An error occured\n#{e.message}"
  exit 1
end
|
@@ -0,0 +1,209 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'epubinfo'
|
3
|
+
require 'tempfile'
|
4
|
+
require 'zip'
|
5
|
+
|
6
|
+
module EPUBChop
  # Creates a shortened "preview" copy of an EPUB.
  #
  # The word count of every spine resource is measured when the object is
  # created. #chop then keeps resources up to the allowed number of words,
  # truncates the resource in which the limit falls and replaces the remaining
  # resources with a small notice page.
  class Chop
    DEFAULT_WORDS = 10
    DEFAULT_BASE  = :percentage
    DEFAULT_LINE1 = 'Continue reading?'
    DEFAULT_LINE2 = 'Go to your local library or buy the book.'
    DEFAULT_TEXT  = 'Continue reading? Go to your local library or buy the book.'

    # Number of words placed in front of the cut-off word when looking for the
    # paragraph that contains the cut-off point.
    SEARCH_WINDOW = 5

    attr_reader :book, :words, :base, :resource_word_count, :text1, :text2

    # @param input   file name of the EPUB, handed to EPUBInfo.get
    # @param options [Hash] :words, :base and :text, see #set_defaults
    # @raise [RuntimeError] when input is nil
    def initialize(input, options = {})
      set_defaults(options)

      raise 'Please supply an input file name' if input.nil?

      # count the number of words in every spine resource
      @resource_word_count = count_words(input)
    end

    # @return [Integer] sum of the word counts of all spine resources
    def total_words
      @resource_word_count.values.inject(0) { |sum, count| sum + count }
    end

    # @return [Hash] resource name => number of words that resource may keep.
    #   Memoized; #set_defaults clears it when :words or :base change.
    def resource_allowed_word_count
      @resource_allowed_word_count ||= files_allowed(allowed_words(@words, @base))
    end

    # Builds the chopped EPUB in a temporary location.
    #
    # @param options [Hash] same keys as for #initialize; keys that are left
    #   out keep the value they already have
    # @return [String, nil] path of the new EPUB, or nil when chopping failed
    #   (the error and its backtrace are printed)
    # @raise [RuntimeError] when the zip library reports an error
    def chop(options = {})
      set_defaults(options)

      extract_dir = Dir.mktmpdir('epub_extract')
      extract_epub(@book.table_of_contents.parser.zip_file, extract_dir)
      rewrite_spine(extract_dir)
      #TODO:remove unwanted media
      build_epub(extract_dir)
    rescue Zip::ZipError => e
      raise RuntimeError, "Could not process the EPUB archive: #{e.message}"
    rescue StandardError => e
      puts "Chopping went wrong. #{e.message}"
      puts e.backtrace

      nil
    ensure
      # extract_dir is still nil when the failure happened before it was created
      FileUtils.remove_entry_secure(extract_dir) if extract_dir
    end

    private

    # Stores the options on the instance.
    #
    # :words and :base fall back to the value already set, then to the
    # defaults, so calling #chop without options no longer discards what was
    # given to the constructor. :text may be an Array (line 1, line 2) or a
    # single String.
    def set_defaults(options)
      new_words = options[:words] || @words || DEFAULT_WORDS
      new_base  = options[:base] || @base || DEFAULT_BASE

      # the memoized allowance depends on both values
      @resource_allowed_word_count = nil if new_words != @words || new_base != @base

      @words = new_words
      @base  = new_base

      text = options[:text]
      if text.is_a?(Array)
        @text1 = text[0] || DEFAULT_LINE1
        @text2 = text[1] || DEFAULT_LINE2
      elsif text || @text1.nil?
        @text1 = text || DEFAULT_TEXT
        @text2 = ''
      end
    end

    # Unpacks every entry of zip_file below extract_dir.
    def extract_epub(zip_file, extract_dir)
      base_dir = File.expand_path(extract_dir)
      root     = File.join(base_dir, '')

      zip_file.entries.each do |entry|
        target = File.expand_path(entry.name, base_dir)
        # entry names come from the archive; do not let them leave the work dir
        raise "Refusing to extract '#{entry.name}' outside of the work directory" unless target.start_with?(root)

        # mkdir_p: the parent directories may be nested and not created yet
        FileUtils.mkdir_p(File.dirname(target))
        zip_file.extract(entry, target)
      end
    end

    # Rewrites every spine resource that may keep fewer words than it has.
    def rewrite_spine(extract_dir)
      @resource_word_count.each do |filename, original_count|
        allowed_count = resource_allowed_word_count[filename]
        next if original_count == allowed_count

        content = allowed_count == 0 ? empty_file : truncated_resource(filename, allowed_count)
        File.open(File.join(extract_dir, filename), 'w') { |f| f.puts content }
      end
    end

    # Returns the XML of the resource with everything after the paragraph that
    # holds the cut-off point removed. Only the following siblings of that
    # paragraph are dropped; the paragraph itself is kept whole.
    def truncated_resource(filename, allowed_count)
      resource = Nokogiri::XML(@book.table_of_contents.resources[filename]) do |config|
        config.noblanks.nonet
      end
      resource.css('script').remove
      resource.css('style').remove

      body = resource.at_css('body')
      raise "No body element found in #{filename}" if body.nil?

      kept_words = body.text.split[0..allowed_count]
      paragraph  = find_cutoff_paragraph(resource, kept_words)
      raise "Could not locate the cut-off point in #{filename}" if paragraph.nil?

      # remove the nodes directly; next_element changes after every removal
      while (sibling = paragraph.next_element)
        sibling.remove
      end

      resource.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
    end

    # Finds the first <p> whose whitespace-normalized text contains the words
    # just before the cut-off. The window of words slides towards the start of
    # the text until a paragraph matches. The comparison is done in Ruby, so
    # quotes in the text cannot break a selector.
    #
    # @return the matching paragraph node, or nil when none matches
    def find_cutoff_paragraph(resource, kept_words)
      paragraphs = resource.css('p').map { |p| [p, p.text.split.join(' ')] }

      (kept_words.length - 1).downto(0) do |window_end|
        window_start = [window_end - SEARCH_WINDOW, 0].max
        look_for     = kept_words[window_start..window_end].join(' ')

        match = paragraphs.find { |_, text| text.include?(look_for) }
        return match.first if match
      end

      nil
    end

    # Zips the content of extract_dir into a new temporary .epub file.
    #
    # @return [String] path of the new file
    def build_epub(extract_dir)
      # Tempfile is only used to reserve a unique name; the empty placeholder
      # is deleted so that Zip::File can create a fresh archive at that path.
      placeholder = Tempfile.new(['epub', '.epub'], Dir.tmpdir)
      epub_path   = placeholder.path
      placeholder.close!

      # mimetype is added first so it precedes the other entries, which the
      # EPUB container format expects.
      # NOTE(review): the container format also wants it stored uncompressed;
      # that is not handled here.
      mimetype, others = Dir[File.join(extract_dir, '**', '**')].partition do |file|
        file == File.join(extract_dir, 'mimetype')
      end

      # block form closes the archive even when adding a file fails
      Zip::File.open(epub_path, Zip::File::CREATE) do |zipfile|
        (mimetype + others).each do |file|
          zipfile.add(file.sub("#{extract_dir}/", ''), file)
        end
      end

      epub_path
    end

    # Page that replaces a resource that may not keep any words.
    # @text1 and @text2 are inserted verbatim, so they must be valid XHTML text.
    # The heredoc body starts in column 0 because nothing may precede the XML
    # declaration.
    def empty_file
      <<DATA
<?xml version="1.0" encoding="utf-8" standalone="no"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Read more</title>
</head>
<body>
<center>
<div style='width:100%;border:1px solid black;margin-top:20px;padding:5px'>
<div><h2>#{@text1}</h2></div>
<div><h2>#{@text2}</h2></div>
</div>
</center>
</body>
</html>
DATA
    end

    # Loads the book into @book and counts the words of every spine resource.
    #
    # @return [Hash] resource uri => word count, in spine order; empty when
    #   the book could not be loaded
    def count_words(input)
      @book = EPUBInfo.get(input)
      resource_word_count = {}
      return resource_word_count unless @book

      @book.table_of_contents.resources.spine.each do |resource|
        raw = Nokogiri::HTML(@book.table_of_contents.resources[resource[:uri]]) do |config|
          config.noblanks.nonet
        end
        raw.css('script').remove
        raw.css('style').remove

        # a document without a body counts as zero words
        body = raw.at_css('body')
        resource_word_count.store(resource[:uri], body ? body.text.split.size : 0)
      end

      resource_word_count
    end

    # @return the number of words the preview may contain: a percentage of
    #   #total_words when base is 'percentage', otherwise words itself
    def allowed_words(words, base)
      case base.to_s
      when 'percentage'
        (total_words * (words / 100.0)).to_i
      else
        words
      end
    end

    # Distributes allowed_words over the resources in spine order: resources
    # are kept whole while they fit, the resource in which the budget runs out
    # gets what is left and all later resources get 0. A resource that exactly
    # uses up the budget is kept whole, and a budget larger than the book
    # keeps everything.
    #
    # @return [Hash] resource name => allowed word count
    def files_allowed(allowed_words)
      remaining = [allowed_words, 0].max
      allowance = {}

      @resource_word_count.each do |name, count|
        granted = [count, remaining].min
        allowance.store(name, granted)
        remaining -= granted
      end

      allowance
    end
  end
end
|