manga-downloadr 1.0.4 → 2.0.0
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +33 -12
- data/bin/manga-downloadr +33 -35
- data/lib/manga-downloadr.rb +19 -219
- data/lib/manga-downloadr/chapters.rb +15 -0
- data/lib/manga-downloadr/concurrency.rb +28 -0
- data/lib/manga-downloadr/downloadr_client.rb +24 -0
- data/lib/manga-downloadr/image_downloader.rb +19 -0
- data/lib/manga-downloadr/page_image.rb +25 -0
- data/lib/manga-downloadr/pages.rb +10 -0
- data/lib/manga-downloadr/records.rb +4 -0
- data/lib/manga-downloadr/version.rb +1 -1
- data/lib/manga-downloadr/workflow.rb +83 -0
- metadata +23 -44
- data/lib/retryable_typhoeus.rb +0 -40
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b083f3caf90e18e3afa86b91fbbd6d54c1b14236
+  data.tar.gz: 14ba9e5c6f03356dd90446b966acf73caf201b28
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d15ce2ed73541b567ded076918b114b359443256749e5a12d488f0de9271d69ad0ba3be176557b33d41ec4aeb93952216a7398620b7d77a306ad75fb5a760959
+  data.tar.gz: 03a5734f26846feb483d645aaaf504adab7167cd73004a36e7b347e4c08d16927a33cfbf07289bc2b61a2ddf89a244b47dcdf0230c28a257feee2a6f10f41bdb
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,29 +1,49 @@
 # Manga Downloadr
 
-
-
+[![Build Status](https://travis-ci.org/akitaonrails/manga-downloadr.svg)](https://travis-ci.org/akitaonrails/manga-downloadr)
+[![Code Climate](https://codeclimate.com/repos/54ac0c066956802e06000ffb/badges/441f1f6af106cc32b2b5/gpa.svg)](https://codeclimate.com/repos/54ac0c066956802e06000ffb/feed)
+[![Test Coverage](https://codeclimate.com/repos/54ac0c066956802e06000ffb/badges/441f1f6af106cc32b2b5/coverage.svg)](https://codeclimate.com/repos/54ac0c066956802e06000ffb/feed)
 
-
-
+I just bought a new Kindle Paperwhite, and it so happens it's the perfect form factor to read good old black-and-white mangas.
+
+So I decided to automate the process of fetching manga images from MangaReader.net, then optimize and compile them into PDF files that fit the Kindle resolution.
 
 ## Installation
 
-
+Set up your environment with:
+
+    sudo apt-get install imagemagick
+    sudo gem install bundler
+
+And install manga-downloadr with:
 
-
-gem install manga-downloadr
-```
+    gem install manga-downloadr
 
 ## Usage
 
 And then execute:
 
-    $ manga-downloadr -
+    $ manga-downloadr -u http://www.mangareader.net/onepunch-man -d /tmp/onepunch-man
+
+In this example, all the pages of "One Punch Man" will be downloaded to the directory "/tmp/onepunch-man" with the following filename format:
+
+    /tmp/onepunch-man/Onepunch-Man-Chap-00038-Pg-00011.jpg
+
+## Development
+
+Tests are in RSpec:
+
+    bundle exec rspec
+
+Version 2.0 is a complete rewrite, following what was learned writing my [Elixir version](https://github.com/akitaonrails/ex_manga_downloadr).
+
+This is basically a port of the [Crystal version](https://github.com/akitaonrails/cr_manga_downloadr).
+
+Elixir has superb parallelism and concurrency through Erlang's OTP architecture, so it's easy to process hundreds of parallel requests, limited only by how much MangaReader can respond to.
 
-
-where it was interrupted before.
+Crystal is also super fast (because it's compiled to native code) and has very good concurrency (through the use of Go-like CSP channels).
 
-
+This Ruby version uses native Threads. Because the work is I/O intensive, we assume we can run several HTTP requests concurrently. But because Threads have significantly more overhead than the Elixir or Crystal architectures, we will be limited by Ruby's MRI interpreter.
 
 ## Contributing
 
@@ -35,5 +55,6 @@ If you want to restart from scratch, delete the "/tmp/[your manga].yml" that sav
 
 ## TODO
 
+* Version 2.0 removes the crash-recovery (saved state) of Version 1.0 - it could be reimplemented
 * Move MangaReader specifics to a different class
 * Add support for MangaFox and other manga sites
data/bin/manga-downloadr
CHANGED
@@ -1,27 +1,42 @@
 #!/usr/bin/env ruby
-$LOAD_PATH.unshift File.join(File.dirname(__FILE__),
-require
+$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib")
+require "optparse"
+require "manga-downloadr"
+
+opt_manga_directory = "/tmp"
+opt_manga_root_uri = ""
+opt_batch_size = 50
+opt_resize_format = "600x800"
+opt_pages_per_volume = 250
 
-options = {}
 option_parser = OptionParser.new do |opts|
-
+  # Set a banner, displayed at the top
+  # of the help screen.
+  opts.banner = "Scrapes all the images from all pages of a MangaReader.net manga"
+
+  opts.on( "-d DIRECTORY", "--d DIRECTORY", "the directory path where to save the manga" ) do |directory|
+    opt_manga_directory = directory
+  end
+
+  opts.on( "-u URL", "--url URL", "the MangaReader full URI to the chapters index of the manga" ) do |url|
+    opt_manga_root_uri = url
+  end
 
-  opts.on("-
-
-    options[:url] = v
+  opts.on( "-b BATCH_SIZE", "--batch 50", "the amount of concurrent HTTP fetches to the MangaReader site, don't overdo it") do |batch|
+    opt_batch_size = batch.to_i
   end
 
-  opts.on("-
-
-    options[:name] = n
+  opts.on( "-r FORMAT", "--resize 600x800", "the current Kindle format is 600x800 but you can change it") do |format|
+    opt_resize_format = format
   end
 
-  opts.on("-
-
-    options[:directory] = d
+  opts.on( "-v PAGES", "--volume 250", "how many pages should each PDF volume have") do |volume|
+    opt_pages_per_volume = volume.to_i
   end
 
-
+  # This displays the help screen; all programs are
+  # assumed to have this option.
+  opts.on( "-h", "--help", "Display this screen" ) do
     puts opts
     exit
   end
@@ -35,25 +50,8 @@ rescue OptionParser::ParseError
   exit
 end
 
-
-
-
-
-  generator.fetch_chapter_urls!
-end
-unless generator.state?(:page_urls)
-  puts "\nMassive parallel scanning of all pages "
-  generator.fetch_page_urls!
-end
-unless generator.state?(:image_urls)
-  puts "\nMassive parallel scanning of all images "
-  generator.fetch_image_urls!
-  puts "\nTotal page links found: #{generator.chapter_pages_count}"
-end
-unless generator.state?(:images)
-  puts "\nMassive parallel download of all page images "
-  generator.fetch_images!
+if opt_manga_root_uri.size > 0
+  root_uri = URI.parse(opt_manga_root_uri)
+  config = MangaDownloadr::Config.new(root_uri.host, root_uri.path, opt_manga_directory, opt_batch_size, opt_resize_format, opt_pages_per_volume)
+  MangaDownloadr::Workflow.run(config)
 end
-puts "\nCompiling all images into PDF volumes "
-generator.compile_ebooks!
-puts "\nProcess finished."
data/lib/manga-downloadr.rb
CHANGED
@@ -1,219 +1,19 @@
-require
-require
-
-require
-require
-require
-require
-require
-
-
-
-
-
-
-
-
-
-
-
-# end
-
-module MangaDownloadr
-  ImageData = Struct.new(:folder, :filename, :url)
-
-  class Workflow
-    attr_accessor :manga_root_url, :manga_root, :manga_root_folder, :manga_name, :hydra_concurrency
-    attr_accessor :chapter_list, :chapter_pages, :chapter_images, :download_links, :chapter_pages_count
-    attr_accessor :manga_title, :pages_per_volume, :page_size
-    attr_accessor :processing_state
-    attr_accessor :fetch_page_urls_errors, :fetch_image_urls_errors, :fetch_images_errors
-
-    def initialize(root_url = nil, manga_name = nil, manga_root = nil, options = {})
-      root_url or raise ArgumentError.new("URL is required")
-      manga_root or raise ArgumentError.new("Manga root folder is required")
-      manga_name or raise ArgumentError.new("Manga slug is required")
-
-      self.manga_root_url = root_url
-      self.manga_root = manga_root
-      self.manga_root_folder = File.join(manga_root, manga_name)
-      self.manga_name = manga_name
-
-      self.hydra_concurrency = options[:hydra_concurrency] || 100
-
-      self.chapter_pages = {}
-      self.chapter_images = {}
-
-      self.pages_per_volume = options[:pages_per_volume] || 250
-      self.page_size = options[:page_size] || [600, 800]
-
-      self.processing_state = []
-      self.fetch_page_urls_errors = []
-      self.fetch_image_urls_errors = []
-      self.fetch_images_errors = []
-    end
-
-    def fetch_chapter_urls!
-      doc = Nokogiri::HTML(open(manga_root_url))
-
-      self.chapter_list = doc.css("#listing a").map { |l| l['href'] }
-      self.manga_title = doc.css("#mangaproperties h1").first.text
-
-      current_state :chapter_urls
-    end
-
-    def fetch_page_urls!
-      hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)
-      chapter_list.each do |chapter_link|
-        begin
-          request = Typhoeus::Request.new "http://www.mangareader.net#{chapter_link}"
-          request.on_complete do |response|
-            begin
-              chapter_doc = Nokogiri::HTML(response.body)
-              # pages = chapter_doc.css('#selectpage #pageMenu option')
-              pages = chapter_doc.xpath("//div[@id='selectpage']//select[@id='pageMenu']//option")
-              chapter_pages.merge!(chapter_link => pages.map { |p| p['value'] })
-              puts chapter_link
-              # print '.'
-            rescue => e
-              self.fetch_page_urls_errors << { url: chapter_link, error: e, body: response.body }
-              print 'x'
-            end
-          end
-          hydra.queue request
-        rescue => e
-          puts e
-        end
-      end
-      hydra.run
-      unless fetch_page_urls_errors.empty?
-        puts "\n Errors fetching page urls:"
-        puts fetch_page_urls_errors
-      end
-
-      self.chapter_pages_count = chapter_pages.values.inject(0) { |total, list| total += list.size }
-      current_state :page_urls
-    end
-
-    def fetch_image_urls!
-      hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)
-      chapter_list.each do |chapter_key|
-        chapter_pages[chapter_key].each do |page_link|
-          begin
-            request = Typhoeus::Request.new "http://www.mangareader.net#{page_link}"
-            request.on_complete do |response|
-              begin
-                chapter_doc = Nokogiri::HTML(response.body)
-                image = chapter_doc.css('#img').first
-                tokens = image['alt'].match("^(.*?)\s\-\s(.*?)$")
-                extension = File.extname(URI.parse(image['src']).path)
-
-                chapter_images.merge!(chapter_key => []) if chapter_images[chapter_key].nil?
-                chapter_images[chapter_key] << ImageData.new( tokens[1], "#{tokens[2]}#{extension}", image['src'] )
-                print '.'
-              rescue => e
-                self.fetch_image_urls_errors << { url: page_link, error: e }
-                print 'x'
-              end
-            end
-            hydra.queue request
-          rescue => e
-            puts e
-          end
-        end
-      end
-      hydra.run
-      unless fetch_image_urls_errors.empty?
-        puts "\nErrors fetching image urls:"
-        puts fetch_image_urls_errors
-      end
-
-      current_state :image_urls
-    end
-
-    def fetch_images!
-      hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)
-      chapter_list.each_with_index do |chapter_key, chapter_index|
-        chapter_images[chapter_key].each do |file|
-          downloaded_filename = File.join(manga_root_folder, file.folder, file.filename)
-          next if File.exists?(downloaded_filename) # effectively resumes the download list without re-downloading everything
-          request = Typhoeus::Request.new file.url
-          request.on_complete do |response|
-            begin
-              # download
-              FileUtils.mkdir_p(File.join(manga_root_folder, file.folder))
-              File.open(downloaded_filename, "wb+") { |f| f.write response.body }
-
-              # resize
-              image = Magick::Image.read( downloaded_filename ).first
-              resized = image.resize_to_fit(600, 800)
-              resized.write( downloaded_filename ) { self.quality = 50 }
-
-              print '.'
-              GC.start # to avoid a leak too big (ImageMagick is notorious for that, specially on resizes)
-            rescue => e
-              self.fetch_images_errors << { url: file.url, error: e }
-              print '.'
-            end
-          end
-          hydra.queue request
-        end
-      end
-      hydra.run
-      unless fetch_images_errors.empty?
-        puts "\nErrors downloading images:"
-        puts fetch_images_errors
-      end
-
-      current_state :images
-    end
-
-    def compile_ebooks!
-      folders = Dir[manga_root_folder + "/*/"].sort_by { |element| ary = element.split(" ").last.to_i }
-      self.download_links = folders.inject([]) do |list, folder|
-        list += Dir[folder + "*.*"].sort_by { |element| ary = element.split(" ").last.to_i }
-      end
-
-      # concatenating PDF files (250 pages per volume)
-      chapter_number = 0
-      while !download_links.empty?
-        chapter_number += 1
-        pdf_file = File.join(manga_root_folder, "#{manga_title} #{chapter_number}.pdf")
-        list = download_links.slice!(0..pages_per_volume)
-        Prawn::Document.generate(pdf_file, page_size: page_size) do |pdf|
-          list.each do |image_file|
-            begin
-              pdf.image image_file, position: :center, vposition: :center
-            rescue => e
-              puts "Error in #{image_file} - #{e}"
-            end
-          end
-        end
-        print '.'
-      end
-
-      current_state :ebooks
-    end
-
-    def state?(state)
-      self.processing_state.include?(state)
-    end
-
-    private def current_state(state)
-      self.processing_state << state
-      MangaDownloadr::Workflow.serialize(self)
-    end
-
-    class << self
-      def serialize(obj)
-        File.open("/tmp/#{obj.manga_name}.yaml", 'w') {|f| f.write(YAML::dump(obj)) }
-      end
-
-      def create(root_url, manga_name, manga_root, options = {})
-        dump_file_name = "/tmp/#{manga_name}.yaml"
-        return YAML::load(File.read(dump_file_name)) if File.exists?(dump_file_name)
-        MangaDownloadr::Workflow.new(root_url, manga_name, manga_root, options)
-      end
-    end
-  end
-end
+require "rubygems"
+require "bundler/setup"
+
+require "chainable_methods"
+require "nokogiri"
+require "fileutils"
+require "net/http"
+require "open-uri"
+
+$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
+
+require "manga-downloadr/records.rb"
+require "manga-downloadr/downloadr_client.rb"
+require "manga-downloadr/concurrency.rb"
+require "manga-downloadr/chapters.rb"
+require "manga-downloadr/pages.rb"
+require "manga-downloadr/page_image.rb"
+require "manga-downloadr/image_downloader.rb"
+require "manga-downloadr/workflow.rb"
data/lib/manga-downloadr/chapters.rb
ADDED
@@ -0,0 +1,15 @@
+module MangaDownloadr
+  class Chapters < DownloadrClient
+    def initialize(domain, root_uri)
+      @root_uri = root_uri
+      super(domain)
+    end
+
+    def fetch
+      get @root_uri do |html|
+        nodes = html.css("#listing a")
+        nodes.map { |node| node["href"] }
+      end
+    end
+  end
+end
data/lib/manga-downloadr/concurrency.rb
ADDED
@@ -0,0 +1,28 @@
+module MangaDownloadr
+  class Concurrency
+    def initialize(engine_klass = nil, config = Config.new, turn_on_engine = true)
+      @engine_klass = engine_klass
+      @config = config
+      @turn_on_engine = turn_on_engine
+    end
+
+    def fetch(collection, &block)
+      results = []
+      collection&.each_slice(@config.download_batch_size) do |batch|
+        mutex = Mutex.new
+        threads = batch.map do |item|
+          Thread.new {
+            engine = @turn_on_engine ? @engine_klass.new(@config.domain) : nil
+            Thread.current["results"] = block.call(item, engine)&.flatten
+            mutex.synchronize do
+              results += Thread.current["results"]
+            end
+          }
+        end
+        threads.each(&:join)
+        puts "Processed so far: #{results&.size}"
+      end
+      results
+    end
+  end
+end
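
Workflow (further down) drives this class by handing it a collection and a block; each batch spawns one Thread per item, and each thread builds its own engine instance so no Net::HTTP connection is shared between threads. For example, this is how the page scan is fanned out in workflow.rb:

    # Fetch all page links, config.download_batch_size requests at a time:
    reactor = MangaDownloadr::Concurrency.new(MangaDownloadr::Pages, config)
    page_links = reactor.fetch(chapters) do |link, engine|
      engine&.fetch(link)  # each thread gets its own Pages client
    end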
data/lib/manga-downloadr/downloadr_client.rb
ADDED
@@ -0,0 +1,24 @@
+module MangaDownloadr
+  class DownloadrClient
+    def initialize(domain)
+      @domain = domain
+      @http_client = Net::HTTP.new(@domain)
+    end
+
+    def get(uri, &block)
+      response = @http_client.get(uri)
+      case response.code
+      when "301"
+        get response.headers["Location"], &block
+      when "200"
+        parsed = Nokogiri::HTML(response.body)
+        block.call(parsed)
+      end
+    rescue Net::HTTPGatewayTimeOut, Net::HTTPRequestTimeOut
+      # TODO: naive infinite retry, it will loop infinitely if the link really doesn't exist
+      # so should have a way to control the amount of retries per link
+      sleep 1
+      get(uri, &block)
+    end
+  end
+end
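
The TODO above flags the unbounded retry. A minimal sketch of one way to bound it (hypothetical, not part of this release; note that Net::HTTP signals timeouts by raising Net::ReadTimeout / Net::OpenTimeout, and the redirect target is read with response["Location"]):

    # Hypothetical bounded-retry variant of DownloadrClient#get:
    def get(uri, retries = 3, &block)
      response = @http_client.get(uri)
      case response.code
      when "301" then get(response["Location"], retries, &block)
      when "200" then block.call(Nokogiri::HTML(response.body))
      end
    rescue Net::ReadTimeout, Net::OpenTimeout
      raise if retries.zero?  # give up instead of looping forever
      sleep 1
      get(uri, retries - 1, &block)
    end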
data/lib/manga-downloadr/image_downloader.rb
ADDED
@@ -0,0 +1,19 @@
+module MangaDownloadr
+  class ImageDownloader < DownloadrClient
+    def fetch(image_src, filename)
+      File.delete(filename) if File.exists?(filename)
+      response = @http_client.get(image_src)
+      case response.code
+      when "301"
+        fetch(response.headers["Location"], filename)
+      when "200"
+        File.open(filename, "w") do |f|
+          f.print response.body
+        end
+      end
+    rescue Net::HTTPGatewayTimeOut, Net::HTTPRequestTimeOut
+      sleep 1
+      fetch(image_src, filename)
+    end
+  end
+end
data/lib/manga-downloadr/page_image.rb
ADDED
@@ -0,0 +1,25 @@
+module MangaDownloadr
+  class PageImage < DownloadrClient
+    def fetch(page_link)
+      get page_link do |html|
+        images = html.css('#img')
+
+        image_alt = images[0]["alt"]
+        image_src = images[0]["src"]
+
+        if image_alt && image_src
+          extension = image_src.split(".").last
+          list = image_alt.split(" ").reverse
+          title_name = list[4..-1].join(" ")
+          chapter_number = list[3].rjust(5, '0')
+          page_number = list[0].rjust(5, '0')
+
+          uri = URI.parse(image_src)
+          Image.new(uri.host, uri.path, "#{title_name}-Chap-#{chapter_number}-Pg-#{page_number}.#{extension}")
+        else
+          raise Exception.new("Couldn't find proper metadata alt in the image tag")
+        end
+      end
+    end
+  end
+end
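
The reversed split is easier to follow with a concrete value. Assuming an alt attribute of the form "Onepunch-Man 38 - Page 11" (a format inferred from the README's example filename, not confirmed by this diff):

    alt  = "Onepunch-Man 38 - Page 11"
    list = alt.split(" ").reverse  # => ["11", "Page", "-", "38", "Onepunch-Man"]
    list[4..-1].join(" ")          # => "Onepunch-Man"  (title_name)
    list[3].rjust(5, '0')          # => "00038"         (chapter_number)
    list[0].rjust(5, '0')          # => "00011"         (page_number)
    # resulting filename: "Onepunch-Man-Chap-00038-Pg-00011.jpg"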
data/lib/manga-downloadr/pages.rb
ADDED
@@ -0,0 +1,10 @@
+module MangaDownloadr
+  class Pages < DownloadrClient
+    def fetch(chapter_link)
+      get chapter_link do |html|
+        nodes = html.xpath("//div[@id='selectpage']//select[@id='pageMenu']//option")
+        nodes.map { |node| [chapter_link, node.children.to_s].join("/") }
+      end
+    end
+  end
+end
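
Each option's text is the page number, so fetch builds page URLs by appending it to the chapter path. With a chapter link of "/onepunch-man/38" and option texts "1", "2", "3" (illustrative values, assuming MangaReader's usual URL scheme), the result would be ["/onepunch-man/38/1", "/onepunch-man/38/2", "/onepunch-man/38/3"].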
data/lib/manga-downloadr/workflow.rb
ADDED
@@ -0,0 +1,83 @@
+module MangaDownloadr
+  class Workflow
+    def self.run(config = Config.new)
+      FileUtils.mkdir_p config.download_directory
+
+      CM(Workflow, config)
+        .fetch_chapters
+        .fetch_pages(config)
+        .fetch_images(config)
+        .download_images(config)
+        .optimize_images(config)
+        .prepare_volumes(config)
+        .unwrap
+
+      puts "Done!"
+    end
+
+    def self.fetch_chapters(config)
+      puts "Fetching chapters ..."
+      chapters = Chapters.new(config.domain, config.root_uri).fetch
+      puts "Number of Chapters: #{chapters&.size}"
+      chapters
+    end
+
+    def self.fetch_pages(chapters, config)
+      puts "Fetching pages from all chapters ..."
+      reactor = Concurrency.new(Pages, config)
+      reactor.fetch(chapters) do |link, engine|
+        engine&.fetch(link)
+      end
+    end
+
+    def self.fetch_images(pages, config)
+      puts "Fetching the Image URLs from each Page ..."
+      reactor = Concurrency.new(PageImage, config)
+      reactor.fetch(pages) do |link, engine|
+        [ engine&.fetch(link) ]
+      end
+    end
+
+    def self.download_images(images, config)
+      puts "Downloading each image ..."
+      reactor = Concurrency.new(ImageDownloader, config, false)
+      reactor.fetch(images) do |image, _|
+        image_file = File.join(config.download_directory, image.filename)
+        unless File.exists?(image_file)
+          ImageDownloader.new(image.host).fetch(image.path, image_file)
+        end
+        [ image_file ]
+      end
+    end
+
+    def self.optimize_images(downloads, config)
+      puts "Running mogrify to convert all images down to Kindle supported size (600x800)"
+      `mogrify -resize #{config.image_dimensions} #{config.download_directory}/*.jpg`
+      downloads
+    end
+
+    def self.prepare_volumes(downloads, config)
+      manga_name = config.download_directory.split("/")&.last
+      index = 1
+      volumes = []
+      downloads.each_slice(config.pages_per_volume) do |batch|
+        volume_directory = "#{config.download_directory}/#{manga_name}_#{index}"
+        volume_file = "#{volume_directory}.pdf"
+        volumes << volume_file
+        FileUtils.mkdir_p volume_directory
+
+        puts "Moving images to #{volume_directory} ..."
+        batch.each do |file|
+          destination_file = file.split("/").last
+          `mv #{file} #{volume_directory}/#{destination_file}`
+        end
+
+        puts "Generating #{volume_file} ..."
+        `convert #{volume_directory}/*.jpg #{volume_file}`
+
+        index += 1
+      end
+      volumes
+    end
+  end
+end
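
CM comes from the new chainable_methods dependency: CM(Workflow, config) wraps the config value, each chained call invokes the named module function with the previous result prepended as the first argument (which is why fetch_pages is defined as fetch_pages(chapters, config)), and unwrap returns the final value. The pipeline above is therefore equivalent to:

    chapters  = Workflow.fetch_chapters(config)
    pages     = Workflow.fetch_pages(chapters, config)
    images    = Workflow.fetch_images(pages, config)
    files     = Workflow.download_images(images, config)
    resized   = Workflow.optimize_images(files, config)
    volumes   = Workflow.prepare_volumes(resized, config)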
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: manga-downloadr
 version: !ruby/object:Gem::Version
-  version:
+  version: 2.0.0
 platform: ruby
 authors:
 - AkitaOnRails
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2016-06-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -25,63 +25,35 @@ dependencies:
   - !ruby/object:Gem::Version
     version: '1.6'
 - !ruby/object:Gem::Dependency
-  name:
+  name: chainable_methods
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version:
+        version: 0.1.2
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
     - !ruby/object:Gem::Version
-        version:
+        version: 0.1.2
 - !ruby/object:Gem::Dependency
-  name:
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '2.13'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '2.13'
-- !ruby/object:Gem::Dependency
-  name: prawn
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '1.3'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '1.3'
-- !ruby/object:Gem::Dependency
-  name: fastimage
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '
-  type: :
+        version: '0'
+  type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '
+        version: '0'
 - !ruby/object:Gem::Dependency
-  name:
+  name: rspec
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -95,7 +67,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name:
+  name: webmock
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -109,7 +81,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name:
+  name: pry
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -134,8 +106,15 @@ files:
 - Rakefile
 - bin/manga-downloadr
 - lib/manga-downloadr.rb
+- lib/manga-downloadr/chapters.rb
+- lib/manga-downloadr/concurrency.rb
+- lib/manga-downloadr/downloadr_client.rb
+- lib/manga-downloadr/image_downloader.rb
+- lib/manga-downloadr/page_image.rb
+- lib/manga-downloadr/pages.rb
+- lib/manga-downloadr/records.rb
 - lib/manga-downloadr/version.rb
-- lib/
+- lib/manga-downloadr/workflow.rb
 homepage: http://github.com/akitaonrails/manga-downloadr
 licenses:
 - MIT
@@ -156,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.
+rubygems_version: 2.5.1
 signing_key:
 specification_version: 4
 summary: downloads and compile to a Kindle optimized manga in PDF
data/lib/retryable_typhoeus.rb
DELETED
@@ -1,40 +0,0 @@
-# original source: https://gist.github.com/kunalmodi/2939288
-module RetryableTyphoeus
-  DEFAULT_RETRIES = 1
-
-  module RequestExtension
-    def original_on_complete=(proc)
-      @original_on_complete = proc
-    end
-
-    def original_on_complete
-      @original_on_complete
-    end
-
-    def retries=(retries)
-      @retries = retries
-    end
-
-    def retries
-      @retries ||= 0
-    end
-  end
-
-  module HydraExtension
-    def queue_with_retry(request, opts = {})
-      request.retries = (opts[:retries] || RetryableTyphoeus::DEFAULT_RETRIES).to_i
-      request.original_on_complete ||= request.on_complete
-      request.on_complete do |response|
-        if response.success? || response.request.retries <= 0
-          request.original_on_complete.map do |callback|
-            response.handled_response = callback.call(response)
-          end
-        else
-          response.request.retries -= 1
-          queue response.request
-        end
-      end
-      queue request
-    end
-  end
-end