manga-downloadr 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +14 -0
- data/bin/manga-downloadr +21 -2
- data/lib/manga-downloadr/chapters.rb +2 -2
- data/lib/manga-downloadr/concurrency.rb +31 -13
- data/lib/manga-downloadr/downloadr_client.rb +21 -2
- data/lib/manga-downloadr/image_downloader.rb +1 -1
- data/lib/manga-downloadr/records.rb +5 -1
- data/lib/manga-downloadr/version.rb +1 -1
- data/lib/manga-downloadr/workflow.rb +14 -2
- metadata +58 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 26aa19f21806d820b467c181ab6a48552580e3a1
|
4
|
+
data.tar.gz: d51638cea0d79fd66c01520e117cc6e6a6b7b836
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d79dabfc879ccff72159929c7d6383d2f4f6fa8747b641e439caccbe9163f1335b99e2e5efd9740751198d902f28b51ace9f64001544518e286c0e3765189f3
|
7
|
+
data.tar.gz: 003812def9a76ab1bc988f7884bfb773aa439c1754c8c14c4d24270722c7a0dad97aee2084a9be5ed4e2dcedcdc372b9d01926cdc71eab824e4429224529a655
|
data/README.md
CHANGED
@@ -29,6 +29,10 @@ In this example, all the pages of the "One Punch Man" will be downloaded to the
|
|
29
29
|
|
30
30
|
/tmp/onepunch-man/Onepunch-Man-Chap-00038-Pg-00011.jpg
|
31
31
|
|
32
|
+
You can turn on the HTTP cache so that an interrupted process can be resumed later:
|
33
|
+
|
34
|
+
$ manga-downloadr -u http://www.mangareader.net/onepunch-man -d /tmp/onepunch-man --cache
|
35
|
+
|
32
36
|
## Development
|
33
37
|
|
34
38
|
Tests are in Rspec:
|
@@ -45,6 +49,16 @@ Crystal is also super fast (because its compiled to native code) and has very go
|
|
45
49
|
|
46
50
|
This Ruby version uses native Threads. Because this is I/O intensive, we assume we can run several HTTP requests concurrently. But because Threads have significantly more overhead than Elixir or Crystal architectures, we will be limited by Ruby's MRI interpreter.
|
47
51
|
|
52
|
+
There is now a test mode you can use for benchmark purposes:
|
53
|
+
|
54
|
+
time bin/manga-downloadr --test
|
55
|
+
# or in JRuby:
|
56
|
+
# time jruby --dev -S bin/manga-downloadr --test
|
57
|
+
|
58
|
+
This will use One-Punch Man as a test sample. You can also turn on the cache to avoid external I/O interference:
|
59
|
+
|
60
|
+
time bin/manga-downloadr --test --cache
|
61
|
+
|
48
62
|
## Contributing
|
49
63
|
|
50
64
|
1. Fork it ( https://github.com/akitaonrails/manga-downloadr/fork )
|
data/bin/manga-downloadr
CHANGED
@@ -2,12 +2,15 @@
|
|
2
2
|
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
3
3
|
require "optparse"
|
4
4
|
require "manga-downloadr"
|
5
|
+
require "benchmark"
|
5
6
|
|
6
7
|
opt_manga_directory = "/tmp"
|
7
8
|
opt_manga_root_uri = ""
|
8
9
|
opt_batch_size = 50
|
9
10
|
opt_resize_format = "600x800"
|
10
11
|
opt_pages_per_volume = 250
|
12
|
+
opt_run_tests = false
|
13
|
+
opt_cache_pages = false
|
11
14
|
|
12
15
|
option_parser = OptionParser.new do |opts|
|
13
16
|
# Set a banner, displayed at the top
|
@@ -34,6 +37,16 @@ option_parser = OptionParser.new do |opts|
|
|
34
37
|
opt_pages_per_volume = volume.to_i
|
35
38
|
end
|
36
39
|
|
40
|
+
opts.on( "-c", "--cache", "turn on the HTTP cache so you can resume a process if you needed to stop before ending") do |_|
|
41
|
+
opt_cache_pages = true
|
42
|
+
end
|
43
|
+
|
44
|
+
opts.on( "-t", "--test", "run a simulation to one-punch man manga") do |_|
|
45
|
+
opt_manga_root_uri = "http://www.mangareader.net/onepunch-man"
|
46
|
+
opt_manga_directory = "/tmp/cr-one-punch"
|
47
|
+
opt_run_tests = true
|
48
|
+
end
|
49
|
+
|
37
50
|
# This displays the help screen, all programs are
|
38
51
|
# assumed to have this option.
|
39
52
|
opts.on( "-h", "--help", "Display this screen" ) do
|
@@ -52,6 +65,12 @@ end
|
|
52
65
|
|
53
66
|
if opt_manga_root_uri.size > 0
|
54
67
|
root_uri = URI.parse(opt_manga_root_uri)
|
55
|
-
config = MangaDownloadr::Config.new(root_uri.host, root_uri.path, opt_manga_directory, opt_batch_size, opt_resize_format, opt_pages_per_volume)
|
56
|
-
|
68
|
+
config = MangaDownloadr::Config.new(root_uri.host, root_uri.path, opt_manga_directory, opt_batch_size, opt_resize_format, opt_pages_per_volume, opt_cache_pages)
|
69
|
+
if opt_run_tests
|
70
|
+
puts Benchmark.measure("One-Punch Man test - MRI Ruby version") {
|
71
|
+
MangaDownloadr::Workflow.run_tests(config)
|
72
|
+
}
|
73
|
+
else
|
74
|
+
MangaDownloadr::Workflow.run(config)
|
75
|
+
end
|
57
76
|
end
|
@@ -1,26 +1,44 @@
|
|
1
|
+
require "thread/pool"
|
2
|
+
|
1
3
|
module MangaDownloadr
|
2
4
|
class Concurrency
|
3
5
|
def initialize(engine_klass = nil, config = Config.new, turn_on_engine = true)
|
4
|
-
@engine_klass
|
5
|
-
@config
|
6
|
+
@engine_klass = engine_klass
|
7
|
+
@config = config
|
6
8
|
@turn_on_engine = turn_on_engine
|
7
9
|
end
|
8
10
|
|
9
11
|
def fetch(collection, &block)
|
12
|
+
pool = Thread.pool(@config.download_batch_size)
|
13
|
+
mutex = Mutex.new
|
14
|
+
results = []
|
15
|
+
|
16
|
+
collection.each do |item|
|
17
|
+
pool.process {
|
18
|
+
engine = @turn_on_engine ? @engine_klass.new(@config.domain, @config.cache_http) : nil
|
19
|
+
reply = block.call(item, engine)&.flatten
|
20
|
+
mutex.synchronize do
|
21
|
+
results += ( reply || [] )
|
22
|
+
end
|
23
|
+
}
|
24
|
+
end
|
25
|
+
pool.shutdown
|
26
|
+
|
27
|
+
results
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
# this method is the same as the above but sequential, without Threads
|
33
|
+
# it's not to be used in the application, just to be used as a baseline for benchmark
|
34
|
+
def fetch_sequential(collection, &block)
|
10
35
|
results = []
|
36
|
+
engine = @turn_on_engine ? @engine_klass.new(@config.domain, @config.cache_http) : nil
|
11
37
|
collection&.each_slice(@config.download_batch_size) do |batch|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
engine = @turn_on_engine ? @engine_klass.new(@config.domain) : nil
|
16
|
-
Thread.current["results"] = block.call(item, engine)&.flatten
|
17
|
-
mutex.synchronize do
|
18
|
-
results += Thread.current["results"]
|
19
|
-
end
|
20
|
-
}
|
38
|
+
batch.each do |item|
|
39
|
+
batch_results = block.call(item, engine)&.flatten
|
40
|
+
results += ( batch_results || [])
|
21
41
|
end
|
22
|
-
threads.each(&:join)
|
23
|
-
puts "Processed so far: #{results&.size}"
|
24
42
|
end
|
25
43
|
results
|
26
44
|
end
|
@@ -1,16 +1,31 @@
|
|
1
|
+
require "digest"
|
2
|
+
|
1
3
|
module MangaDownloadr
|
2
4
|
class DownloadrClient
|
3
|
-
def initialize(domain)
|
5
|
+
def initialize(domain, cache_http)
|
4
6
|
@domain = domain
|
7
|
+
@cache_http = cache_http
|
5
8
|
@http_client = Net::HTTP.new(@domain)
|
6
9
|
end
|
7
10
|
|
8
11
|
def get(uri, &block)
|
9
|
-
|
12
|
+
cache_path = "/tmp/manga-downloadr-cache/#{cache_filename(uri)}"
|
13
|
+
response = if @cache_http && File.exists?(cache_path)
|
14
|
+
body = File.read(cache_path)
|
15
|
+
MangaDownloadr::HTTPResponse.new("200", body)
|
16
|
+
else
|
17
|
+
@http_client.get(uri, { "User-Agent": USER_AGENT })
|
18
|
+
end
|
19
|
+
|
10
20
|
case response.code
|
11
21
|
when "301"
|
12
22
|
get response.headers["Location"], &block
|
13
23
|
when "200"
|
24
|
+
if @cache_http && !File.exists?(cache_path)
|
25
|
+
File.open(cache_path, "w") do |f|
|
26
|
+
f.write response.body
|
27
|
+
end
|
28
|
+
end
|
14
29
|
parsed = Nokogiri::HTML(response.body)
|
15
30
|
block.call(parsed)
|
16
31
|
end
|
@@ -20,5 +35,9 @@ module MangaDownloadr
|
|
20
35
|
sleep 1
|
21
36
|
get(uri, &block)
|
22
37
|
end
|
38
|
+
|
39
|
+
private def cache_filename(uri)
|
40
|
+
Digest::MD5.hexdigest(uri)
|
41
|
+
end
|
23
42
|
end
|
24
43
|
end
|
@@ -2,7 +2,7 @@ module MangaDownloadr
|
|
2
2
|
class ImageDownloader < DownloadrClient
|
3
3
|
def fetch(image_src, filename)
|
4
4
|
File.delete(filename) if File.exists?(filename)
|
5
|
-
response = @http_client.get(image_src)
|
5
|
+
response = @http_client.get(image_src, { "User-Agent": USER_AGENT })
|
6
6
|
case response.code
|
7
7
|
when "301"
|
8
8
|
fetch(response.headers["Location"], filename)
|
@@ -1,4 +1,8 @@
|
|
1
1
|
module MangaDownloadr
|
2
2
|
Image = Struct.new *%i[host path filename]
|
3
|
-
Config = Struct.new *%i[domain root_uri download_directory download_batch_size image_dimensions pages_per_volume]
|
3
|
+
Config = Struct.new *%i[domain root_uri download_directory download_batch_size image_dimensions pages_per_volume cache_http]
|
4
|
+
|
5
|
+
HTTPResponse = Struct.new *%i[code body]
|
6
|
+
|
7
|
+
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/45.0.2454.101 Chrome/45.0.2454.101 Safari/537.36"
|
4
8
|
end
|
@@ -3,7 +3,7 @@ module MangaDownloadr
|
|
3
3
|
def self.run(config = Config.new)
|
4
4
|
FileUtils.mkdir_p config.download_directory
|
5
5
|
|
6
|
-
CM(
|
6
|
+
CM(config, Workflow)
|
7
7
|
.fetch_chapters
|
8
8
|
.fetch_pages(config)
|
9
9
|
.fetch_images(config)
|
@@ -15,9 +15,21 @@ module MangaDownloadr
|
|
15
15
|
puts "Done!"
|
16
16
|
end
|
17
17
|
|
18
|
+
def self.run_tests(config = Config.new)
|
19
|
+
FileUtils.mkdir_p "/tmp/manga-downloadr-cache"
|
20
|
+
|
21
|
+
CM(Workflow, config)
|
22
|
+
.fetch_chapters
|
23
|
+
.fetch_pages(config)
|
24
|
+
.fetch_images(config)
|
25
|
+
.unwrap
|
26
|
+
|
27
|
+
puts "Done!"
|
28
|
+
end
|
29
|
+
|
18
30
|
def self.fetch_chapters(config)
|
19
31
|
puts "Fetching chapters ..."
|
20
|
-
chapters = Chapters.new(config.domain, config.root_uri).fetch
|
32
|
+
chapters = Chapters.new(config.domain, config.root_uri, config.cache_http).fetch
|
21
33
|
puts "Number of Chapters: #{chapters&.size}"
|
22
34
|
chapters
|
23
35
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: manga-downloadr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- AkitaOnRails
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -30,70 +30,110 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
33
|
+
version: 0.2.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.
|
40
|
+
version: 0.2.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: thread
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.2.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.2.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bundler
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.11'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.11'
|
41
69
|
- !ruby/object:Gem::Dependency
|
42
70
|
name: rake
|
43
71
|
requirement: !ruby/object:Gem::Requirement
|
44
72
|
requirements:
|
45
|
-
- - "
|
73
|
+
- - "~>"
|
46
74
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
75
|
+
version: '10.0'
|
48
76
|
type: :development
|
49
77
|
prerelease: false
|
50
78
|
version_requirements: !ruby/object:Gem::Requirement
|
51
79
|
requirements:
|
52
|
-
- - "
|
80
|
+
- - "~>"
|
53
81
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
82
|
+
version: '10.0'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: rspec
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
58
86
|
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 3.4.0
|
59
90
|
- - ">="
|
60
91
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
92
|
+
version: 3.4.0
|
62
93
|
type: :development
|
63
94
|
prerelease: false
|
64
95
|
version_requirements: !ruby/object:Gem::Requirement
|
65
96
|
requirements:
|
97
|
+
- - "~>"
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 3.4.0
|
66
100
|
- - ">="
|
67
101
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
102
|
+
version: 3.4.0
|
69
103
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
104
|
+
name: pry
|
71
105
|
requirement: !ruby/object:Gem::Requirement
|
72
106
|
requirements:
|
73
|
-
- - "
|
107
|
+
- - "~>"
|
74
108
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
109
|
+
version: 0.10.3
|
76
110
|
type: :development
|
77
111
|
prerelease: false
|
78
112
|
version_requirements: !ruby/object:Gem::Requirement
|
79
113
|
requirements:
|
80
|
-
- - "
|
114
|
+
- - "~>"
|
81
115
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
116
|
+
version: 0.10.3
|
83
117
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
118
|
+
name: webmock
|
85
119
|
requirement: !ruby/object:Gem::Requirement
|
86
120
|
requirements:
|
121
|
+
- - "~>"
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '2.1'
|
87
124
|
- - ">="
|
88
125
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
126
|
+
version: 2.1.0
|
90
127
|
type: :development
|
91
128
|
prerelease: false
|
92
129
|
version_requirements: !ruby/object:Gem::Requirement
|
93
130
|
requirements:
|
131
|
+
- - "~>"
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '2.1'
|
94
134
|
- - ">="
|
95
135
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
136
|
+
version: 2.1.0
|
97
137
|
description: downloads any manga from MangaReader.net
|
98
138
|
email: boss@akitaonrails.com
|
99
139
|
executables:
|