manga-downloadr 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b083f3caf90e18e3afa86b91fbbd6d54c1b14236
4
- data.tar.gz: 14ba9e5c6f03356dd90446b966acf73caf201b28
3
+ metadata.gz: 26aa19f21806d820b467c181ab6a48552580e3a1
4
+ data.tar.gz: d51638cea0d79fd66c01520e117cc6e6a6b7b836
5
5
  SHA512:
6
- metadata.gz: d15ce2ed73541b567ded076918b114b359443256749e5a12d488f0de9271d69ad0ba3be176557b33d41ec4aeb93952216a7398620b7d77a306ad75fb5a760959
7
- data.tar.gz: 03a5734f26846feb483d645aaaf504adab7167cd73004a36e7b347e4c08d16927a33cfbf07289bc2b61a2ddf89a244b47dcdf0230c28a257feee2a6f10f41bdb
6
+ metadata.gz: 2d79dabfc879ccff72159929c7d6383d2f4f6fa8747b641e439caccbe9163f1335b99e2e5efd9740751198d902f28b51ace9f64001544518e286c0e3765189f3
7
+ data.tar.gz: 003812def9a76ab1bc988f7884bfb773aa439c1754c8c14c4d24270722c7a0dad97aee2084a9be5ed4e2dcedcdc372b9d01926cdc71eab824e4429224529a655
data/README.md CHANGED
@@ -29,6 +29,10 @@ In this example, all the pages of the "One Punch Man" will be downloaded to the
29
29
 
30
30
  /tmp/onepunch-man/Onepunch-Man-Chap-00038-Pg-00011.jpg
31
31
 
32
+ If you want to be able to resume an interrupted download later, you can turn on the HTTP cache:
33
+
34
+ $ manga-downloadr -u http://www.mangareader.net/onepunch-man -d /tmp/onepunch-man --cache
35
+
32
36
  ## Development
33
37
 
34
38
  Tests are in Rspec:
@@ -45,6 +49,16 @@ Crystal is also super fast (because its compiled to native code) and has very go
45
49
 
46
50
  This Ruby version uses native Threads. Because this is I/O intensive, we assume we can run several HTTP requests concurrently. But because Threads have significantly more overhead than Elixir or Crystal architectures, we will be limited by Ruby's MRI interpreter.
47
51
 
52
+ There is now a test mode you can use for benchmark purposes:
53
+
54
+ time bin/manga-downloadr --test
55
+ # or in JRuby:
56
+ # time jruby --dev -S bin/manga-downloadr --test
57
+
58
+ This will use One-Punch Man as a test sample. You can also turn on the cache so that external I/O does not interfere with the measurement:
59
+
60
+ time bin/manga-downloadr --test --cache
61
+
48
62
  ## Contributing
49
63
 
50
64
  1. Fork it ( https://github.com/akitaonrails/manga-downloadr/fork )
@@ -2,12 +2,15 @@
2
2
  $LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib")
3
3
  require "optparse"
4
4
  require "manga-downloadr"
5
+ require "benchmark"
5
6
 
6
7
  opt_manga_directory = "/tmp"
7
8
  opt_manga_root_uri = ""
8
9
  opt_batch_size = 50
9
10
  opt_resize_format = "600x800"
10
11
  opt_pages_per_volume = 250
12
+ opt_run_tests = false
13
+ opt_cache_pages = false
11
14
 
12
15
  option_parser = OptionParser.new do |opts|
13
16
  # Set a banner, displayed at the top
@@ -34,6 +37,16 @@ option_parser = OptionParser.new do |opts|
34
37
  opt_pages_per_volume = volume.to_i
35
38
  end
36
39
 
40
+ opts.on( "-c", "--cache", "turn on the HTTP cache so you can resume a process if you needed to stop before ending") do |_|
41
+ opt_cache_pages = true
42
+ end
43
+
44
+ opts.on( "-t", "--test", "run a simulation to one-punch man manga") do |_|
45
+ opt_manga_root_uri = "http://www.mangareader.net/onepunch-man"
46
+ opt_manga_directory = "/tmp/cr-one-punch"
47
+ opt_run_tests = true
48
+ end
49
+
37
50
  # This displays the help screen, all programs are
38
51
  # assumed to have this option.
39
52
  opts.on( "-h", "--help", "Display this screen" ) do
@@ -52,6 +65,12 @@ end
52
65
 
53
66
  if opt_manga_root_uri.size > 0
54
67
  root_uri = URI.parse(opt_manga_root_uri)
55
- config = MangaDownloadr::Config.new(root_uri.host, root_uri.path, opt_manga_directory, opt_batch_size, opt_resize_format, opt_pages_per_volume)
56
- MangaDownloadr::Workflow.run(config)
68
+ config = MangaDownloadr::Config.new(root_uri.host, root_uri.path, opt_manga_directory, opt_batch_size, opt_resize_format, opt_pages_per_volume, opt_cache_pages)
69
+ if opt_run_tests
70
+ puts Benchmark.measure("One-Punch Man test - MRI Ruby version") {
71
+ MangaDownloadr::Workflow.run_tests(config)
72
+ }
73
+ else
74
+ MangaDownloadr::Workflow.run(config)
75
+ end
57
76
  end
@@ -1,8 +1,8 @@
1
1
  module MangaDownloadr
2
2
  class Chapters < DownloadrClient
3
- def initialize(domain, root_uri)
3
+ def initialize(domain, root_uri, cache_http)
4
4
  @root_uri = root_uri
5
- super(domain)
5
+ super(domain, cache_http)
6
6
  end
7
7
 
8
8
  def fetch
@@ -1,26 +1,44 @@
1
+ require "thread/pool"
2
+
1
3
  module MangaDownloadr
2
4
  class Concurrency
3
5
  def initialize(engine_klass = nil, config = Config.new, turn_on_engine = true)
4
- @engine_klass = engine_klass
5
- @config = config
6
+ @engine_klass = engine_klass
7
+ @config = config
6
8
  @turn_on_engine = turn_on_engine
7
9
  end
8
10
 
9
11
  def fetch(collection, &block)
12
+ pool = Thread.pool(@config.download_batch_size)
13
+ mutex = Mutex.new
14
+ results = []
15
+
16
+ collection.each do |item|
17
+ pool.process {
18
+ engine = @turn_on_engine ? @engine_klass.new(@config.domain, @config.cache_http) : nil
19
+ reply = block.call(item, engine)&.flatten
20
+ mutex.synchronize do
21
+ results += ( reply || [] )
22
+ end
23
+ }
24
+ end
25
+ pool.shutdown
26
+
27
+ results
28
+ end
29
+
30
+ private
31
+
32
+ # this method is the same as the above but sequential, without Threads
33
+ # it's not to be used in the application, just to be used as a baseline for benchmark
34
+ def fetch_sequential(collection, &block)
10
35
  results = []
36
+ engine = @turn_on_engine ? @engine_klass.new(@config.domain, @config.cache_http) : nil
11
37
  collection&.each_slice(@config.download_batch_size) do |batch|
12
- mutex = Mutex.new
13
- threads = batch.map do |item|
14
- Thread.new {
15
- engine = @turn_on_engine ? @engine_klass.new(@config.domain) : nil
16
- Thread.current["results"] = block.call(item, engine)&.flatten
17
- mutex.synchronize do
18
- results += Thread.current["results"]
19
- end
20
- }
38
+ batch.each do |item|
39
+ batch_results = block.call(item, engine)&.flatten
40
+ results += ( batch_results || [])
21
41
  end
22
- threads.each(&:join)
23
- puts "Processed so far: #{results&.size}"
24
42
  end
25
43
  results
26
44
  end
@@ -1,16 +1,31 @@
1
+ require "digest"
2
+
1
3
  module MangaDownloadr
2
4
  class DownloadrClient
3
- def initialize(domain)
5
+ def initialize(domain, cache_http)
4
6
  @domain = domain
7
+ @cache_http = cache_http
5
8
  @http_client = Net::HTTP.new(@domain)
6
9
  end
7
10
 
8
11
  def get(uri, &block)
9
- response = @http_client.get(uri)
12
+ cache_path = "/tmp/manga-downloadr-cache/#{cache_filename(uri)}"
13
+ response = if @cache_http && File.exists?(cache_path)
14
+ body = File.read(cache_path)
15
+ MangaDownloadr::HTTPResponse.new("200", body)
16
+ else
17
+ @http_client.get(uri, { "User-Agent": USER_AGENT })
18
+ end
19
+
10
20
  case response.code
11
21
  when "301"
12
22
  get response.headers["Location"], &block
13
23
  when "200"
24
+ if @cache_http && !File.exists?(cache_path)
25
+ File.open(cache_path, "w") do |f|
26
+ f.write response.body
27
+ end
28
+ end
14
29
  parsed = Nokogiri::HTML(response.body)
15
30
  block.call(parsed)
16
31
  end
@@ -20,5 +35,9 @@ module MangaDownloadr
20
35
  sleep 1
21
36
  get(uri, &block)
22
37
  end
38
+
39
+ private def cache_filename(uri)
40
+ Digest::MD5.hexdigest(uri)
41
+ end
23
42
  end
24
43
  end
@@ -2,7 +2,7 @@ module MangaDownloadr
2
2
  class ImageDownloader < DownloadrClient
3
3
  def fetch(image_src, filename)
4
4
  File.delete(filename) if File.exists?(filename)
5
- response = @http_client.get(image_src)
5
+ response = @http_client.get(image_src, { "User-Agent": USER_AGENT })
6
6
  case response.code
7
7
  when "301"
8
8
  fetch(response.headers["Location"], filename)
@@ -1,4 +1,8 @@
1
1
  module MangaDownloadr
2
2
  Image = Struct.new *%i[host path filename]
3
- Config = Struct.new *%i[domain root_uri download_directory download_batch_size image_dimensions pages_per_volume]
3
+ Config = Struct.new *%i[domain root_uri download_directory download_batch_size image_dimensions pages_per_volume cache_http]
4
+
5
+ HTTPResponse = Struct.new *%i[code body]
6
+
7
+ USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/45.0.2454.101 Chrome/45.0.2454.101 Safari/537.36"
4
8
  end
@@ -1,3 +1,3 @@
1
1
  module MangaDownloadr
2
- VERSION = "2.0.0"
2
+ VERSION = "2.0.1"
3
3
  end
@@ -3,7 +3,7 @@ module MangaDownloadr
3
3
  def self.run(config = Config.new)
4
4
  FileUtils.mkdir_p config.download_directory
5
5
 
6
- CM(Workflow, config)
6
+ CM(config, Workflow)
7
7
  .fetch_chapters
8
8
  .fetch_pages(config)
9
9
  .fetch_images(config)
@@ -15,9 +15,21 @@ module MangaDownloadr
15
15
  puts "Done!"
16
16
  end
17
17
 
18
+ def self.run_tests(config = Config.new)
19
+ FileUtils.mkdir_p "/tmp/manga-downloadr-cache"
20
+
21
+ CM(Workflow, config)
22
+ .fetch_chapters
23
+ .fetch_pages(config)
24
+ .fetch_images(config)
25
+ .unwrap
26
+
27
+ puts "Done!"
28
+ end
29
+
18
30
  def self.fetch_chapters(config)
19
31
  puts "Fetching chapters ..."
20
- chapters = Chapters.new(config.domain, config.root_uri).fetch
32
+ chapters = Chapters.new(config.domain, config.root_uri, config.cache_http).fetch
21
33
  puts "Number of Chapters: #{chapters&.size}"
22
34
  chapters
23
35
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: manga-downloadr
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - AkitaOnRails
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-06 00:00:00.000000000 Z
11
+ date: 2016-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -30,70 +30,110 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.1.2
33
+ version: 0.2.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.1.2
40
+ version: 0.2.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: thread
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.2.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.2.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.11'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.11'
41
69
  - !ruby/object:Gem::Dependency
42
70
  name: rake
43
71
  requirement: !ruby/object:Gem::Requirement
44
72
  requirements:
45
- - - ">="
73
+ - - "~>"
46
74
  - !ruby/object:Gem::Version
47
- version: '0'
75
+ version: '10.0'
48
76
  type: :development
49
77
  prerelease: false
50
78
  version_requirements: !ruby/object:Gem::Requirement
51
79
  requirements:
52
- - - ">="
80
+ - - "~>"
53
81
  - !ruby/object:Gem::Version
54
- version: '0'
82
+ version: '10.0'
55
83
  - !ruby/object:Gem::Dependency
56
84
  name: rspec
57
85
  requirement: !ruby/object:Gem::Requirement
58
86
  requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 3.4.0
59
90
  - - ">="
60
91
  - !ruby/object:Gem::Version
61
- version: '0'
92
+ version: 3.4.0
62
93
  type: :development
63
94
  prerelease: false
64
95
  version_requirements: !ruby/object:Gem::Requirement
65
96
  requirements:
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: 3.4.0
66
100
  - - ">="
67
101
  - !ruby/object:Gem::Version
68
- version: '0'
102
+ version: 3.4.0
69
103
  - !ruby/object:Gem::Dependency
70
- name: webmock
104
+ name: pry
71
105
  requirement: !ruby/object:Gem::Requirement
72
106
  requirements:
73
- - - ">="
107
+ - - "~>"
74
108
  - !ruby/object:Gem::Version
75
- version: '0'
109
+ version: 0.10.3
76
110
  type: :development
77
111
  prerelease: false
78
112
  version_requirements: !ruby/object:Gem::Requirement
79
113
  requirements:
80
- - - ">="
114
+ - - "~>"
81
115
  - !ruby/object:Gem::Version
82
- version: '0'
116
+ version: 0.10.3
83
117
  - !ruby/object:Gem::Dependency
84
- name: pry
118
+ name: webmock
85
119
  requirement: !ruby/object:Gem::Requirement
86
120
  requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '2.1'
87
124
  - - ">="
88
125
  - !ruby/object:Gem::Version
89
- version: '0'
126
+ version: 2.1.0
90
127
  type: :development
91
128
  prerelease: false
92
129
  version_requirements: !ruby/object:Gem::Requirement
93
130
  requirements:
131
+ - - "~>"
132
+ - !ruby/object:Gem::Version
133
+ version: '2.1'
94
134
  - - ">="
95
135
  - !ruby/object:Gem::Version
96
- version: '0'
136
+ version: 2.1.0
97
137
  description: downloads any manga from MangaReader.net
98
138
  email: boss@akitaonrails.com
99
139
  executables: