image-dumper 0.5.5 → 0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/bin/dumper +47 -40
  3. data/lib/dumper.rb +38 -36
  4. data/lib/dumper/dumper.rb +114 -52
  5. data/lib/dumper/profiles/4chan.rb +49 -33
  6. data/lib/dumper/profiles/behoimi.rb +48 -38
  7. data/lib/dumper/profiles/booru.rb +54 -40
  8. data/lib/dumper/profiles/deviantart.rb +49 -33
  9. data/lib/dumper/profiles/fakku.rb +61 -51
  10. data/lib/dumper/profiles/fc2.rb +49 -33
  11. data/lib/dumper/profiles/gelbooru.rb +54 -40
  12. data/lib/dumper/profiles/imagebam.rb +55 -37
  13. data/lib/dumper/profiles/mangaeden.rb +69 -53
  14. data/lib/dumper/profiles/mangago.rb +53 -40
  15. data/lib/dumper/profiles/mangahere.rb +68 -49
  16. data/lib/dumper/profiles/multiplayer.rb +55 -41
  17. data/lib/dumper/profiles/redblow.rb +49 -36
  18. data/lib/dumper/profiles/sankakucomplex.rb +81 -58
  19. data/lib/dumper/profiles/teca.rb +54 -34
  20. data/lib/dumper/profiles/yande.rb +51 -42
  21. data/lib/dumper/utils.rb +40 -42
  22. data/lib/dumper/version.rb +23 -23
  23. data/spec/4chan_spec.rb +28 -0
  24. data/spec/behoimi_spec.rb +28 -0
  25. data/spec/booru_spec.rb +28 -0
  26. data/spec/deviantart_spec.rb +28 -0
  27. data/spec/fakku_spec.rb +28 -0
  28. data/spec/fc2_spec.rb +28 -0
  29. data/spec/gelbooru_spec.rb +28 -0
  30. data/spec/imagebam_spec.rb +28 -0
  31. data/spec/mangaeden_spec.rb +28 -0
  32. data/spec/mangago_spec.rb +28 -0
  33. data/spec/mangahere_spec.rb +28 -0
  34. data/spec/multiplayer_spec.rb +28 -0
  35. data/spec/redblow_spec.rb +28 -0
  36. data/spec/sankakucomplex_spec.rb +41 -0
  37. data/spec/teca_spec.rb +28 -0
  38. data/spec/yande_spec.rb +28 -0
  39. metadata +119 -6
  40. data/lib/dumper/profiles/i_doujin.rb +0 -38
  41. data/lib/dumper/profiles/mi9.rb +0 -44
  42. data/lib/dumper/profiles/wallpaperhere.rb +0 -43
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8bb0916cbaf373d4b57cf666146ba1d04db68835
4
- data.tar.gz: d0dd26d401d89d3b5173653663d9f54fe6a0275b
3
+ metadata.gz: d620c7ba1dbc1bd22e170e520822d8711c1fd1f8
4
+ data.tar.gz: 61eb44fd3c2ed01b948acb3993081437bf65b8ca
5
5
  SHA512:
6
- metadata.gz: 936f144bd3c4c50d566e45a145749920ff14f28be43f8b28b95baabdb3958bdeb2286c74133944703c6d37b6e6cffc1b8d07f4dcd30308e16db65a716ba02da9
7
- data.tar.gz: 328ea9e69894a4529cb763ee3103b218de43bac5e3da1869f0cffe63188e678a503ad244e43ca65dc58eeb16bf6ddd8faf4ea3529b9770170e915dd37c72fa99
6
+ metadata.gz: 8e7a28e8d7fb7b2ee24043ae8c3cbdbbcfe5ec7b1267a1445cf20d54cfd641bd0e586b1da3db9f983e61b0b8f77ecff178df72be1a54cbe467965e620c410d33
7
+ data.tar.gz: b6381b814c485917103bb4bb15ed6bf6aa2092e47ad5e963f2480fd649445e8800aba1e7371e92bb183d459a7bbc2cfe84eaea5b7dd57606d7b340423ceff009
data/bin/dumper CHANGED
@@ -17,95 +17,102 @@
17
17
  # You should have received a copy of the GNU General Public License
18
18
  # along with Dumper. If not, see <http://www.gnu.org/licenses/>.
19
19
  #++
20
-
21
20
  require 'dumper'
22
21
 
23
22
  options = {}
24
23
 
25
24
  OptionParser.new do |o|
26
- options[:url] = []
25
+ options[:url ] = []
27
26
  options[:path] = []
28
27
 
29
- o.on '-l', '--list', 'Show available profiles' do
28
+ o.on '-l', '--list', 'Show available profiles' do
30
29
  abort 'Profiles available:'.tap { |s|
31
30
  Dumper::Profiles::list.sort { |a, b| a <=> b }.each { |p| s << "\n" + (' ' * 3) + p }
32
31
  } if ARGV.empty?
33
32
  end
34
33
 
35
- o.on '-i', '--info PROFILE', 'Get info about profiles' do |profile|
34
+ o.on '-i', '--info PROFILE', 'Get info about profiles' do |profile|
36
35
  if Dumper::Profiles::list.include? profile
37
- method = ("info_#{profile}").to_sym
36
+ method = ("info_#{profile}").to_sym
38
37
  Dumper::Profiles::send(method).tap { |i|
39
- puts "Option 'from' is #{i[:from] ? 'available' : 'not available'}"
40
- puts "Option 'to' is #{i[:to] ? 'available' : 'not available'}"
38
+ puts "Option 'from' is #{i[:from] ? 'available' : 'not available'}."
39
+ puts "Option 'to' is #{i[:to] ? 'available' : 'not available'}."
40
+ abort "It dumps #{i[:type]}."
41
41
  }
42
42
  else
43
- puts 'Profile not found.'
43
+ abort 'Profile not found.'
44
44
  end
45
- abort
46
45
  end
47
46
 
48
- o.on '-u', '--url URL', 'Target URL' do |url|
49
- options[:url] << url
47
+ o.on '-u', '--url URL', 'Target URL' do |url|
48
+ options[:url] << url
50
49
  end
51
50
 
52
- o.on '-f', '--file FILE', 'File containing a list of URLs, a double pipe (||) and the target folder, one per line' do |file|
53
- file = File.open(file).read.gsub(/\r\n?/, "\n")
54
- file.each_line { |line|
55
- split = line.split('||')
56
- options[:url] << split[0].strip
57
- options[:path] << split[1].strip
58
- }
51
+ o.on '-o', '--from PAGE', 'Start to save from... (if allowed)' do |pages|
52
+ options[:from] = pages.to_i
53
+ end
54
+
55
+ o.on '-g', '--to PAGE', 'Finish to save at... (if allowed)' do |pages|
56
+ options[:to] = pages.to_i
57
+ end
58
+
59
+ o.on '-p', '--path PATH', 'Target folder' do |path|
60
+ options[:path] << path
59
61
  end
60
62
 
61
- o.on '-p', '--path PATH', 'Target folder' do |path|
62
- options[:path] << path
63
+ o.on '-x', '--xpath XPATH', 'Use a custom xpath' do |xpath|
64
+ options[:xpath] = xpath
63
65
  end
64
66
 
65
- o.on '-r', '--profile PROFILE', 'Use the given profile' do |profile|
67
+ o.on '-r', '--profile PROFILE', 'Force the use of the given profile' do |profile|
66
68
  options[:profile] = profile
67
69
  end
68
70
 
69
- o.on '-x', '--xpath XPATH', 'Custom xpath' do |xpath|
70
- options[:xpath] = xpath
71
+ o.on '-t', '--threads MIN:MAX', 'Set the number of threads to use' do |threads|
72
+ threads = threads.split ?:
73
+ Dumper::Profiles.pool_size = threads[0].to_i, threads[1].to_i
71
74
  end
72
75
 
73
- o.on '-o', '--from PAGE', 'Start to save from... (if allowed)' do |pages|
74
- options[:from] = pages.to_i
76
+ o.on '-s', '--silence', 'Print only important messages' do
77
+ Dumper::Profiles.verbose = false
75
78
  end
76
79
 
77
- o.on '-g', '--to PAGE', 'Finish to save at... (if allowed)' do |pages|
78
- options[:to] = pages.to_i
80
+ o.on '-f', '--file FILE', 'File containing in each line "URL||Folder"' do |file|
81
+ file = File.open(file).read.gsub(/\r\n?/, "\n")
82
+ file.each_line { |line|
83
+ split = line.split '||'
84
+ options[:url] << split[0].strip
85
+ options[:path] << split[1].strip
86
+ }
79
87
  end
80
88
  end.parse!
81
89
 
82
90
  if options[:url].empty?
83
- abort 'URL or list of URLs is required.'
91
+ abort 'An URL or a list of URLs is required.'
84
92
  elsif options[:path].empty?
85
93
  abort 'Path is required.'
86
94
  end
87
95
 
88
96
  options[:url].each_with_index { |url, i|
89
97
  begin
90
-
91
- host = options.has_key?(:profile) ? options[:profile] : URI.parse(url).host.split(?.)[-2]
92
98
  Dir.mkdir(options[:path][i]) unless File.directory? options[:path][i]
93
-
94
- if Dumper::Profiles::list.include? host.gsub(?-, ?_)
95
- method = ('get_' + host.gsub(?-, ?_)).to_sym
96
- if options.include?(:from) && options.include?(:to)
97
- Dumper::Profiles::send method, url, options[:path][i], options[:from], options[:to]
98
- else
99
- Dumper::Profiles::send method, url, options[:path][i]
99
+
100
+ host = options[:profile] || URI.parse(url).host.split(?.)[-2]
101
+
102
+ if options[:xpath].nil? && Dumper::Profiles::list.include?(host.gsub(?-, ?_))
103
+ method = "get_#{host.gsub ?-, ?_}".to_sym
104
+ case
105
+ when options[:from] && options[:to] then Dumper::Profiles::send method, url, options[:path][i], options[:from], options[:to]
106
+ when options[:from] then Dumper::Profiles::send method, url, options[:path][i], options[:from]
107
+ else Dumper::Profiles::send method, url, options[:path][i]
100
108
  end
101
109
  else
102
110
  Dumper::Profiles::get_generic url, options[:path][i], options[:xpath]
103
111
  end
104
-
105
112
  rescue Nokogiri::XML::XPath::SyntaxError => e
106
- puts e.to_s.gsub(/expression/, 'xpath')
113
+ puts e.to_s.gsub /expression/, 'xpath'
107
114
  puts 'Cannot dump.'
108
- rescue OpenURI::HTTPError => e
115
+ rescue OpenURI::HTTPError => e
109
116
  puts "Error opening #{url}: #{e}"
110
117
  rescue URI::InvalidURIError => e
111
118
  puts "URL #{url} is not valid: #{e}"
data/lib/dumper.rb CHANGED
@@ -1,36 +1,38 @@
1
- #--
2
- # Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
3
- #
4
- # This file is part of Dumper.
5
- #
6
- # Smogon-API is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Smogon-API is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Smogon-API. If not, see <http://www.gnu.org/licenses/>.
18
- #++
19
-
20
- require 'open-uri'
21
- require 'uri'
22
- require 'optparse'
23
- require 'net/http'
24
- require 'nokogiri'
25
- require 'openssl'
26
- require 'certified'
27
- require 'addressable/uri'
28
- require 'base64'
29
-
30
- require 'dumper/utils'
31
- Dir.glob(File.expand_path("../dumper/profiles/*.rb", __FILE__)).each { |f|
32
- require "dumper/profiles/#{File.basename(f).split(?.)[0]}"
33
- }
34
- require 'dumper/dumper'
35
-
36
- require 'dumper/version'
1
+ #--
2
+ # Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
3
+ #
4
+ # This file is part of Dumper.
5
+ #
6
+ # Smogon-API is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Smogon-API is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Smogon-API. If not, see <http://www.gnu.org/licenses/>.
18
+ #++
19
+
20
+ require 'open-uri'
21
+ require 'net/http'
22
+ require 'uri'
23
+ require 'optparse'
24
+ require 'base64'
25
+ require 'nokogiri'
26
+ require 'openssl'
27
+ require 'certified'
28
+ require 'addressable/uri'
29
+ require 'json'
30
+ require 'thread/pool'
31
+
32
+ require 'dumper/utils'
33
+ require 'dumper/dumper'
34
+ require 'dumper/version'
35
+
36
+ Dir.glob(File.expand_path("../dumper/profiles/*.rb", __FILE__)).each { |f|
37
+ require "dumper/profiles/#{File.basename(f).split(?.)[0]}"
38
+ }
data/lib/dumper/dumper.rb CHANGED
@@ -17,60 +17,122 @@
17
17
  # along with Dumper. If not, see <http://www.gnu.org/licenses/>.
18
18
  #++
19
19
 
20
- module Dumper; module Profiles
21
-
22
- def self.list
23
- Dir.glob(File.expand_path('../profiles/*.rb', __FILE__)).sort { |a, b| b <=> a }.map { |f|
24
- f = File.basename(f).split(?.)[0]
25
- }
26
- end
27
-
28
- def self.get(path, p, ua = '', ref = '', filename = '')
29
- p = p.to_s
30
-
31
- begin
32
- if p.start_with? 'data:image/'
33
- filename = File.join path, filename == '' ? rand(1000).to_s + '.' + p.split('data:image/')[1].split(?;)[0] : filename
34
- filename.gsub!(File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR)
35
- if File.exists? filename
36
- puts "File #{filename} already exists."
37
- else
38
- puts "Downloading base64 image as #{filename}..."
39
- p.gsub!(/data:image\/png;base64,/, '')
40
- File.open(filename, 'wb') { |f| f.write Base64.decode64(p) }
41
- end
42
- else
43
- filename = File.join path, filename == '' ? File.basename(p) : filename
44
- filename.gsub!(File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR)
45
- if File.exists? filename
46
- puts "File #{filename} already exists."
47
- else
48
- filename = File.join path, rand(1000).to_s + '.jpg' unless filename[-4] == ?. || filename[-5] == ?.
49
- puts "Downloading #{p} as #{filename}..."
50
- File.open(filename, 'wb') { |f| f.write open(p, 'User-Agent' => ua, 'Referer' => ref).read }
51
- end
20
+ module Dumper
21
+ module Profiles
22
+ USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:16.0) Gecko/20100101 Firefox/16.0'
23
+
24
+ class Profile
25
+ include Dumper::Profiles
26
+
27
+ def initialize(&block)
28
+ min = pool_size[:min]
29
+ max = pool_size[:max]
30
+
31
+ @pool = Thread.pool min, max
32
+ puts "Using #{min}:#{max || min} threads..."
33
+
34
+ instance_eval &block
35
+ end
36
+
37
+ def dump(url, path, *args)
38
+ raise NotImplementedError
39
+ end
40
+
41
+ def shutdown
42
+ @pool.shutdown
52
43
  end
53
- rescue Exception => e
54
- p e
55
- puts "Error downloading \#{p}."
56
- return false
57
44
  end
58
- return true
59
- end
60
45
 
61
- def self.get_generic(url, path, xpath)
62
- uri = nil
63
- Nokogiri::HTML(open(url)).xpath(xpath).each { |p|
64
- if p.to_s.start_with? ?/
65
- uri = URI(url) if uri.nil?
66
- p = "#{uri.scheme}://#{uri.host}#{p}"
46
+ def pool_size
47
+ {
48
+ min: @min || 4,
49
+ max: @max
50
+ }
51
+ end
52
+
53
+ def verbose?
54
+ @verbose == nil || @verbose == true
55
+ end
56
+
57
+ class << self
58
+ def pool_size=(min, max = nil)
59
+ @min = min
60
+ @max = max
67
61
  end
68
- self.get path, p
69
- }
70
- end
71
-
72
- def method_missing(method, *args, &block)
73
- "'#{method.split('get_')[1]}' profile not found."
62
+
63
+ def verbose=(verbose)
64
+ @verbose = verbose
65
+ end
66
+
67
+ def verbose?
68
+ @verbose == nil || @verbose == true
69
+ end
70
+
71
+ def list
72
+ Dir.glob(File.expand_path('../profiles/*.rb', __FILE__)).sort { |a, b| b <=> a }.map { |f|
73
+ f = File.basename(f).split(?.)[0]
74
+ }
75
+ end
76
+
77
+ def get(path, url, options = {})
78
+ url = url.to_s
79
+
80
+ begin
81
+ if url.start_with? 'data:image/'
82
+ filename = File.join path, options[:filename] || rand(1000).to_s + '.' + url.split('data:image/')[1].split(?;)[0]
83
+ filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR
84
+
85
+ url.gsub /data:image\/png;base64,/, ''
86
+
87
+ if File.exists? filename
88
+ puts "File #{filename} already exists." if verbose?
89
+ else
90
+ puts "Downloading base64 image as #{filename}..." if verbose?
91
+ File.open(filename, 'wb') { |f|
92
+ f.write Base64.decode64(url)
93
+ }
94
+ end
95
+ else
96
+ filename = File.join path, options[:filename] || File.basename(url)
97
+ filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR
98
+
99
+ if File.exists? filename
100
+ puts "File #{filename} already exists." if verbose?
101
+ else
102
+ filename = File.join(path, rand(1000).to_s + '.jpg') unless filename[-4] == ?. || filename[-5] == ?.
103
+ puts "Downloading #{url} as #{filename}..." if verbose?
104
+
105
+ File.open(filename, 'wb') { |f|
106
+ f.write open(url,
107
+ 'User-Agent' => options[:user_agent] || USER_AGENT,
108
+ 'Referer' => options[:referer ] || url
109
+ ).read
110
+ }
111
+ end
112
+ end
113
+ rescue Exception => e
114
+ p e
115
+ puts "Error downloading #{url}."
116
+ return false
117
+ end
118
+
119
+ true
120
+ end
121
+
122
+ def get_generic(url, path, xpath)
123
+ uri = nil
124
+ Nokogiri::HTML(open(url)).xpath(xpath).each { |p|
125
+ if p.to_s.start_with? ?/
126
+ uri = URI(url) if uri.nil?
127
+ p = "#{uri.scheme}://#{uri.host}#{p}"
128
+ end
129
+ get path, p
130
+ }
131
+ end
132
+ end
133
+
134
+ def method_missing(method, *args, &block)
135
+ "'#{method.split('get_')[1]}' profile not found."
136
+ end
74
137
  end
75
-
76
- end; end
138
+ end
data/lib/dumper/profiles/4chan.rb CHANGED
@@ -1,34 +1,50 @@
1
- #--
2
- # Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
3
- #
4
- # This file is part of Dumper.
5
- #
6
- # Dumper is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Dumper is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Dumper. If not, see <http://www.gnu.org/licenses/>.
18
- #++
19
-
20
- module Dumper
21
- module Profiles
22
-
23
- def self.get_4chan(url, path, from = 1, to = 1)
24
- Nokogiri::HTML(open(url)).xpath('//a[@class = "fileThumb"]/@href').each { |p|
25
- self.get path, "http:#{p}"
26
- }
27
- end
28
-
29
- def self.info_4chan
30
- { :from => false, :to => false }
31
- end
32
-
33
- end
1
+ #--
2
+ # Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
3
+ #
4
+ # This file is part of Dumper.
5
+ #
6
+ # Dumper is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Dumper is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Dumper. If not, see <http://www.gnu.org/licenses/>.
18
+ #++
19
+
20
+ module Dumper
21
+ module Profiles
22
+
23
+ class FourChan < Profile
24
+ def dump(url, path, from, to)
25
+ from -= 1
26
+ to -= 1 if to >= 1
27
+
28
+ Nokogiri::HTML(open(url)).xpath('//a[@class = "fileThumb"]/@href')[from..to].each { |p|
29
+ @pool.process {
30
+ Dumper::Profiles.get path, "http:#{p}"
31
+ }
32
+ }
33
+ end
34
+ end
35
+
36
+ class << self
37
+ def get_4chan(url, path, from = 1, to = -1)
38
+ FourChan.new { |p|
39
+ p.dump url, path, from, to
40
+ p.shutdown
41
+ }
42
+ end
43
+
44
+ def info_4chan
45
+ { from: :enabled, to: :enabled, type: :images }
46
+ end
47
+ end
48
+
49
+ end
34
50
  end