image-dumper 0.5.5 → 0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/bin/dumper +47 -40
  3. data/lib/dumper.rb +38 -36
  4. data/lib/dumper/dumper.rb +114 -52
  5. data/lib/dumper/profiles/4chan.rb +49 -33
  6. data/lib/dumper/profiles/behoimi.rb +48 -38
  7. data/lib/dumper/profiles/booru.rb +54 -40
  8. data/lib/dumper/profiles/deviantart.rb +49 -33
  9. data/lib/dumper/profiles/fakku.rb +61 -51
  10. data/lib/dumper/profiles/fc2.rb +49 -33
  11. data/lib/dumper/profiles/gelbooru.rb +54 -40
  12. data/lib/dumper/profiles/imagebam.rb +55 -37
  13. data/lib/dumper/profiles/mangaeden.rb +69 -53
  14. data/lib/dumper/profiles/mangago.rb +53 -40
  15. data/lib/dumper/profiles/mangahere.rb +68 -49
  16. data/lib/dumper/profiles/multiplayer.rb +55 -41
  17. data/lib/dumper/profiles/redblow.rb +49 -36
  18. data/lib/dumper/profiles/sankakucomplex.rb +81 -58
  19. data/lib/dumper/profiles/teca.rb +54 -34
  20. data/lib/dumper/profiles/yande.rb +51 -42
  21. data/lib/dumper/utils.rb +40 -42
  22. data/lib/dumper/version.rb +23 -23
  23. data/spec/4chan_spec.rb +28 -0
  24. data/spec/behoimi_spec.rb +28 -0
  25. data/spec/booru_spec.rb +28 -0
  26. data/spec/deviantart_spec.rb +28 -0
  27. data/spec/fakku_spec.rb +28 -0
  28. data/spec/fc2_spec.rb +28 -0
  29. data/spec/gelbooru_spec.rb +28 -0
  30. data/spec/imagebam_spec.rb +28 -0
  31. data/spec/mangaeden_spec.rb +28 -0
  32. data/spec/mangago_spec.rb +28 -0
  33. data/spec/mangahere_spec.rb +28 -0
  34. data/spec/multiplayer_spec.rb +28 -0
  35. data/spec/redblow_spec.rb +28 -0
  36. data/spec/sankakucomplex_spec.rb +41 -0
  37. data/spec/teca_spec.rb +28 -0
  38. data/spec/yande_spec.rb +28 -0
  39. metadata +119 -6
  40. data/lib/dumper/profiles/i_doujin.rb +0 -38
  41. data/lib/dumper/profiles/mi9.rb +0 -44
  42. data/lib/dumper/profiles/wallpaperhere.rb +0 -43
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8bb0916cbaf373d4b57cf666146ba1d04db68835
4
- data.tar.gz: d0dd26d401d89d3b5173653663d9f54fe6a0275b
3
+ metadata.gz: d620c7ba1dbc1bd22e170e520822d8711c1fd1f8
4
+ data.tar.gz: 61eb44fd3c2ed01b948acb3993081437bf65b8ca
5
5
  SHA512:
6
- metadata.gz: 936f144bd3c4c50d566e45a145749920ff14f28be43f8b28b95baabdb3958bdeb2286c74133944703c6d37b6e6cffc1b8d07f4dcd30308e16db65a716ba02da9
7
- data.tar.gz: 328ea9e69894a4529cb763ee3103b218de43bac5e3da1869f0cffe63188e678a503ad244e43ca65dc58eeb16bf6ddd8faf4ea3529b9770170e915dd37c72fa99
6
+ metadata.gz: 8e7a28e8d7fb7b2ee24043ae8c3cbdbbcfe5ec7b1267a1445cf20d54cfd641bd0e586b1da3db9f983e61b0b8f77ecff178df72be1a54cbe467965e620c410d33
7
+ data.tar.gz: b6381b814c485917103bb4bb15ed6bf6aa2092e47ad5e963f2480fd649445e8800aba1e7371e92bb183d459a7bbc2cfe84eaea5b7dd57606d7b340423ceff009
data/bin/dumper CHANGED
@@ -17,95 +17,102 @@
17
17
  # You should have received a copy of the GNU General Public License
18
18
  # along with Dumper. If not, see <http://www.gnu.org/licenses/>.
19
19
  #++
20
-
21
20
  require 'dumper'
22
21
 
23
22
  options = {}
24
23
 
25
24
  OptionParser.new do |o|
26
- options[:url] = []
25
+ options[:url ] = []
27
26
  options[:path] = []
28
27
 
29
- o.on '-l', '--list', 'Show available profiles' do
28
+ o.on '-l', '--list', 'Show available profiles' do
30
29
  abort 'Profiles available:'.tap { |s|
31
30
  Dumper::Profiles::list.sort { |a, b| a <=> b }.each { |p| s << "\n" + (' ' * 3) + p }
32
31
  } if ARGV.empty?
33
32
  end
34
33
 
35
- o.on '-i', '--info PROFILE', 'Get info about profiles' do |profile|
34
+ o.on '-i', '--info PROFILE', 'Get info about profiles' do |profile|
36
35
  if Dumper::Profiles::list.include? profile
37
- method = ("info_#{profile}").to_sym
36
+ method = ("info_#{profile}").to_sym
38
37
  Dumper::Profiles::send(method).tap { |i|
39
- puts "Option 'from' is #{i[:from] ? 'available' : 'not available'}"
40
- puts "Option 'to' is #{i[:to] ? 'available' : 'not available'}"
38
+ puts "Option 'from' is #{i[:from] ? 'available' : 'not available'}."
39
+ puts "Option 'to' is #{i[:to] ? 'available' : 'not available'}."
40
+ abort "It dumps #{i[:type]}."
41
41
  }
42
42
  else
43
- puts 'Profile not found.'
43
+ abort 'Profile not found.'
44
44
  end
45
- abort
46
45
  end
47
46
 
48
- o.on '-u', '--url URL', 'Target URL' do |url|
49
- options[:url] << url
47
+ o.on '-u', '--url URL', 'Target URL' do |url|
48
+ options[:url] << url
50
49
  end
51
50
 
52
- o.on '-f', '--file FILE', 'File containing a list of URLs, a double pipe (||) and the target folder, one per line' do |file|
53
- file = File.open(file).read.gsub(/\r\n?/, "\n")
54
- file.each_line { |line|
55
- split = line.split('||')
56
- options[:url] << split[0].strip
57
- options[:path] << split[1].strip
58
- }
51
+ o.on '-o', '--from PAGE', 'Start to save from... (if allowed)' do |pages|
52
+ options[:from] = pages.to_i
53
+ end
54
+
55
+ o.on '-g', '--to PAGE', 'Finish to save at... (if allowed)' do |pages|
56
+ options[:to] = pages.to_i
57
+ end
58
+
59
+ o.on '-p', '--path PATH', 'Target folder' do |path|
60
+ options[:path] << path
59
61
  end
60
62
 
61
- o.on '-p', '--path PATH', 'Target folder' do |path|
62
- options[:path] << path
63
+ o.on '-x', '--xpath XPATH', 'Use a custom xpath' do |xpath|
64
+ options[:xpath] = xpath
63
65
  end
64
66
 
65
- o.on '-r', '--profile PROFILE', 'Use the given profile' do |profile|
67
+ o.on '-r', '--profile PROFILE', 'Force the use of the given profile' do |profile|
66
68
  options[:profile] = profile
67
69
  end
68
70
 
69
- o.on '-x', '--xpath XPATH', 'Custom xpath' do |xpath|
70
- options[:xpath] = xpath
71
+ o.on '-t', '--threads MIN:MAX', 'Set the number of threads to use' do |threads|
72
+ threads = threads.split ?:
73
+ Dumper::Profiles.pool_size = threads[0].to_i, threads[1].to_i
71
74
  end
72
75
 
73
- o.on '-o', '--from PAGE', 'Start to save from... (if allowed)' do |pages|
74
- options[:from] = pages.to_i
76
+ o.on '-s', '--silence', 'Print only important messages' do
77
+ Dumper::Profiles.verbose = false
75
78
  end
76
79
 
77
- o.on '-g', '--to PAGE', 'Finish to save at... (if allowed)' do |pages|
78
- options[:to] = pages.to_i
80
+ o.on '-f', '--file FILE', 'File containing in each line "URL||Folder"' do |file|
81
+ file = File.open(file).read.gsub(/\r\n?/, "\n")
82
+ file.each_line { |line|
83
+ split = line.split '||'
84
+ options[:url] << split[0].strip
85
+ options[:path] << split[1].strip
86
+ }
79
87
  end
80
88
  end.parse!
81
89
 
82
90
  if options[:url].empty?
83
- abort 'URL or list of URLs is required.'
91
+ abort 'An URL or a list of URLs is required.'
84
92
  elsif options[:path].empty?
85
93
  abort 'Path is required.'
86
94
  end
87
95
 
88
96
  options[:url].each_with_index { |url, i|
89
97
  begin
90
-
91
- host = options.has_key?(:profile) ? options[:profile] : URI.parse(url).host.split(?.)[-2]
92
98
  Dir.mkdir(options[:path][i]) unless File.directory? options[:path][i]
93
-
94
- if Dumper::Profiles::list.include? host.gsub(?-, ?_)
95
- method = ('get_' + host.gsub(?-, ?_)).to_sym
96
- if options.include?(:from) && options.include?(:to)
97
- Dumper::Profiles::send method, url, options[:path][i], options[:from], options[:to]
98
- else
99
- Dumper::Profiles::send method, url, options[:path][i]
99
+
100
+ host = options[:profile] || URI.parse(url).host.split(?.)[-2]
101
+
102
+ if options[:xpath].nil? && Dumper::Profiles::list.include?(host.gsub(?-, ?_))
103
+ method = "get_#{host.gsub ?-, ?_}".to_sym
104
+ case
105
+ when options[:from] && options[:to] then Dumper::Profiles::send method, url, options[:path][i], options[:from], options[:to]
106
+ when options[:from] then Dumper::Profiles::send method, url, options[:path][i], options[:from]
107
+ else Dumper::Profiles::send method, url, options[:path][i]
100
108
  end
101
109
  else
102
110
  Dumper::Profiles::get_generic url, options[:path][i], options[:xpath]
103
111
  end
104
-
105
112
  rescue Nokogiri::XML::XPath::SyntaxError => e
106
- puts e.to_s.gsub(/expression/, 'xpath')
113
+ puts e.to_s.gsub /expression/, 'xpath'
107
114
  puts 'Cannot dump.'
108
- rescue OpenURI::HTTPError => e
115
+ rescue OpenURI::HTTPError => e
109
116
  puts "Error opening #{url}: #{e}"
110
117
  rescue URI::InvalidURIError => e
111
118
  puts "URL #{url} is not valid: #{e}"
data/lib/dumper.rb CHANGED
@@ -1,36 +1,38 @@
1
- #--
2
- # Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
3
- #
4
- # This file is part of Dumper.
5
- #
6
- # Smogon-API is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Smogon-API is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Smogon-API. If not, see <http://www.gnu.org/licenses/>.
18
- #++
19
-
20
- require 'open-uri'
21
- require 'uri'
22
- require 'optparse'
23
- require 'net/http'
24
- require 'nokogiri'
25
- require 'openssl'
26
- require 'certified'
27
- require 'addressable/uri'
28
- require 'base64'
29
-
30
- require 'dumper/utils'
31
- Dir.glob(File.expand_path("../dumper/profiles/*.rb", __FILE__)).each { |f|
32
- require "dumper/profiles/#{File.basename(f).split(?.)[0]}"
33
- }
34
- require 'dumper/dumper'
35
-
36
- require 'dumper/version'
1
+ #--
2
+ # Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
3
+ #
4
+ # This file is part of Dumper.
5
+ #
6
+ # Smogon-API is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Smogon-API is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Smogon-API. If not, see <http://www.gnu.org/licenses/>.
18
+ #++
19
+
20
+ require 'open-uri'
21
+ require 'net/http'
22
+ require 'uri'
23
+ require 'optparse'
24
+ require 'base64'
25
+ require 'nokogiri'
26
+ require 'openssl'
27
+ require 'certified'
28
+ require 'addressable/uri'
29
+ require 'json'
30
+ require 'thread/pool'
31
+
32
+ require 'dumper/utils'
33
+ require 'dumper/dumper'
34
+ require 'dumper/version'
35
+
36
+ Dir.glob(File.expand_path("../dumper/profiles/*.rb", __FILE__)).each { |f|
37
+ require "dumper/profiles/#{File.basename(f).split(?.)[0]}"
38
+ }
data/lib/dumper/dumper.rb CHANGED
@@ -17,60 +17,122 @@
17
17
  # along with Dumper. If not, see <http://www.gnu.org/licenses/>.
18
18
  #++
19
19
 
20
- module Dumper; module Profiles
21
-
22
- def self.list
23
- Dir.glob(File.expand_path('../profiles/*.rb', __FILE__)).sort { |a, b| b <=> a }.map { |f|
24
- f = File.basename(f).split(?.)[0]
25
- }
26
- end
27
-
28
- def self.get(path, p, ua = '', ref = '', filename = '')
29
- p = p.to_s
30
-
31
- begin
32
- if p.start_with? 'data:image/'
33
- filename = File.join path, filename == '' ? rand(1000).to_s + '.' + p.split('data:image/')[1].split(?;)[0] : filename
34
- filename.gsub!(File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR)
35
- if File.exists? filename
36
- puts "File #{filename} already exists."
37
- else
38
- puts "Downloading base64 image as #{filename}..."
39
- p.gsub!(/data:image\/png;base64,/, '')
40
- File.open(filename, 'wb') { |f| f.write Base64.decode64(p) }
41
- end
42
- else
43
- filename = File.join path, filename == '' ? File.basename(p) : filename
44
- filename.gsub!(File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR)
45
- if File.exists? filename
46
- puts "File #{filename} already exists."
47
- else
48
- filename = File.join path, rand(1000).to_s + '.jpg' unless filename[-4] == ?. || filename[-5] == ?.
49
- puts "Downloading #{p} as #{filename}..."
50
- File.open(filename, 'wb') { |f| f.write open(p, 'User-Agent' => ua, 'Referer' => ref).read }
51
- end
20
+ module Dumper
21
+ module Profiles
22
+ USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:16.0) Gecko/20100101 Firefox/16.0'
23
+
24
+ class Profile
25
+ include Dumper::Profiles
26
+
27
+ def initialize(&block)
28
+ min = pool_size[:min]
29
+ max = pool_size[:max]
30
+
31
+ @pool = Thread.pool min, max
32
+ puts "Using #{min}:#{max || min} threads..."
33
+
34
+ instance_eval &block
35
+ end
36
+
37
+ def dump(url, path, *args)
38
+ raise NotImplementedError
39
+ end
40
+
41
+ def shutdown
42
+ @pool.shutdown
52
43
  end
53
- rescue Exception => e
54
- p e
55
- puts "Error downloading \#{p}."
56
- return false
57
44
  end
58
- return true
59
- end
60
45
 
61
- def self.get_generic(url, path, xpath)
62
- uri = nil
63
- Nokogiri::HTML(open(url)).xpath(xpath).each { |p|
64
- if p.to_s.start_with? ?/
65
- uri = URI(url) if uri.nil?
66
- p = "#{uri.scheme}://#{uri.host}#{p}"
46
+ def pool_size
47
+ {
48
+ min: @min || 4,
49
+ max: @max
50
+ }
51
+ end
52
+
53
+ def verbose?
54
+ @verbose == nil || @verbose == true
55
+ end
56
+
57
+ class << self
58
+ def pool_size=(min, max = nil)
59
+ @min = min
60
+ @max = max
67
61
  end
68
- self.get path, p
69
- }
70
- end
71
-
72
- def method_missing(method, *args, &block)
73
- "'#{method.split('get_')[1]}' profile not found."
62
+
63
+ def verbose=(verbose)
64
+ @verbose = verbose
65
+ end
66
+
67
+ def verbose?
68
+ @verbose == nil || @verbose == true
69
+ end
70
+
71
+ def list
72
+ Dir.glob(File.expand_path('../profiles/*.rb', __FILE__)).sort { |a, b| b <=> a }.map { |f|
73
+ f = File.basename(f).split(?.)[0]
74
+ }
75
+ end
76
+
77
+ def get(path, url, options = {})
78
+ url = url.to_s
79
+
80
+ begin
81
+ if url.start_with? 'data:image/'
82
+ filename = File.join path, options[:filename] || rand(1000).to_s + '.' + url.split('data:image/')[1].split(?;)[0]
83
+ filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR
84
+
85
+ url.gsub /data:image\/png;base64,/, ''
86
+
87
+ if File.exists? filename
88
+ puts "File #{filename} already exists." if verbose?
89
+ else
90
+ puts "Downloading base64 image as #{filename}..." if verbose?
91
+ File.open(filename, 'wb') { |f|
92
+ f.write Base64.decode64(url)
93
+ }
94
+ end
95
+ else
96
+ filename = File.join path, options[:filename] || File.basename(url)
97
+ filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR
98
+
99
+ if File.exists? filename
100
+ puts "File #{filename} already exists." if verbose?
101
+ else
102
+ filename = File.join(path, rand(1000).to_s + '.jpg') unless filename[-4] == ?. || filename[-5] == ?.
103
+ puts "Downloading #{url} as #{filename}..." if verbose?
104
+
105
+ File.open(filename, 'wb') { |f|
106
+ f.write open(url,
107
+ 'User-Agent' => options[:user_agent] || USER_AGENT,
108
+ 'Referer' => options[:referer ] || url
109
+ ).read
110
+ }
111
+ end
112
+ end
113
+ rescue Exception => e
114
+ p e
115
+ puts "Error downloading #{url}."
116
+ return false
117
+ end
118
+
119
+ true
120
+ end
121
+
122
+ def get_generic(url, path, xpath)
123
+ uri = nil
124
+ Nokogiri::HTML(open(url)).xpath(xpath).each { |p|
125
+ if p.to_s.start_with? ?/
126
+ uri = URI(url) if uri.nil?
127
+ p = "#{uri.scheme}://#{uri.host}#{p}"
128
+ end
129
+ get path, p
130
+ }
131
+ end
132
+ end
133
+
134
+ def method_missing(method, *args, &block)
135
+ "'#{method.split('get_')[1]}' profile not found."
136
+ end
74
137
  end
75
-
76
- end; end
138
+ end
data/lib/dumper/profiles/4chan.rb CHANGED
@@ -1,34 +1,50 @@
1
- #--
2
- # Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
3
- #
4
- # This file is part of Dumper.
5
- #
6
- # Dumper is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Dumper is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Dumper. If not, see <http://www.gnu.org/licenses/>.
18
- #++
19
-
20
- module Dumper
21
- module Profiles
22
-
23
- def self.get_4chan(url, path, from = 1, to = 1)
24
- Nokogiri::HTML(open(url)).xpath('//a[@class = "fileThumb"]/@href').each { |p|
25
- self.get path, "http:#{p}"
26
- }
27
- end
28
-
29
- def self.info_4chan
30
- { :from => false, :to => false }
31
- end
32
-
33
- end
1
+ #--
2
+ # Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
3
+ #
4
+ # This file is part of Dumper.
5
+ #
6
+ # Dumper is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Dumper is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Dumper. If not, see <http://www.gnu.org/licenses/>.
18
+ #++
19
+
20
+ module Dumper
21
+ module Profiles
22
+
23
+ class FourChan < Profile
24
+ def dump(url, path, from, to)
25
+ from -= 1
26
+ to -= 1 if to >= 1
27
+
28
+ Nokogiri::HTML(open(url)).xpath('//a[@class = "fileThumb"]/@href')[from..to].each { |p|
29
+ @pool.process {
30
+ Dumper::Profiles.get path, "http:#{p}"
31
+ }
32
+ }
33
+ end
34
+ end
35
+
36
+ class << self
37
+ def get_4chan(url, path, from = 1, to = -1)
38
+ FourChan.new { |p|
39
+ p.dump url, path, from, to
40
+ p.shutdown
41
+ }
42
+ end
43
+
44
+ def info_4chan
45
+ { from: :enabled, to: :enabled, type: :images }
46
+ end
47
+ end
48
+
49
+ end
34
50
  end