image-dumper 0.5.5 → 0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/dumper +47 -40
- data/lib/dumper.rb +38 -36
- data/lib/dumper/dumper.rb +114 -52
- data/lib/dumper/profiles/4chan.rb +49 -33
- data/lib/dumper/profiles/behoimi.rb +48 -38
- data/lib/dumper/profiles/booru.rb +54 -40
- data/lib/dumper/profiles/deviantart.rb +49 -33
- data/lib/dumper/profiles/fakku.rb +61 -51
- data/lib/dumper/profiles/fc2.rb +49 -33
- data/lib/dumper/profiles/gelbooru.rb +54 -40
- data/lib/dumper/profiles/imagebam.rb +55 -37
- data/lib/dumper/profiles/mangaeden.rb +69 -53
- data/lib/dumper/profiles/mangago.rb +53 -40
- data/lib/dumper/profiles/mangahere.rb +68 -49
- data/lib/dumper/profiles/multiplayer.rb +55 -41
- data/lib/dumper/profiles/redblow.rb +49 -36
- data/lib/dumper/profiles/sankakucomplex.rb +81 -58
- data/lib/dumper/profiles/teca.rb +54 -34
- data/lib/dumper/profiles/yande.rb +51 -42
- data/lib/dumper/utils.rb +40 -42
- data/lib/dumper/version.rb +23 -23
- data/spec/4chan_spec.rb +28 -0
- data/spec/behoimi_spec.rb +28 -0
- data/spec/booru_spec.rb +28 -0
- data/spec/deviantart_spec.rb +28 -0
- data/spec/fakku_spec.rb +28 -0
- data/spec/fc2_spec.rb +28 -0
- data/spec/gelbooru_spec.rb +28 -0
- data/spec/imagebam_spec.rb +28 -0
- data/spec/mangaeden_spec.rb +28 -0
- data/spec/mangago_spec.rb +28 -0
- data/spec/mangahere_spec.rb +28 -0
- data/spec/multiplayer_spec.rb +28 -0
- data/spec/redblow_spec.rb +28 -0
- data/spec/sankakucomplex_spec.rb +41 -0
- data/spec/teca_spec.rb +28 -0
- data/spec/yande_spec.rb +28 -0
- metadata +119 -6
- data/lib/dumper/profiles/i_doujin.rb +0 -38
- data/lib/dumper/profiles/mi9.rb +0 -44
- data/lib/dumper/profiles/wallpaperhere.rb +0 -43
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d620c7ba1dbc1bd22e170e520822d8711c1fd1f8
|
4
|
+
data.tar.gz: 61eb44fd3c2ed01b948acb3993081437bf65b8ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8e7a28e8d7fb7b2ee24043ae8c3cbdbbcfe5ec7b1267a1445cf20d54cfd641bd0e586b1da3db9f983e61b0b8f77ecff178df72be1a54cbe467965e620c410d33
|
7
|
+
data.tar.gz: b6381b814c485917103bb4bb15ed6bf6aa2092e47ad5e963f2480fd649445e8800aba1e7371e92bb183d459a7bbc2cfe84eaea5b7dd57606d7b340423ceff009
|
data/bin/dumper
CHANGED
@@ -17,95 +17,102 @@
|
|
17
17
|
# You should have received a copy of the GNU General Public License
|
18
18
|
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#++
|
20
|
-
|
21
20
|
require 'dumper'
|
22
21
|
|
23
22
|
options = {}
|
24
23
|
|
25
24
|
OptionParser.new do |o|
|
26
|
-
options[:url]
|
25
|
+
options[:url ] = []
|
27
26
|
options[:path] = []
|
28
27
|
|
29
|
-
o.on '-l', '--list',
|
28
|
+
o.on '-l', '--list', 'Show available profiles' do
|
30
29
|
abort 'Profiles available:'.tap { |s|
|
31
30
|
Dumper::Profiles::list.sort { |a, b| a <=> b }.each { |p| s << "\n" + (' ' * 3) + p }
|
32
31
|
} if ARGV.empty?
|
33
32
|
end
|
34
33
|
|
35
|
-
o.on '-i', '--info PROFILE',
|
34
|
+
o.on '-i', '--info PROFILE', 'Get info about profiles' do |profile|
|
36
35
|
if Dumper::Profiles::list.include? profile
|
37
|
-
method = ("info_#{profile}").to_sym
|
36
|
+
method = ("info_#{profile}").to_sym
|
38
37
|
Dumper::Profiles::send(method).tap { |i|
|
39
|
-
puts "Option 'from' is #{i[:from] ? 'available' : 'not available'}"
|
40
|
-
puts "Option 'to' is #{i[:to] ? 'available' : 'not available'}"
|
38
|
+
puts "Option 'from' is #{i[:from] ? 'available' : 'not available'}."
|
39
|
+
puts "Option 'to' is #{i[:to] ? 'available' : 'not available'}."
|
40
|
+
abort "It dumps #{i[:type]}."
|
41
41
|
}
|
42
42
|
else
|
43
|
-
|
43
|
+
abort 'Profile not found.'
|
44
44
|
end
|
45
|
-
abort
|
46
45
|
end
|
47
46
|
|
48
|
-
o.on '-u', '--url URL',
|
49
|
-
options[:url]
|
47
|
+
o.on '-u', '--url URL', 'Target URL' do |url|
|
48
|
+
options[:url] << url
|
50
49
|
end
|
51
50
|
|
52
|
-
o.on '-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
51
|
+
o.on '-o', '--from PAGE', 'Start to save from... (if allowed)' do |pages|
|
52
|
+
options[:from] = pages.to_i
|
53
|
+
end
|
54
|
+
|
55
|
+
o.on '-g', '--to PAGE', 'Finish to save at... (if allowed)' do |pages|
|
56
|
+
options[:to] = pages.to_i
|
57
|
+
end
|
58
|
+
|
59
|
+
o.on '-p', '--path PATH', 'Target folder' do |path|
|
60
|
+
options[:path] << path
|
59
61
|
end
|
60
62
|
|
61
|
-
o.on '-
|
62
|
-
options[:
|
63
|
+
o.on '-x', '--xpath XPATH', 'Use a custom xpath' do |xpath|
|
64
|
+
options[:xpath] = xpath
|
63
65
|
end
|
64
66
|
|
65
|
-
o.on '-r', '--profile PROFILE', '
|
67
|
+
o.on '-r', '--profile PROFILE', 'Force the use of the given profile' do |profile|
|
66
68
|
options[:profile] = profile
|
67
69
|
end
|
68
70
|
|
69
|
-
o.on '-
|
70
|
-
|
71
|
+
o.on '-t', '--threads MIN:MAX', 'Set the number of threads to use' do |threads|
|
72
|
+
threads = threads.split ?:
|
73
|
+
Dumper::Profiles.pool_size = threads[0].to_i, threads[1].to_i
|
71
74
|
end
|
72
75
|
|
73
|
-
o.on '-
|
74
|
-
|
76
|
+
o.on '-s', '--silence', 'Print only important messages' do
|
77
|
+
Dumper::Profiles.verbose = false
|
75
78
|
end
|
76
79
|
|
77
|
-
o.on '-
|
78
|
-
|
80
|
+
o.on '-f', '--file FILE', 'File containing in each line "URL||Folder"' do |file|
|
81
|
+
file = File.open(file).read.gsub(/\r\n?/, "\n")
|
82
|
+
file.each_line { |line|
|
83
|
+
split = line.split '||'
|
84
|
+
options[:url] << split[0].strip
|
85
|
+
options[:path] << split[1].strip
|
86
|
+
}
|
79
87
|
end
|
80
88
|
end.parse!
|
81
89
|
|
82
90
|
if options[:url].empty?
|
83
|
-
abort 'URL or list of URLs is required.'
|
91
|
+
abort 'An URL or a list of URLs is required.'
|
84
92
|
elsif options[:path].empty?
|
85
93
|
abort 'Path is required.'
|
86
94
|
end
|
87
95
|
|
88
96
|
options[:url].each_with_index { |url, i|
|
89
97
|
begin
|
90
|
-
|
91
|
-
host = options.has_key?(:profile) ? options[:profile] : URI.parse(url).host.split(?.)[-2]
|
92
98
|
Dir.mkdir(options[:path][i]) unless File.directory? options[:path][i]
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
Dumper::Profiles::send method, url, options[:path][i]
|
99
|
+
|
100
|
+
host = options[:profile] || URI.parse(url).host.split(?.)[-2]
|
101
|
+
|
102
|
+
if options[:xpath].nil? && Dumper::Profiles::list.include?(host.gsub(?-, ?_))
|
103
|
+
method = "get_#{host.gsub ?-, ?_}".to_sym
|
104
|
+
case
|
105
|
+
when options[:from] && options[:to] then Dumper::Profiles::send method, url, options[:path][i], options[:from], options[:to]
|
106
|
+
when options[:from] then Dumper::Profiles::send method, url, options[:path][i], options[:from]
|
107
|
+
else Dumper::Profiles::send method, url, options[:path][i]
|
100
108
|
end
|
101
109
|
else
|
102
110
|
Dumper::Profiles::get_generic url, options[:path][i], options[:xpath]
|
103
111
|
end
|
104
|
-
|
105
112
|
rescue Nokogiri::XML::XPath::SyntaxError => e
|
106
|
-
puts e.to_s.gsub
|
113
|
+
puts e.to_s.gsub /expression/, 'xpath'
|
107
114
|
puts 'Cannot dump.'
|
108
|
-
rescue OpenURI::HTTPError
|
115
|
+
rescue OpenURI::HTTPError => e
|
109
116
|
puts "Error opening #{url}: #{e}"
|
110
117
|
rescue URI::InvalidURIError => e
|
111
118
|
puts "URL #{url} is not valid: #{e}"
|
data/lib/dumper.rb
CHANGED
@@ -1,36 +1,38 @@
|
|
1
|
-
#--
|
2
|
-
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
-
#
|
4
|
-
# This file is part of Dumper.
|
5
|
-
#
|
6
|
-
# Smogon-API is free software: you can redistribute it and/or modify
|
7
|
-
# it under the terms of the GNU General Public License as published by
|
8
|
-
# the Free Software Foundation, either version 3 of the License, or
|
9
|
-
# (at your option) any later version.
|
10
|
-
#
|
11
|
-
# Smogon-API is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
# GNU General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public License
|
17
|
-
# along with Smogon-API. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
#++
|
19
|
-
|
20
|
-
require 'open-uri'
|
21
|
-
require '
|
22
|
-
require '
|
23
|
-
require '
|
24
|
-
require '
|
25
|
-
require '
|
26
|
-
require '
|
27
|
-
require '
|
28
|
-
require '
|
29
|
-
|
30
|
-
require '
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
require 'dumper/
|
35
|
-
|
36
|
-
|
1
|
+
#--
|
2
|
+
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
+
#
|
4
|
+
# This file is part of Dumper.
|
5
|
+
#
|
6
|
+
# Dumper is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Dumper is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require 'open-uri'
|
21
|
+
require 'net/http'
|
22
|
+
require 'uri'
|
23
|
+
require 'optparse'
|
24
|
+
require 'base64'
|
25
|
+
require 'nokogiri'
|
26
|
+
require 'openssl'
|
27
|
+
require 'certified'
|
28
|
+
require 'addressable/uri'
|
29
|
+
require 'json'
|
30
|
+
require 'thread/pool'
|
31
|
+
|
32
|
+
require 'dumper/utils'
|
33
|
+
require 'dumper/dumper'
|
34
|
+
require 'dumper/version'
|
35
|
+
|
36
|
+
Dir.glob(File.expand_path("../dumper/profiles/*.rb", __FILE__)).each { |f|
|
37
|
+
require "dumper/profiles/#{File.basename(f).split(?.)[0]}"
|
38
|
+
}
|
data/lib/dumper/dumper.rb
CHANGED
@@ -17,60 +17,122 @@
|
|
17
17
|
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
18
|
#++
|
19
19
|
|
20
|
-
module Dumper
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
filename = File.join path, filename == '' ? File.basename(p) : filename
|
44
|
-
filename.gsub!(File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR)
|
45
|
-
if File.exists? filename
|
46
|
-
puts "File #{filename} already exists."
|
47
|
-
else
|
48
|
-
filename = File.join path, rand(1000).to_s + '.jpg' unless filename[-4] == ?. || filename[-5] == ?.
|
49
|
-
puts "Downloading #{p} as #{filename}..."
|
50
|
-
File.open(filename, 'wb') { |f| f.write open(p, 'User-Agent' => ua, 'Referer' => ref).read }
|
51
|
-
end
|
20
|
+
module Dumper
|
21
|
+
module Profiles
|
22
|
+
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:16.0) Gecko/20100101 Firefox/16.0'
|
23
|
+
|
24
|
+
class Profile
|
25
|
+
include Dumper::Profiles
|
26
|
+
|
27
|
+
def initialize(&block)
|
28
|
+
min = pool_size[:min]
|
29
|
+
max = pool_size[:max]
|
30
|
+
|
31
|
+
@pool = Thread.pool min, max
|
32
|
+
puts "Using #{min}:#{max || min} threads..."
|
33
|
+
|
34
|
+
instance_eval &block
|
35
|
+
end
|
36
|
+
|
37
|
+
def dump(url, path, *args)
|
38
|
+
raise NotImplementedError
|
39
|
+
end
|
40
|
+
|
41
|
+
def shutdown
|
42
|
+
@pool.shutdown
|
52
43
|
end
|
53
|
-
rescue Exception => e
|
54
|
-
p e
|
55
|
-
puts "Error downloading \#{p}."
|
56
|
-
return false
|
57
44
|
end
|
58
|
-
return true
|
59
|
-
end
|
60
45
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
46
|
+
def pool_size
|
47
|
+
{
|
48
|
+
min: @min || 4,
|
49
|
+
max: @max
|
50
|
+
}
|
51
|
+
end
|
52
|
+
|
53
|
+
def verbose?
|
54
|
+
@verbose == nil || @verbose == true
|
55
|
+
end
|
56
|
+
|
57
|
+
class << self
|
58
|
+
def pool_size=(min, max = nil)
|
59
|
+
@min = min
|
60
|
+
@max = max
|
67
61
|
end
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
62
|
+
|
63
|
+
def verbose=(verbose)
|
64
|
+
@verbose = verbose
|
65
|
+
end
|
66
|
+
|
67
|
+
def verbose?
|
68
|
+
@verbose == nil || @verbose == true
|
69
|
+
end
|
70
|
+
|
71
|
+
def list
|
72
|
+
Dir.glob(File.expand_path('../profiles/*.rb', __FILE__)).sort { |a, b| b <=> a }.map { |f|
|
73
|
+
f = File.basename(f).split(?.)[0]
|
74
|
+
}
|
75
|
+
end
|
76
|
+
|
77
|
+
def get(path, url, options = {})
|
78
|
+
url = url.to_s
|
79
|
+
|
80
|
+
begin
|
81
|
+
if url.start_with? 'data:image/'
|
82
|
+
filename = File.join path, options[:filename] || rand(1000).to_s + '.' + url.split('data:image/')[1].split(?;)[0]
|
83
|
+
filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR
|
84
|
+
|
85
|
+
url.gsub /data:image\/png;base64,/, ''
|
86
|
+
|
87
|
+
if File.exists? filename
|
88
|
+
puts "File #{filename} already exists." if verbose?
|
89
|
+
else
|
90
|
+
puts "Downloading base64 image as #{filename}..." if verbose?
|
91
|
+
File.open(filename, 'wb') { |f|
|
92
|
+
f.write Base64.decode64(url)
|
93
|
+
}
|
94
|
+
end
|
95
|
+
else
|
96
|
+
filename = File.join path, options[:filename] || File.basename(url)
|
97
|
+
filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR
|
98
|
+
|
99
|
+
if File.exists? filename
|
100
|
+
puts "File #{filename} already exists." if verbose?
|
101
|
+
else
|
102
|
+
filename = File.join(path, rand(1000).to_s + '.jpg') unless filename[-4] == ?. || filename[-5] == ?.
|
103
|
+
puts "Downloading #{url} as #{filename}..." if verbose?
|
104
|
+
|
105
|
+
File.open(filename, 'wb') { |f|
|
106
|
+
f.write open(url,
|
107
|
+
'User-Agent' => options[:user_agent] || USER_AGENT,
|
108
|
+
'Referer' => options[:referer ] || url
|
109
|
+
).read
|
110
|
+
}
|
111
|
+
end
|
112
|
+
end
|
113
|
+
rescue Exception => e
|
114
|
+
p e
|
115
|
+
puts "Error downloading #{url}."
|
116
|
+
return false
|
117
|
+
end
|
118
|
+
|
119
|
+
true
|
120
|
+
end
|
121
|
+
|
122
|
+
def get_generic(url, path, xpath)
|
123
|
+
uri = nil
|
124
|
+
Nokogiri::HTML(open(url)).xpath(xpath).each { |p|
|
125
|
+
if p.to_s.start_with? ?/
|
126
|
+
uri = URI(url) if uri.nil?
|
127
|
+
p = "#{uri.scheme}://#{uri.host}#{p}"
|
128
|
+
end
|
129
|
+
get path, p
|
130
|
+
}
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
def method_missing(method, *args, &block)
|
135
|
+
"'#{method.split('get_')[1]}' profile not found."
|
136
|
+
end
|
74
137
|
end
|
75
|
-
|
76
|
-
end; end
|
138
|
+
end
|
@@ -1,34 +1,50 @@
|
|
1
|
-
#--
|
2
|
-
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
-
#
|
4
|
-
# This file is part of Dumper.
|
5
|
-
#
|
6
|
-
# Dumper is free software: you can redistribute it and/or modify
|
7
|
-
# it under the terms of the GNU General Public License as published by
|
8
|
-
# the Free Software Foundation, either version 3 of the License, or
|
9
|
-
# (at your option) any later version.
|
10
|
-
#
|
11
|
-
# Dumper is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
# GNU General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public License
|
17
|
-
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
#++
|
19
|
-
|
20
|
-
module Dumper
|
21
|
-
module Profiles
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
1
|
+
#--
|
2
|
+
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
+
#
|
4
|
+
# This file is part of Dumper.
|
5
|
+
#
|
6
|
+
# Dumper is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Dumper is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#++
|
19
|
+
|
20
|
+
module Dumper
|
21
|
+
module Profiles
|
22
|
+
|
23
|
+
class FourChan < Profile
|
24
|
+
def dump(url, path, from, to)
|
25
|
+
from -= 1
|
26
|
+
to -= 1 if to >= 1
|
27
|
+
|
28
|
+
Nokogiri::HTML(open(url)).xpath('//a[@class = "fileThumb"]/@href')[from..to].each { |p|
|
29
|
+
@pool.process {
|
30
|
+
Dumper::Profiles.get path, "http:#{p}"
|
31
|
+
}
|
32
|
+
}
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
class << self
|
37
|
+
def get_4chan(url, path, from = 1, to = -1)
|
38
|
+
FourChan.new { |p|
|
39
|
+
p.dump url, path, from, to
|
40
|
+
p.shutdown
|
41
|
+
}
|
42
|
+
end
|
43
|
+
|
44
|
+
def info_4chan
|
45
|
+
{ from: :enabled, to: :enabled, type: :images }
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
34
50
|
end
|