image-dumper 0.5.5 → 0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/dumper +47 -40
- data/lib/dumper.rb +38 -36
- data/lib/dumper/dumper.rb +114 -52
- data/lib/dumper/profiles/4chan.rb +49 -33
- data/lib/dumper/profiles/behoimi.rb +48 -38
- data/lib/dumper/profiles/booru.rb +54 -40
- data/lib/dumper/profiles/deviantart.rb +49 -33
- data/lib/dumper/profiles/fakku.rb +61 -51
- data/lib/dumper/profiles/fc2.rb +49 -33
- data/lib/dumper/profiles/gelbooru.rb +54 -40
- data/lib/dumper/profiles/imagebam.rb +55 -37
- data/lib/dumper/profiles/mangaeden.rb +69 -53
- data/lib/dumper/profiles/mangago.rb +53 -40
- data/lib/dumper/profiles/mangahere.rb +68 -49
- data/lib/dumper/profiles/multiplayer.rb +55 -41
- data/lib/dumper/profiles/redblow.rb +49 -36
- data/lib/dumper/profiles/sankakucomplex.rb +81 -58
- data/lib/dumper/profiles/teca.rb +54 -34
- data/lib/dumper/profiles/yande.rb +51 -42
- data/lib/dumper/utils.rb +40 -42
- data/lib/dumper/version.rb +23 -23
- data/spec/4chan_spec.rb +28 -0
- data/spec/behoimi_spec.rb +28 -0
- data/spec/booru_spec.rb +28 -0
- data/spec/deviantart_spec.rb +28 -0
- data/spec/fakku_spec.rb +28 -0
- data/spec/fc2_spec.rb +28 -0
- data/spec/gelbooru_spec.rb +28 -0
- data/spec/imagebam_spec.rb +28 -0
- data/spec/mangaeden_spec.rb +28 -0
- data/spec/mangago_spec.rb +28 -0
- data/spec/mangahere_spec.rb +28 -0
- data/spec/multiplayer_spec.rb +28 -0
- data/spec/redblow_spec.rb +28 -0
- data/spec/sankakucomplex_spec.rb +41 -0
- data/spec/teca_spec.rb +28 -0
- data/spec/yande_spec.rb +28 -0
- metadata +119 -6
- data/lib/dumper/profiles/i_doujin.rb +0 -38
- data/lib/dumper/profiles/mi9.rb +0 -44
- data/lib/dumper/profiles/wallpaperhere.rb +0 -43
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d620c7ba1dbc1bd22e170e520822d8711c1fd1f8
|
4
|
+
data.tar.gz: 61eb44fd3c2ed01b948acb3993081437bf65b8ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8e7a28e8d7fb7b2ee24043ae8c3cbdbbcfe5ec7b1267a1445cf20d54cfd641bd0e586b1da3db9f983e61b0b8f77ecff178df72be1a54cbe467965e620c410d33
|
7
|
+
data.tar.gz: b6381b814c485917103bb4bb15ed6bf6aa2092e47ad5e963f2480fd649445e8800aba1e7371e92bb183d459a7bbc2cfe84eaea5b7dd57606d7b340423ceff009
|
data/bin/dumper
CHANGED
@@ -17,95 +17,102 @@
|
|
17
17
|
# You should have received a copy of the GNU General Public License
|
18
18
|
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#++
|
20
|
-
|
21
20
|
require 'dumper'
|
22
21
|
|
23
22
|
options = {}
|
24
23
|
|
25
24
|
OptionParser.new do |o|
|
26
|
-
options[:url]
|
25
|
+
options[:url ] = []
|
27
26
|
options[:path] = []
|
28
27
|
|
29
|
-
o.on '-l', '--list',
|
28
|
+
o.on '-l', '--list', 'Show available profiles' do
|
30
29
|
abort 'Profiles available:'.tap { |s|
|
31
30
|
Dumper::Profiles::list.sort { |a, b| a <=> b }.each { |p| s << "\n" + (' ' * 3) + p }
|
32
31
|
} if ARGV.empty?
|
33
32
|
end
|
34
33
|
|
35
|
-
o.on '-i', '--info PROFILE',
|
34
|
+
o.on '-i', '--info PROFILE', 'Get info about profiles' do |profile|
|
36
35
|
if Dumper::Profiles::list.include? profile
|
37
|
-
method = ("info_#{profile}").to_sym
|
36
|
+
method = ("info_#{profile}").to_sym
|
38
37
|
Dumper::Profiles::send(method).tap { |i|
|
39
|
-
puts "Option 'from' is #{i[:from] ? 'available' : 'not available'}"
|
40
|
-
puts "Option 'to' is #{i[:to] ? 'available' : 'not available'}"
|
38
|
+
puts "Option 'from' is #{i[:from] ? 'available' : 'not available'}."
|
39
|
+
puts "Option 'to' is #{i[:to] ? 'available' : 'not available'}."
|
40
|
+
abort "It dumps #{i[:type]}."
|
41
41
|
}
|
42
42
|
else
|
43
|
-
|
43
|
+
abort 'Profile not found.'
|
44
44
|
end
|
45
|
-
abort
|
46
45
|
end
|
47
46
|
|
48
|
-
o.on '-u', '--url URL',
|
49
|
-
options[:url]
|
47
|
+
o.on '-u', '--url URL', 'Target URL' do |url|
|
48
|
+
options[:url] << url
|
50
49
|
end
|
51
50
|
|
52
|
-
o.on '-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
51
|
+
o.on '-o', '--from PAGE', 'Start to save from... (if allowed)' do |pages|
|
52
|
+
options[:from] = pages.to_i
|
53
|
+
end
|
54
|
+
|
55
|
+
o.on '-g', '--to PAGE', 'Finish to save at... (if allowed)' do |pages|
|
56
|
+
options[:to] = pages.to_i
|
57
|
+
end
|
58
|
+
|
59
|
+
o.on '-p', '--path PATH', 'Target folder' do |path|
|
60
|
+
options[:path] << path
|
59
61
|
end
|
60
62
|
|
61
|
-
o.on '-
|
62
|
-
options[:
|
63
|
+
o.on '-x', '--xpath XPATH', 'Use a custom xpath' do |xpath|
|
64
|
+
options[:xpath] = xpath
|
63
65
|
end
|
64
66
|
|
65
|
-
o.on '-r', '--profile PROFILE', '
|
67
|
+
o.on '-r', '--profile PROFILE', 'Force the use of the given profile' do |profile|
|
66
68
|
options[:profile] = profile
|
67
69
|
end
|
68
70
|
|
69
|
-
o.on '-
|
70
|
-
|
71
|
+
o.on '-t', '--threads MIN:MAX', 'Set the number of threads to use' do |threads|
|
72
|
+
threads = threads.split ?:
|
73
|
+
Dumper::Profiles.pool_size = threads[0].to_i, threads[1].to_i
|
71
74
|
end
|
72
75
|
|
73
|
-
o.on '-
|
74
|
-
|
76
|
+
o.on '-s', '--silence', 'Print only important messages' do
|
77
|
+
Dumper::Profiles.verbose = false
|
75
78
|
end
|
76
79
|
|
77
|
-
o.on '-
|
78
|
-
|
80
|
+
o.on '-f', '--file FILE', 'File containing in each line "URL||Folder"' do |file|
|
81
|
+
file = File.open(file).read.gsub(/\r\n?/, "\n")
|
82
|
+
file.each_line { |line|
|
83
|
+
split = line.split '||'
|
84
|
+
options[:url] << split[0].strip
|
85
|
+
options[:path] << split[1].strip
|
86
|
+
}
|
79
87
|
end
|
80
88
|
end.parse!
|
81
89
|
|
82
90
|
if options[:url].empty?
|
83
|
-
abort 'URL or list of URLs is required.'
|
91
|
+
abort 'An URL or a list of URLs is required.'
|
84
92
|
elsif options[:path].empty?
|
85
93
|
abort 'Path is required.'
|
86
94
|
end
|
87
95
|
|
88
96
|
options[:url].each_with_index { |url, i|
|
89
97
|
begin
|
90
|
-
|
91
|
-
host = options.has_key?(:profile) ? options[:profile] : URI.parse(url).host.split(?.)[-2]
|
92
98
|
Dir.mkdir(options[:path][i]) unless File.directory? options[:path][i]
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
Dumper::Profiles::send method, url, options[:path][i]
|
99
|
+
|
100
|
+
host = options[:profile] || URI.parse(url).host.split(?.)[-2]
|
101
|
+
|
102
|
+
if options[:xpath].nil? && Dumper::Profiles::list.include?(host.gsub(?-, ?_))
|
103
|
+
method = "get_#{host.gsub ?-, ?_}".to_sym
|
104
|
+
case
|
105
|
+
when options[:from] && options[:to] then Dumper::Profiles::send method, url, options[:path][i], options[:from], options[:to]
|
106
|
+
when options[:from] then Dumper::Profiles::send method, url, options[:path][i], options[:from]
|
107
|
+
else Dumper::Profiles::send method, url, options[:path][i]
|
100
108
|
end
|
101
109
|
else
|
102
110
|
Dumper::Profiles::get_generic url, options[:path][i], options[:xpath]
|
103
111
|
end
|
104
|
-
|
105
112
|
rescue Nokogiri::XML::XPath::SyntaxError => e
|
106
|
-
puts e.to_s.gsub
|
113
|
+
puts e.to_s.gsub /expression/, 'xpath'
|
107
114
|
puts 'Cannot dump.'
|
108
|
-
rescue OpenURI::HTTPError
|
115
|
+
rescue OpenURI::HTTPError => e
|
109
116
|
puts "Error opening #{url}: #{e}"
|
110
117
|
rescue URI::InvalidURIError => e
|
111
118
|
puts "URL #{url} is not valid: #{e}"
|
data/lib/dumper.rb
CHANGED
@@ -1,36 +1,38 @@
|
|
1
|
-
#--
|
2
|
-
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
-
#
|
4
|
-
# This file is part of Dumper.
|
5
|
-
#
|
6
|
-
# Smogon-API is free software: you can redistribute it and/or modify
|
7
|
-
# it under the terms of the GNU General Public License as published by
|
8
|
-
# the Free Software Foundation, either version 3 of the License, or
|
9
|
-
# (at your option) any later version.
|
10
|
-
#
|
11
|
-
# Smogon-API is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
# GNU General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public License
|
17
|
-
# along with Smogon-API. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
#++
|
19
|
-
|
20
|
-
require 'open-uri'
|
21
|
-
require '
|
22
|
-
require '
|
23
|
-
require '
|
24
|
-
require '
|
25
|
-
require '
|
26
|
-
require '
|
27
|
-
require '
|
28
|
-
require '
|
29
|
-
|
30
|
-
require '
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
require 'dumper/
|
35
|
-
|
36
|
-
|
1
|
+
#--
|
2
|
+
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
+
#
|
4
|
+
# This file is part of Dumper.
|
5
|
+
#
|
6
|
+
# Dumper is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Dumper is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require 'open-uri'
|
21
|
+
require 'net/http'
|
22
|
+
require 'uri'
|
23
|
+
require 'optparse'
|
24
|
+
require 'base64'
|
25
|
+
require 'nokogiri'
|
26
|
+
require 'openssl'
|
27
|
+
require 'certified'
|
28
|
+
require 'addressable/uri'
|
29
|
+
require 'json'
|
30
|
+
require 'thread/pool'
|
31
|
+
|
32
|
+
require 'dumper/utils'
|
33
|
+
require 'dumper/dumper'
|
34
|
+
require 'dumper/version'
|
35
|
+
|
36
|
+
Dir.glob(File.expand_path("../dumper/profiles/*.rb", __FILE__)).each { |f|
|
37
|
+
require "dumper/profiles/#{File.basename(f).split(?.)[0]}"
|
38
|
+
}
|
data/lib/dumper/dumper.rb
CHANGED
@@ -17,60 +17,122 @@
|
|
17
17
|
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
18
|
#++
|
19
19
|
|
20
|
-
module Dumper
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
filename = File.join path, filename == '' ? File.basename(p) : filename
|
44
|
-
filename.gsub!(File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR)
|
45
|
-
if File.exists? filename
|
46
|
-
puts "File #{filename} already exists."
|
47
|
-
else
|
48
|
-
filename = File.join path, rand(1000).to_s + '.jpg' unless filename[-4] == ?. || filename[-5] == ?.
|
49
|
-
puts "Downloading #{p} as #{filename}..."
|
50
|
-
File.open(filename, 'wb') { |f| f.write open(p, 'User-Agent' => ua, 'Referer' => ref).read }
|
51
|
-
end
|
20
|
+
module Dumper
|
21
|
+
module Profiles
|
22
|
+
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:16.0) Gecko/20100101 Firefox/16.0'
|
23
|
+
|
24
|
+
class Profile
|
25
|
+
include Dumper::Profiles
|
26
|
+
|
27
|
+
def initialize(&block)
|
28
|
+
min = pool_size[:min]
|
29
|
+
max = pool_size[:max]
|
30
|
+
|
31
|
+
@pool = Thread.pool min, max
|
32
|
+
puts "Using #{min}:#{max || min} threads..."
|
33
|
+
|
34
|
+
instance_eval &block
|
35
|
+
end
|
36
|
+
|
37
|
+
def dump(url, path, *args)
|
38
|
+
raise NotImplementedError
|
39
|
+
end
|
40
|
+
|
41
|
+
def shutdown
|
42
|
+
@pool.shutdown
|
52
43
|
end
|
53
|
-
rescue Exception => e
|
54
|
-
p e
|
55
|
-
puts "Error downloading \#{p}."
|
56
|
-
return false
|
57
44
|
end
|
58
|
-
return true
|
59
|
-
end
|
60
45
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
46
|
+
def pool_size
|
47
|
+
{
|
48
|
+
min: @min || 4,
|
49
|
+
max: @max
|
50
|
+
}
|
51
|
+
end
|
52
|
+
|
53
|
+
def verbose?
|
54
|
+
@verbose == nil || @verbose == true
|
55
|
+
end
|
56
|
+
|
57
|
+
class << self
|
58
|
+
def pool_size=(min, max = nil)
|
59
|
+
@min = min
|
60
|
+
@max = max
|
67
61
|
end
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
62
|
+
|
63
|
+
def verbose=(verbose)
|
64
|
+
@verbose = verbose
|
65
|
+
end
|
66
|
+
|
67
|
+
def verbose?
|
68
|
+
@verbose == nil || @verbose == true
|
69
|
+
end
|
70
|
+
|
71
|
+
def list
|
72
|
+
Dir.glob(File.expand_path('../profiles/*.rb', __FILE__)).sort { |a, b| b <=> a }.map { |f|
|
73
|
+
f = File.basename(f).split(?.)[0]
|
74
|
+
}
|
75
|
+
end
|
76
|
+
|
77
|
+
def get(path, url, options = {})
|
78
|
+
url = url.to_s
|
79
|
+
|
80
|
+
begin
|
81
|
+
if url.start_with? 'data:image/'
|
82
|
+
filename = File.join path, options[:filename] || rand(1000).to_s + '.' + url.split('data:image/')[1].split(?;)[0]
|
83
|
+
filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR
|
84
|
+
|
85
|
+
url.gsub /data:image\/png;base64,/, ''
|
86
|
+
|
87
|
+
if File.exists? filename
|
88
|
+
puts "File #{filename} already exists." if verbose?
|
89
|
+
else
|
90
|
+
puts "Downloading base64 image as #{filename}..." if verbose?
|
91
|
+
File.open(filename, 'wb') { |f|
|
92
|
+
f.write Base64.decode64(url)
|
93
|
+
}
|
94
|
+
end
|
95
|
+
else
|
96
|
+
filename = File.join path, options[:filename] || File.basename(url)
|
97
|
+
filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR
|
98
|
+
|
99
|
+
if File.exists? filename
|
100
|
+
puts "File #{filename} already exists." if verbose?
|
101
|
+
else
|
102
|
+
filename = File.join(path, rand(1000).to_s + '.jpg') unless filename[-4] == ?. || filename[-5] == ?.
|
103
|
+
puts "Downloading #{url} as #{filename}..." if verbose?
|
104
|
+
|
105
|
+
File.open(filename, 'wb') { |f|
|
106
|
+
f.write open(url,
|
107
|
+
'User-Agent' => options[:user_agent] || USER_AGENT,
|
108
|
+
'Referer' => options[:referer ] || url
|
109
|
+
).read
|
110
|
+
}
|
111
|
+
end
|
112
|
+
end
|
113
|
+
rescue Exception => e
|
114
|
+
p e
|
115
|
+
puts "Error downloading #{url}."
|
116
|
+
return false
|
117
|
+
end
|
118
|
+
|
119
|
+
true
|
120
|
+
end
|
121
|
+
|
122
|
+
def get_generic(url, path, xpath)
|
123
|
+
uri = nil
|
124
|
+
Nokogiri::HTML(open(url)).xpath(xpath).each { |p|
|
125
|
+
if p.to_s.start_with? ?/
|
126
|
+
uri = URI(url) if uri.nil?
|
127
|
+
p = "#{uri.scheme}://#{uri.host}#{p}"
|
128
|
+
end
|
129
|
+
get path, p
|
130
|
+
}
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
def method_missing(method, *args, &block)
|
135
|
+
"'#{method.split('get_')[1]}' profile not found."
|
136
|
+
end
|
74
137
|
end
|
75
|
-
|
76
|
-
end; end
|
138
|
+
end
|
@@ -1,34 +1,50 @@
|
|
1
|
-
#--
|
2
|
-
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
-
#
|
4
|
-
# This file is part of Dumper.
|
5
|
-
#
|
6
|
-
# Dumper is free software: you can redistribute it and/or modify
|
7
|
-
# it under the terms of the GNU General Public License as published by
|
8
|
-
# the Free Software Foundation, either version 3 of the License, or
|
9
|
-
# (at your option) any later version.
|
10
|
-
#
|
11
|
-
# Dumper is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
# GNU General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public License
|
17
|
-
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
#++
|
19
|
-
|
20
|
-
module Dumper
|
21
|
-
module Profiles
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
1
|
+
#--
|
2
|
+
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
+
#
|
4
|
+
# This file is part of Dumper.
|
5
|
+
#
|
6
|
+
# Dumper is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Dumper is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#++
|
19
|
+
|
20
|
+
module Dumper
|
21
|
+
module Profiles
|
22
|
+
|
23
|
+
class FourChan < Profile
|
24
|
+
def dump(url, path, from, to)
|
25
|
+
from -= 1
|
26
|
+
to -= 1 if to >= 1
|
27
|
+
|
28
|
+
Nokogiri::HTML(open(url)).xpath('//a[@class = "fileThumb"]/@href')[from..to].each { |p|
|
29
|
+
@pool.process {
|
30
|
+
Dumper::Profiles.get path, "http:#{p}"
|
31
|
+
}
|
32
|
+
}
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
class << self
|
37
|
+
def get_4chan(url, path, from = 1, to = -1)
|
38
|
+
FourChan.new { |p|
|
39
|
+
p.dump url, path, from, to
|
40
|
+
p.shutdown
|
41
|
+
}
|
42
|
+
end
|
43
|
+
|
44
|
+
def info_4chan
|
45
|
+
{ from: :enabled, to: :enabled, type: :images }
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
34
50
|
end
|