image-dumper 0.6.3 → 0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/dumper +18 -14
- data/lib/dumper.rb +6 -0
- data/lib/dumper/dumper.rb +97 -99
- data/lib/dumper/logger.rb +61 -0
- data/lib/dumper/profile.rb +46 -0
- data/lib/dumper/profiles.rb +24 -0
- data/lib/dumper/profiles/4chan.rb +1 -1
- data/lib/dumper/profiles/behoimi.rb +2 -2
- data/lib/dumper/profiles/booru.rb +3 -2
- data/lib/dumper/profiles/deviantart.rb +1 -1
- data/lib/dumper/profiles/fakku.rb +3 -2
- data/lib/dumper/profiles/fc2.rb +1 -1
- data/lib/dumper/profiles/gelbooru.rb +3 -2
- data/lib/dumper/profiles/imagebam.rb +1 -1
- data/lib/dumper/profiles/mangaeden.rb +2 -2
- data/lib/dumper/profiles/mangago.rb +1 -1
- data/lib/dumper/profiles/mangahere.rb +1 -1
- data/lib/dumper/profiles/multiplayer.rb +1 -1
- data/lib/dumper/profiles/redblow.rb +1 -1
- data/lib/dumper/profiles/sankakucomplex.rb +9 -4
- data/lib/dumper/profiles/teca.rb +1 -1
- data/lib/dumper/profiles/yande.rb +5 -4
- data/lib/dumper/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06a97439e109b4b00205dc78978c35e810ad4990
|
4
|
+
data.tar.gz: e564cf847b687c07c3e7496377e57f3a7d40265a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e213fdd13e3318788c99a60b4ad749a60927c561e03802f834b4d7d29371f08a07c04c68fa8e3fba16ded775ff211f388397400c989580522920f90ddef7b68f
|
7
|
+
data.tar.gz: c0e9703a9be61dfe13d948b5b45d0bf512725579d2f807e00e7e5da014a72d4def01992985f89a37747b80a7ac41c3b35a9854b5e01c34b031692f3cf82ca6ee
|
data/bin/dumper
CHANGED
@@ -27,14 +27,14 @@ OptionParser.new do |o|
|
|
27
27
|
|
28
28
|
o.on '-l', '--list', 'Show available profiles' do
|
29
29
|
abort 'Profiles available:'.tap { |s|
|
30
|
-
Dumper
|
30
|
+
Dumper.list.sort { |a, b| a <=> b }.each { |p| s << "\n" + (' ' * 3) + p }
|
31
31
|
} if ARGV.empty?
|
32
32
|
end
|
33
33
|
|
34
34
|
o.on '-i', '--info PROFILE', 'Get info about profiles' do |profile|
|
35
|
-
if Dumper::Profiles
|
35
|
+
if Dumper::Profiles.list.include? profile
|
36
36
|
method = ("info_#{profile}").to_sym
|
37
|
-
Dumper::Profiles
|
37
|
+
Dumper::Profiles.send(method).tap { |i|
|
38
38
|
puts "Option 'from' is #{i[:from] ? 'available' : 'not available'}."
|
39
39
|
puts "Option 'to' is #{i[:to] ? 'available' : 'not available'}."
|
40
40
|
abort "It dumps #{i[:type]}."
|
@@ -48,11 +48,11 @@ OptionParser.new do |o|
|
|
48
48
|
options[:url] << url
|
49
49
|
end
|
50
50
|
|
51
|
-
o.on '-
|
51
|
+
o.on '-f', '--from PAGE', 'Start to save from... (if allowed)' do |pages|
|
52
52
|
options[:from] = pages.to_i
|
53
53
|
end
|
54
54
|
|
55
|
-
o.on '-
|
55
|
+
o.on '-t', '--to PAGE', 'Finish to save at... (if allowed)' do |pages|
|
56
56
|
options[:to] = pages.to_i
|
57
57
|
end
|
58
58
|
|
@@ -70,14 +70,18 @@ OptionParser.new do |o|
|
|
70
70
|
|
71
71
|
o.on '-t', '--threads MIN:MAX', 'Set the number of threads to use' do |threads|
|
72
72
|
threads = threads.split ?:
|
73
|
-
Dumper
|
73
|
+
Dumper.pool_size = threads[0].to_i, threads[1].to_i
|
74
74
|
end
|
75
75
|
|
76
|
-
o.on '-
|
77
|
-
Dumper
|
76
|
+
o.on '-m', '--silence', 'Print only important messages' do
|
77
|
+
Dumper.shut_up!
|
78
78
|
end
|
79
79
|
|
80
|
-
o.on '-
|
80
|
+
o.on '-o', '--output ', 'Print the logs on the given file' do |file|
|
81
|
+
Dumper::Logger.redirect_on :file, file
|
82
|
+
end
|
83
|
+
|
84
|
+
o.on '-s', '--file FILE', 'File containing in each line "URL||Folder"' do |file|
|
81
85
|
file = File.open(file).read.gsub(/\r\n?/, "\n")
|
82
86
|
file.each_line { |line|
|
83
87
|
split = line.split '||'
|
@@ -99,15 +103,15 @@ options[:url].each_with_index { |url, i|
|
|
99
103
|
|
100
104
|
host = options[:profile] || URI.parse(url).host.split(?.)[-2]
|
101
105
|
|
102
|
-
if options[:xpath].nil? && Dumper
|
106
|
+
if options[:xpath].nil? && Dumper.list.include?(host.gsub(?-, ?_))
|
103
107
|
method = "get_#{host.gsub ?-, ?_}".to_sym
|
104
108
|
case
|
105
|
-
when options[:from] && options[:to] then Dumper::Profiles
|
106
|
-
when options[:from] then Dumper::Profiles
|
107
|
-
else Dumper::Profiles
|
109
|
+
when options[:from] && options[:to] then Dumper::Profiles.send method, url, options[:path][i], options[:from], options[:to]
|
110
|
+
when options[:from] then Dumper::Profiles.send method, url, options[:path][i], options[:from]
|
111
|
+
else Dumper::Profiles.send method, url, options[:path][i]
|
108
112
|
end
|
109
113
|
else
|
110
|
-
Dumper::Profiles
|
114
|
+
Dumper::Profiles.get_generic url, options[:path][i], options[:xpath]
|
111
115
|
end
|
112
116
|
rescue Nokogiri::XML::XPath::SyntaxError => e
|
113
117
|
puts e.to_s.gsub /expression/, 'xpath'
|
data/lib/dumper.rb
CHANGED
@@ -28,11 +28,17 @@ require 'certified'
|
|
28
28
|
require 'addressable/uri'
|
29
29
|
require 'json'
|
30
30
|
require 'thread/pool'
|
31
|
+
require 'observer'
|
31
32
|
|
32
33
|
require 'dumper/utils'
|
34
|
+
require 'dumper/logger'
|
35
|
+
require 'dumper/profile'
|
36
|
+
require 'dumper/profiles'
|
33
37
|
require 'dumper/dumper'
|
34
38
|
require 'dumper/version'
|
35
39
|
|
36
40
|
Dir.glob(File.expand_path("../dumper/profiles/*.rb", __FILE__)).each { |f|
|
37
41
|
require "dumper/profiles/#{File.basename(f).split(?.)[0]}"
|
38
42
|
}
|
43
|
+
|
44
|
+
Dumper.add_observer Dumper::Logger.new
|
data/lib/dumper/dumper.rb
CHANGED
@@ -18,128 +18,126 @@
|
|
18
18
|
#++
|
19
19
|
|
20
20
|
module Dumper
|
21
|
-
|
22
|
-
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:16.0) Gecko/20100101 Firefox/16.0'
|
21
|
+
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:16.0) Gecko/20100101 Firefox/16.0'
|
23
22
|
|
24
|
-
|
25
|
-
|
23
|
+
def pool_size
|
24
|
+
{
|
25
|
+
min: @min || 4,
|
26
|
+
max: @max
|
27
|
+
}
|
28
|
+
end
|
26
29
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
+
def verbose?
|
31
|
+
@verbose == nil || @verbose == true
|
32
|
+
end
|
33
|
+
alias_method :mute?, :verbose?
|
30
34
|
|
31
|
-
|
32
|
-
|
35
|
+
def mute?
|
36
|
+
@verbose == false
|
37
|
+
end
|
38
|
+
alias_method :muted?, :mute?
|
33
39
|
|
34
|
-
|
35
|
-
|
40
|
+
class << self
|
41
|
+
include Observable
|
36
42
|
|
37
|
-
|
38
|
-
|
39
|
-
|
43
|
+
def pool_size=(min, max = nil)
|
44
|
+
@min = min
|
45
|
+
@max = max
|
46
|
+
end
|
40
47
|
|
41
|
-
|
42
|
-
|
43
|
-
end
|
48
|
+
def verbose=(verbose)
|
49
|
+
@verbose = verbose
|
44
50
|
end
|
45
51
|
|
46
|
-
def
|
47
|
-
|
48
|
-
min: @min || 4,
|
49
|
-
max: @max
|
50
|
-
}
|
52
|
+
def shut_up!
|
53
|
+
@verbose == false
|
51
54
|
end
|
55
|
+
alias_method :mute!, :shut_up!
|
56
|
+
|
57
|
+
def verbose!
|
58
|
+
@verbose == true
|
59
|
+
end
|
60
|
+
alias_method :unmute!, :verbose!
|
52
61
|
|
53
62
|
def verbose?
|
54
63
|
@verbose == nil || @verbose == true
|
55
64
|
end
|
56
65
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
+
def list
|
67
|
+
Dir.glob(File.expand_path('../profiles/*.rb', __FILE__)).sort { |a, b| b <=> a }.map { |f|
|
68
|
+
f = File.basename(f).split(?.)[0]
|
69
|
+
}
|
70
|
+
end
|
71
|
+
|
72
|
+
def get(path, url, options = {})
|
73
|
+
url = url.to_s
|
74
|
+
errors = 0
|
75
|
+
|
76
|
+
begin
|
77
|
+
if url.start_with? 'data:image/'
|
78
|
+
filename = File.join path, options[:filename] || rand(1000).to_s + '.' + url.split('data:image/')[1].split(?;)[0]
|
79
|
+
filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR
|
66
80
|
|
67
|
-
|
68
|
-
@verbose == nil || @verbose == true
|
69
|
-
end
|
81
|
+
url.gsub /data:image\/png;base64,/, ''
|
70
82
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
}
|
75
|
-
end
|
76
|
-
|
77
|
-
def get(path, url, options = {})
|
78
|
-
url = url.to_s
|
79
|
-
errors = 0
|
80
|
-
|
81
|
-
begin
|
82
|
-
if url.start_with? 'data:image/'
|
83
|
-
filename = File.join path, options[:filename] || rand(1000).to_s + '.' + url.split('data:image/')[1].split(?;)[0]
|
84
|
-
filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR
|
85
|
-
|
86
|
-
url.gsub /data:image\/png;base64,/, ''
|
87
|
-
|
88
|
-
if File.exists? filename
|
89
|
-
puts "File #{filename} already exists." if verbose?
|
90
|
-
else
|
91
|
-
puts "Downloading base64 image as #{filename}..." if verbose?
|
92
|
-
File.open(filename, 'wb') { |f|
|
93
|
-
f.write Base64.decode64(url)
|
94
|
-
}
|
95
|
-
end
|
83
|
+
if File.exists? filename
|
84
|
+
changed
|
85
|
+
notify_observers error: "File #{filename} already exists."
|
96
86
|
else
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
else
|
103
|
-
filename = File.join(path, rand(1000).to_s + '.jpg') unless filename[-4] == ?. || filename[-5] == ?.
|
104
|
-
puts "Downloading #{url} as #{filename}..." if verbose?
|
105
|
-
|
106
|
-
File.open(filename, 'wb') { |f|
|
107
|
-
f.write open(url,
|
108
|
-
'User-Agent' => options[:user_agent] || USER_AGENT,
|
109
|
-
'Referer' => options[:referer ] || url
|
110
|
-
).read
|
111
|
-
}
|
112
|
-
end
|
87
|
+
changed
|
88
|
+
notify_observers status: "Downloading base64 image as #{filename}..."
|
89
|
+
File.open(filename, 'wb') { |f|
|
90
|
+
f.write Base64.decode64(url)
|
91
|
+
}
|
113
92
|
end
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
93
|
+
else
|
94
|
+
filename = File.join path, options[:filename] || File.basename(url)
|
95
|
+
filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR
|
96
|
+
|
97
|
+
if File.exists? filename
|
98
|
+
changed
|
99
|
+
notify_observers error: "File #{filename} already exists."
|
119
100
|
else
|
120
|
-
|
121
|
-
|
122
|
-
|
101
|
+
filename = File.join(path, rand(1000).to_s + '.jpg') unless filename[-4] == ?. || filename[-5] == ?.
|
102
|
+
changed
|
103
|
+
notify_observers status: "Downloading #{url} as #{filename}..."
|
104
|
+
|
105
|
+
File.open(filename, 'wb') { |f|
|
106
|
+
f.write open(url,
|
107
|
+
'User-Agent' => options[:user_agent] || USER_AGENT,
|
108
|
+
'Referer' => options[:referer ] || url
|
109
|
+
).read
|
110
|
+
}
|
123
111
|
end
|
124
112
|
end
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
end
|
136
|
-
get path, p
|
137
|
-
}
|
113
|
+
rescue Exception => e
|
114
|
+
if errors <= 3
|
115
|
+
errors += 1
|
116
|
+
sleep 3
|
117
|
+
retry
|
118
|
+
else
|
119
|
+
changed
|
120
|
+
notify_observers critical_error: "Error downloading #{url}.", critical_error_dump: e
|
121
|
+
return false
|
122
|
+
end
|
138
123
|
end
|
124
|
+
|
125
|
+
true
|
139
126
|
end
|
140
|
-
|
141
|
-
def
|
142
|
-
|
127
|
+
|
128
|
+
def get_generic(url, path, xpath)
|
129
|
+
uri = nil
|
130
|
+
Nokogiri::HTML(open(url)).xpath(xpath).each { |p|
|
131
|
+
if p.to_s.start_with? ?/
|
132
|
+
uri = URI(url) if uri.nil?
|
133
|
+
p = "#{uri.scheme}://#{uri.host}#{p}"
|
134
|
+
end
|
135
|
+
get path, p
|
136
|
+
}
|
143
137
|
end
|
144
138
|
end
|
139
|
+
|
140
|
+
def method_missing(method, *args, &block)
|
141
|
+
"'#{method.split('get_')[1]}' profile not found."
|
142
|
+
end
|
145
143
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
+
#
|
4
|
+
# This file is part of Dumper.
|
5
|
+
#
|
6
|
+
# Dumper is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Dumper is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#++
|
19
|
+
|
20
|
+
module Dumper
|
21
|
+
class Logger
|
22
|
+
|
23
|
+
class << self
|
24
|
+
def redirect_on(where, file)
|
25
|
+
@@where = where
|
26
|
+
@@file = !file || file.empty? ? 'dumper.log' : file
|
27
|
+
end
|
28
|
+
|
29
|
+
def log_on_file(file, data)
|
30
|
+
File.open(file, ?a) { |file|
|
31
|
+
data.each { |status, message|
|
32
|
+
file.puts status == :critical_error_dump ? message.inspect : message
|
33
|
+
}
|
34
|
+
}
|
35
|
+
end
|
36
|
+
|
37
|
+
def log_on_screen(data)
|
38
|
+
data.each { |status, message|
|
39
|
+
if status == :critical_error_dump
|
40
|
+
p message
|
41
|
+
else
|
42
|
+
puts message
|
43
|
+
end
|
44
|
+
}
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def initialize
|
49
|
+
@@where = :screen
|
50
|
+
end
|
51
|
+
|
52
|
+
def update(data)
|
53
|
+
if @@where == :file
|
54
|
+
Logger.log_on_file @@file, data
|
55
|
+
else
|
56
|
+
Logger.log_on_screen(data) if Dumper.verbose?
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
+
#
|
4
|
+
# This file is part of Dumper.
|
5
|
+
#
|
6
|
+
# Dumper is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Dumper is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#++
|
19
|
+
|
20
|
+
module Dumper
|
21
|
+
class Profile
|
22
|
+
include Dumper
|
23
|
+
include Observable
|
24
|
+
|
25
|
+
def initialize(&block)
|
26
|
+
add_observer Dumper::Logger.new
|
27
|
+
|
28
|
+
min = pool_size[:min]
|
29
|
+
max = pool_size[:max]
|
30
|
+
|
31
|
+
@pool = Thread.pool min, max
|
32
|
+
changed
|
33
|
+
notify_observers error: "Using #{min}:#{max || min} threads..."
|
34
|
+
|
35
|
+
instance_eval &block
|
36
|
+
end
|
37
|
+
|
38
|
+
def dump(url, path, *args)
|
39
|
+
raise NotImplementedError
|
40
|
+
end
|
41
|
+
|
42
|
+
def shutdown
|
43
|
+
@pool.shutdown
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
+
#
|
4
|
+
# This file is part of Dumper.
|
5
|
+
#
|
6
|
+
# Dumper is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Dumper is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#++
|
19
|
+
|
20
|
+
module Dumper
|
21
|
+
module Profiles
|
22
|
+
Profile = Dumper::Profile
|
23
|
+
end
|
24
|
+
end
|
@@ -23,9 +23,9 @@ module Dumper
|
|
23
23
|
class Behoimi < Profile
|
24
24
|
def dump(url, path, from, to)
|
25
25
|
from.upto(to) { |i|
|
26
|
-
Nokogiri::HTML(open("#{url}&page=#{i}", 'User-Agent' => Dumper::
|
26
|
+
Nokogiri::HTML(open("#{url}&page=#{i}", 'User-Agent' => Dumper::USER_AGENT, 'Referer' => url)).xpath('//img[@class="preview "]/@src').each { |p|
|
27
27
|
@pool.process {
|
28
|
-
Dumper
|
28
|
+
Dumper.get path, p.to_s.gsub('preview/', ''), { referer: url }
|
29
29
|
}
|
30
30
|
}
|
31
31
|
}
|
@@ -25,11 +25,12 @@ module Dumper
|
|
25
25
|
page = 0
|
26
26
|
|
27
27
|
from.upto(to) { |i|
|
28
|
-
|
28
|
+
changed
|
29
|
+
notify_observers status: "--- Page #{i} ---"
|
29
30
|
|
30
31
|
Nokogiri::HTML(open("#{url}&pid=#{page}")).xpath('//span[@class="thumb"]').each { |u|
|
31
32
|
@pool.process {
|
32
|
-
Dumper
|
33
|
+
Dumper.get path, u.child.child['src'].gsub(/thumbs/, 'img').gsub(/thumbnails\//, 'images/').gsub(/thumbnail_/, '')
|
33
34
|
}
|
34
35
|
}
|
35
36
|
|
@@ -28,13 +28,14 @@ module Dumper
|
|
28
28
|
cdn = open(url).read.split('window.params.thumbs')[1].split('\/thumbs\/')[0].gsub(/\\\//m, ?/)[5..-1] + '/images/'
|
29
29
|
|
30
30
|
from.upto(to) { |i|
|
31
|
-
return if errors ==
|
31
|
+
return if errors == 3
|
32
32
|
|
33
33
|
file = "%03d.jpg" % i
|
34
34
|
filename = "#{cdn}#{file}"
|
35
35
|
|
36
36
|
@pool.process {
|
37
|
-
unless Dumper
|
37
|
+
unless Dumper.get path, URI.parse(URI.encode(filename, '[]')), { referer: url }
|
38
|
+
sleep 3
|
38
39
|
errors += 1
|
39
40
|
|
40
41
|
file = File.join(path, file).gsub(File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR)
|
data/lib/dumper/profiles/fc2.rb
CHANGED
@@ -27,7 +27,7 @@ module Dumper
|
|
27
27
|
|
28
28
|
Nokogiri::HTML(open(url)).xpath('//a[@target="_blank"]/@href')[from..to].each { |p|
|
29
29
|
@pool.process {
|
30
|
-
Dumper
|
30
|
+
Dumper.get(path, p) if p.to_s.end_with?('jpg') || p.to_s.end_with?('png')
|
31
31
|
}
|
32
32
|
}
|
33
33
|
end
|
@@ -25,11 +25,12 @@ module Dumper
|
|
25
25
|
page = 0
|
26
26
|
|
27
27
|
from.upto(to) { |i|
|
28
|
-
|
28
|
+
changed
|
29
|
+
notify_observers status: "--- Page #{i} ---"
|
29
30
|
|
30
31
|
Nokogiri::HTML(open("#{url}&pid=#{page}")).xpath('//span[@class="thumb"]').each { |u|
|
31
32
|
@pool.process {
|
32
|
-
Dumper
|
33
|
+
Dumper.get path, u.child.child['src'].gsub(/thumbnails/, 'images').gsub(/thumbnail_/, '')
|
33
34
|
}
|
34
35
|
}
|
35
36
|
|
@@ -33,7 +33,7 @@ module Dumper
|
|
33
33
|
|
34
34
|
page = Nokogiri::HTML open("http://www.mangaeden.com#{p['href']}")
|
35
35
|
|
36
|
-
Dumper
|
36
|
+
Dumper.get dir, page.at_xpath('//img[@id="mainImg"]/@src'), { filename: '1.png' }
|
37
37
|
i += 1
|
38
38
|
|
39
39
|
page.xpath('//a[@class="ui-state-default"]').each { |q|
|
@@ -41,7 +41,7 @@ module Dumper
|
|
41
41
|
|
42
42
|
Nokogiri::HTML(open("http://www.mangaeden.com#{q['href']}")).xpath('//img[@id="mainImg"]/@src').each { |r|
|
43
43
|
@pool.process {
|
44
|
-
Dumper
|
44
|
+
Dumper.get dir, r, { filename: "#{q.text.to_i}.png" }
|
45
45
|
}
|
46
46
|
}
|
47
47
|
}
|
@@ -31,7 +31,7 @@ module Dumper
|
|
31
31
|
url = page.at_xpath('//a[@id="pic_container"]/@href').to_s
|
32
32
|
scan = page.at_xpath('//img[@id="page1"]/@src').to_s[0..-3]
|
33
33
|
|
34
|
-
Dumper
|
34
|
+
Dumper.get path, scan, { referer: url, filename: "#{i}.#{scan.split(?.).last}" }
|
35
35
|
}
|
36
36
|
}
|
37
37
|
end
|
@@ -39,7 +39,7 @@ module Dumper
|
|
39
39
|
Nokogiri::HTML(open(chapter)).xpath('//select[@class="wid60"]/option').each { |q|
|
40
40
|
@pool.process {
|
41
41
|
scan = Nokogiri::HTML(open(q['value'])).xpath('//section[@id="viewer"]/a/img/@src')[0].to_s
|
42
|
-
Dumper
|
42
|
+
Dumper.get dir, scan, { filename: "#{q.text}.png" }
|
43
43
|
}
|
44
44
|
}
|
45
45
|
}
|
@@ -27,7 +27,7 @@ module Dumper
|
|
27
27
|
prefix = url.include?('idol.sankakucomplex') ? 'idol' : 'chan'
|
28
28
|
|
29
29
|
from.upto(to) { |page|
|
30
|
-
u = url
|
30
|
+
u = "#{url}&page=#{page}"
|
31
31
|
begin
|
32
32
|
op = open u
|
33
33
|
rescue Exception => e
|
@@ -37,13 +37,18 @@ module Dumper
|
|
37
37
|
|
38
38
|
Nokogiri::HTML(op).xpath('//a/@href').each { |p|
|
39
39
|
next unless p.to_s.start_with? '/post/show'
|
40
|
+
errors = 0
|
40
41
|
|
41
42
|
@pool.process {
|
42
43
|
begin
|
43
44
|
img = Nokogiri::HTML(open("http://#{prefix}.sankakucomplex.com/#{p}")).at_xpath('//a[@itemprop="contentUrl"]/@href').to_s
|
44
|
-
Dumper
|
45
|
+
Dumper.get path, img, { referer: u }
|
45
46
|
rescue Exception => e
|
46
|
-
|
47
|
+
if errors <= 3
|
48
|
+
sleep 3
|
49
|
+
errors += 1
|
50
|
+
retry
|
51
|
+
end
|
47
52
|
end
|
48
53
|
}
|
49
54
|
}
|
@@ -58,7 +63,7 @@ module Dumper
|
|
58
63
|
}
|
59
64
|
}[from..to].each { |p|
|
60
65
|
@pool.process {
|
61
|
-
Dumper
|
66
|
+
Dumper.get path, p, { referer: url }
|
62
67
|
}
|
63
68
|
}
|
64
69
|
end
|
data/lib/dumper/profiles/teca.rb
CHANGED
@@ -23,12 +23,13 @@ module Dumper
|
|
23
23
|
class YandeRe < Profile
|
24
24
|
def dump(url, path, from, to)
|
25
25
|
from.upto(to) { |i|
|
26
|
-
|
26
|
+
changed
|
27
|
+
notify_observers status: "--- Page #{i} ---"
|
27
28
|
|
28
|
-
Nokogiri::HTML(open("#{url}&page=#{i}", 'User-Agent' => Dumper::
|
29
|
+
Nokogiri::HTML(open("#{url}&page=#{i}", 'User-Agent' => Dumper::USER_AGENT, 'Referer' => url)).xpath('//a[@class="thumb"]/@href').each { |p|
|
29
30
|
@pool.process {
|
30
|
-
img = Nokogiri::HTML(open("https://yande.re#{p}", 'User-Agent' => Dumper::
|
31
|
-
Dumper
|
31
|
+
img = Nokogiri::HTML(open("https://yande.re#{p}", 'User-Agent' => Dumper::USER_AGENT, 'Referer' => url)).at_xpath('//img[@id="image"]/@src').text
|
32
|
+
Dumper.get path, img, { referer: url }
|
32
33
|
}
|
33
34
|
}
|
34
35
|
}
|
data/lib/dumper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: image-dumper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: '0.7'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Giovanni Capuano
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-01-
|
11
|
+
date: 2014-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -145,6 +145,8 @@ extensions: []
|
|
145
145
|
extra_rdoc_files: []
|
146
146
|
files:
|
147
147
|
- lib/dumper/dumper.rb
|
148
|
+
- lib/dumper/logger.rb
|
149
|
+
- lib/dumper/profile.rb
|
148
150
|
- lib/dumper/profiles/4chan.rb
|
149
151
|
- lib/dumper/profiles/behoimi.rb
|
150
152
|
- lib/dumper/profiles/booru.rb
|
@@ -161,6 +163,7 @@ files:
|
|
161
163
|
- lib/dumper/profiles/sankakucomplex.rb
|
162
164
|
- lib/dumper/profiles/teca.rb
|
163
165
|
- lib/dumper/profiles/yande.rb
|
166
|
+
- lib/dumper/profiles.rb
|
164
167
|
- lib/dumper/utils.rb
|
165
168
|
- lib/dumper/version.rb
|
166
169
|
- lib/dumper.rb
|