image-dumper 0.6.3 → 0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/dumper +18 -14
- data/lib/dumper.rb +6 -0
- data/lib/dumper/dumper.rb +97 -99
- data/lib/dumper/logger.rb +61 -0
- data/lib/dumper/profile.rb +46 -0
- data/lib/dumper/profiles.rb +24 -0
- data/lib/dumper/profiles/4chan.rb +1 -1
- data/lib/dumper/profiles/behoimi.rb +2 -2
- data/lib/dumper/profiles/booru.rb +3 -2
- data/lib/dumper/profiles/deviantart.rb +1 -1
- data/lib/dumper/profiles/fakku.rb +3 -2
- data/lib/dumper/profiles/fc2.rb +1 -1
- data/lib/dumper/profiles/gelbooru.rb +3 -2
- data/lib/dumper/profiles/imagebam.rb +1 -1
- data/lib/dumper/profiles/mangaeden.rb +2 -2
- data/lib/dumper/profiles/mangago.rb +1 -1
- data/lib/dumper/profiles/mangahere.rb +1 -1
- data/lib/dumper/profiles/multiplayer.rb +1 -1
- data/lib/dumper/profiles/redblow.rb +1 -1
- data/lib/dumper/profiles/sankakucomplex.rb +9 -4
- data/lib/dumper/profiles/teca.rb +1 -1
- data/lib/dumper/profiles/yande.rb +5 -4
- data/lib/dumper/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06a97439e109b4b00205dc78978c35e810ad4990
|
4
|
+
data.tar.gz: e564cf847b687c07c3e7496377e57f3a7d40265a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e213fdd13e3318788c99a60b4ad749a60927c561e03802f834b4d7d29371f08a07c04c68fa8e3fba16ded775ff211f388397400c989580522920f90ddef7b68f
|
7
|
+
data.tar.gz: c0e9703a9be61dfe13d948b5b45d0bf512725579d2f807e00e7e5da014a72d4def01992985f89a37747b80a7ac41c3b35a9854b5e01c34b031692f3cf82ca6ee
|
data/bin/dumper
CHANGED
@@ -27,14 +27,14 @@ OptionParser.new do |o|
|
|
27
27
|
|
28
28
|
o.on '-l', '--list', 'Show available profiles' do
|
29
29
|
abort 'Profiles available:'.tap { |s|
|
30
|
-
Dumper
|
30
|
+
Dumper.list.sort { |a, b| a <=> b }.each { |p| s << "\n" + (' ' * 3) + p }
|
31
31
|
} if ARGV.empty?
|
32
32
|
end
|
33
33
|
|
34
34
|
o.on '-i', '--info PROFILE', 'Get info about profiles' do |profile|
|
35
|
-
if Dumper::Profiles
|
35
|
+
if Dumper::Profiles.list.include? profile
|
36
36
|
method = ("info_#{profile}").to_sym
|
37
|
-
Dumper::Profiles
|
37
|
+
Dumper::Profiles.send(method).tap { |i|
|
38
38
|
puts "Option 'from' is #{i[:from] ? 'available' : 'not available'}."
|
39
39
|
puts "Option 'to' is #{i[:to] ? 'available' : 'not available'}."
|
40
40
|
abort "It dumps #{i[:type]}."
|
@@ -48,11 +48,11 @@ OptionParser.new do |o|
|
|
48
48
|
options[:url] << url
|
49
49
|
end
|
50
50
|
|
51
|
-
o.on '-
|
51
|
+
o.on '-f', '--from PAGE', 'Start to save from... (if allowed)' do |pages|
|
52
52
|
options[:from] = pages.to_i
|
53
53
|
end
|
54
54
|
|
55
|
-
o.on '-
|
55
|
+
o.on '-t', '--to PAGE', 'Finish to save at... (if allowed)' do |pages|
|
56
56
|
options[:to] = pages.to_i
|
57
57
|
end
|
58
58
|
|
@@ -70,14 +70,18 @@ OptionParser.new do |o|
|
|
70
70
|
|
71
71
|
o.on '-t', '--threads MIN:MAX', 'Set the number of threads to use' do |threads|
|
72
72
|
threads = threads.split ?:
|
73
|
-
Dumper
|
73
|
+
Dumper.pool_size = threads[0].to_i, threads[1].to_i
|
74
74
|
end
|
75
75
|
|
76
|
-
o.on '-
|
77
|
-
Dumper
|
76
|
+
o.on '-m', '--silence', 'Print only important messages' do
|
77
|
+
Dumper.shut_up!
|
78
78
|
end
|
79
79
|
|
80
|
-
o.on '-
|
80
|
+
o.on '-o', '--output ', 'Print the logs on the given file' do |file|
|
81
|
+
Dumper::Logger.redirect_on :file, file
|
82
|
+
end
|
83
|
+
|
84
|
+
o.on '-s', '--file FILE', 'File containing in each line "URL||Folder"' do |file|
|
81
85
|
file = File.open(file).read.gsub(/\r\n?/, "\n")
|
82
86
|
file.each_line { |line|
|
83
87
|
split = line.split '||'
|
@@ -99,15 +103,15 @@ options[:url].each_with_index { |url, i|
|
|
99
103
|
|
100
104
|
host = options[:profile] || URI.parse(url).host.split(?.)[-2]
|
101
105
|
|
102
|
-
if options[:xpath].nil? && Dumper
|
106
|
+
if options[:xpath].nil? && Dumper.list.include?(host.gsub(?-, ?_))
|
103
107
|
method = "get_#{host.gsub ?-, ?_}".to_sym
|
104
108
|
case
|
105
|
-
when options[:from] && options[:to] then Dumper::Profiles
|
106
|
-
when options[:from] then Dumper::Profiles
|
107
|
-
else Dumper::Profiles
|
109
|
+
when options[:from] && options[:to] then Dumper::Profiles.send method, url, options[:path][i], options[:from], options[:to]
|
110
|
+
when options[:from] then Dumper::Profiles.send method, url, options[:path][i], options[:from]
|
111
|
+
else Dumper::Profiles.send method, url, options[:path][i]
|
108
112
|
end
|
109
113
|
else
|
110
|
-
Dumper::Profiles
|
114
|
+
Dumper::Profiles.get_generic url, options[:path][i], options[:xpath]
|
111
115
|
end
|
112
116
|
rescue Nokogiri::XML::XPath::SyntaxError => e
|
113
117
|
puts e.to_s.gsub /expression/, 'xpath'
|
data/lib/dumper.rb
CHANGED
@@ -28,11 +28,17 @@ require 'certified'
|
|
28
28
|
require 'addressable/uri'
|
29
29
|
require 'json'
|
30
30
|
require 'thread/pool'
|
31
|
+
require 'observer'
|
31
32
|
|
32
33
|
require 'dumper/utils'
|
34
|
+
require 'dumper/logger'
|
35
|
+
require 'dumper/profile'
|
36
|
+
require 'dumper/profiles'
|
33
37
|
require 'dumper/dumper'
|
34
38
|
require 'dumper/version'
|
35
39
|
|
36
40
|
Dir.glob(File.expand_path("../dumper/profiles/*.rb", __FILE__)).each { |f|
|
37
41
|
require "dumper/profiles/#{File.basename(f).split(?.)[0]}"
|
38
42
|
}
|
43
|
+
|
44
|
+
Dumper.add_observer Dumper::Logger.new
|
data/lib/dumper/dumper.rb
CHANGED
@@ -18,128 +18,126 @@
|
|
18
18
|
#++
|
19
19
|
|
20
20
|
# Core of the image dumper: module-wide settings (thread-pool size, verbosity),
# the list of bundled profiles and the download helpers used by every profile.
#
# Progress and errors are published through Observable (mixed into the
# module's singleton), so observers attached with +Dumper.add_observer+
# receive one Hash per event.
module Dumper
  # User-Agent header sent with downloads unless options[:user_agent] is given.
  USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:16.0) Gecko/20100101 Firefox/16.0'

  # Thread-pool bounds as seen by an object that includes Dumper.
  #
  # The includer's own @min/@max win; otherwise the values stored on the
  # module by +Dumper.pool_size=+ are used, so a size configured once on the
  # module reaches every includer.
  #
  # @return [Hash] +{ min:, max: }+; +min+ defaults to 4, +max+ may be nil
  def pool_size
    {
      min: @min || Dumper.instance_variable_get(:@min) || 4,
      max: @max || Dumper.instance_variable_get(:@max)
    }
  end

  # @return [Boolean] the includer's own @verbose when set, otherwise the
  #   module-wide setting (+Dumper.verbose?+)
  def verbose?
    return Dumper.verbose? if @verbose.nil?

    @verbose == true
  end

  # @return [Boolean] true when output has been explicitly silenced, either on
  #   the includer or module-wide
  def mute?
    return !Dumper.verbose? if @verbose.nil?

    @verbose == false
  end
  alias_method :muted?, :mute?

  class << self
    include Observable

    # Stores the thread-pool bounds.
    #
    # Ruby passes exactly one argument to an assignment method, so
    # +Dumper.pool_size = 2, 8+ arrives here as the single Array +[2, 8]+;
    # it is unpacked so both call styles store a scalar min and max.
    #
    # @param min [Integer, Array] lower bound, or a +[min, max]+ pair
    # @param max [Integer, nil] upper bound
    def pool_size=(min, max = nil)
      min, max = min if min.is_a?(Array)

      @min = min
      @max = max
    end

    # @param verbose [Boolean] module-wide verbosity flag
    def verbose=(verbose)
      @verbose = verbose
    end

    # Silences non-essential output module-wide.
    def shut_up!
      @verbose = false
    end
    alias_method :mute!, :shut_up!

    # Re-enables output module-wide.
    def verbose!
      @verbose = true
    end
    alias_method :unmute!, :verbose!

    # @return [Boolean] true unless output was explicitly silenced
    def verbose?
      @verbose.nil? || @verbose == true
    end

    # Names of the profile files shipped next to this file
    # (+profiles/*.rb+), in descending alphabetical order.
    #
    # @return [Array<String>] file names cut at their first dot
    def list
      Dir.glob(File.expand_path('../profiles/*.rb', __FILE__)).sort.reverse.map { |f|
        File.basename(f).split(?.)[0]
      }
    end

    # Saves one image into +path+.
    #
    # +url+ is either a +data:image/...;base64,+ URL, which is decoded
    # locally, or anything else, which is fetched with +open+.
    # An already existing target file is reported and left untouched.
    # On failure the whole attempt is repeated after a 3 second pause, up to
    # four more times, before giving up.
    #
    # NOTE(review): +open(url)+ relies on open-uri patching Kernel#open, which
    # newer Rubies no longer do; confirm the supported Ruby range before
    # switching to URI.open.
    #
    # @param path [String] destination directory
    # @param url [#to_s] image location
    # @param options [Hash] +:filename+, +:user_agent+, +:referer+
    # @return [Boolean] false when every attempt failed, true otherwise
    def get(path, url, options = {})
      url    = url.to_s
      errors = 0

      begin
        if url.start_with? 'data:image/'
          filename = File.join path, options[:filename] || rand(1000).to_s + '.' + url.split('data:image/')[1].split(?;)[0]
          filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR

          if File.exist? filename
            changed
            notify_observers error: "File #{filename} already exists."
          else
            changed
            notify_observers status: "Downloading base64 image as #{filename}..."

            # Only the part after the "data:image/<type>;base64," header is
            # payload; the header consists of valid base64 characters and
            # would otherwise be decoded into garbage bytes.
            payload = url.sub(/\Adata:image\/[^;,]+;base64,/, '')
            File.open(filename, 'wb') { |f|
              f.write Base64.decode64(payload)
            }
          end
        else
          filename = File.join path, options[:filename] || File.basename(url)
          filename.gsub! File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR

          if File.exist? filename
            changed
            notify_observers error: "File #{filename} already exists."
          else
            # No recognisable 3- or 4-letter extension: fall back to a random .jpg name.
            filename = File.join(path, rand(1000).to_s + '.jpg') unless filename[-4] == ?. || filename[-5] == ?.
            changed
            notify_observers status: "Downloading #{url} as #{filename}..."

            # Fetch first, create the file afterwards: a failed request must
            # not leave an empty file that the retry would take for a
            # finished download.
            body = open(url,
              'User-Agent' => options[:user_agent] || USER_AGENT,
              'Referer'    => options[:referer   ] || url
            ).read

            File.open(filename, 'wb') { |f|
              f.write body
            }
          end
        end
      rescue StandardError => e
        if errors <= 3
          errors += 1
          sleep 3
          retry
        else
          changed
          notify_observers critical_error: "Error downloading #{url}.", critical_error_dump: e
          return false
        end
      end

      true
    end

    # Downloads every node matched by +xpath+ on the page at +url+.
    # Matches starting with "/" are completed with the page's scheme and host.
    #
    # @param url [String] page to scan
    # @param path [String] destination directory
    # @param xpath [String] expression selecting the image locations
    def get_generic(url, path, xpath)
      uri = nil
      Nokogiri::HTML(open(url)).xpath(xpath).each { |src|
        if src.to_s.start_with? ?/
          uri ||= URI(url)
          src   = "#{uri.scheme}://#{uri.host}#{src}"
        end
        get path, src
      }
    end
  end

  # Answers calls to unknown +get_<profile>+ methods with a readable message.
  # Any other unknown method is passed to +super+ and raises NoMethodError as
  # usual. +respond_to_missing?+ is deliberately not overridden: the object
  # does not really support a profile it merely reports as missing.
  #
  # @return [String] "'<profile>' profile not found."
  def method_missing(method, *args, &block)
    name = method.to_s
    return super unless name.start_with?('get_')

    "'#{name.split('get_')[1]}' profile not found."
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
+
#
|
4
|
+
# This file is part of Dumper.
|
5
|
+
#
|
6
|
+
# Dumper is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Dumper is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#++
|
19
|
+
|
20
|
+
module Dumper
  # Observer that receives the event hashes published through
  # +notify_observers+ and either prints them or appends them to a log file.
  #
  # The destination is shared by all instances and changed with
  # +Logger.redirect_on+.
  class Logger
    # Shared destination. Initialised once here rather than in #initialize,
    # so that creating another Logger never discards an earlier redirect_on.
    @@where = :screen
    @@file  = 'dumper.log'

    class << self
      # Selects where every Logger writes.
      #
      # @param where [Symbol] +:file+ to append to +file+; any other value
      #   logs on screen
      # @param file [String, nil] log file; nil or empty means 'dumper.log'
      def redirect_on(where, file)
        @@where = where
        @@file  = !file || file.empty? ? 'dumper.log' : file
      end

      # Appends every message of +data+ to +file+, one per line.
      # The value stored under +:critical_error_dump+ is written with #inspect.
      #
      # @param file [String] path of the log file
      # @param data [Hash] status => message pairs
      def log_on_file(file, data)
        File.open(file, 'a') { |io|
          data.each { |status, message|
            io.puts status == :critical_error_dump ? message.inspect : message
          }
        }
      end

      # Prints every message of +data+ on standard output.
      # The value stored under +:critical_error_dump+ is printed with +p+.
      #
      # @param data [Hash] status => message pairs
      def log_on_screen(data)
        data.each { |status, message|
          if status == :critical_error_dump
            p message
          else
            puts message
          end
        }
      end
    end

    # Observer callback.
    # File logging is unconditional; screen logging only happens while
    # +Dumper.verbose?+ is true.
    #
    # @param data [Hash] status => message pairs
    def update(data)
      if @@where == :file
        Logger.log_on_file @@file, data
      else
        Logger.log_on_screen(data) if Dumper.verbose?
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
+
#
|
4
|
+
# This file is part of Dumper.
|
5
|
+
#
|
6
|
+
# Dumper is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Dumper is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#++
|
19
|
+
|
20
|
+
module Dumper
  # Base class of the site profiles.
  #
  # Creating a profile attaches a Dumper::Logger as observer, builds the
  # thread pool stored in @pool and announces the pool size.
  # Subclasses implement #dump.
  class Profile
    include Dumper
    include Observable

    # @yield optional block evaluated in the context of the new profile
    def initialize(&block)
      add_observer Dumper::Logger.new

      min = pool_size[:min]
      max = pool_size[:max]

      @pool = Thread.pool min, max
      changed
      # Informational message, so it is published under :status like the
      # other progress messages rather than as an :error.
      notify_observers status: "Using #{min}:#{max || min} threads..."

      # instance_eval raises ArgumentError when handed no block at all.
      instance_eval(&block) if block
    end

    # Downloads the images found at +url+ into +path+.
    #
    # @raise [NotImplementedError] always; subclasses must override it
    def dump(url, path, *args)
      raise NotImplementedError
    end

    # Shuts the thread pool down.
    def shutdown
      @pool.shutdown
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright(C) 2013 Giovanni Capuano <webmaster@giovannicapuano.net>
|
3
|
+
#
|
4
|
+
# This file is part of Dumper.
|
5
|
+
#
|
6
|
+
# Dumper is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Dumper is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Dumper. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#++
|
19
|
+
|
20
|
+
module Dumper
|
21
|
+
module Profiles
|
22
|
+
Profile = Dumper::Profile
|
23
|
+
end
|
24
|
+
end
|
@@ -23,9 +23,9 @@ module Dumper
|
|
23
23
|
class Behoimi < Profile
|
24
24
|
def dump(url, path, from, to)
|
25
25
|
from.upto(to) { |i|
|
26
|
-
Nokogiri::HTML(open("#{url}&page=#{i}", 'User-Agent' => Dumper::
|
26
|
+
Nokogiri::HTML(open("#{url}&page=#{i}", 'User-Agent' => Dumper::USER_AGENT, 'Referer' => url)).xpath('//img[@class="preview "]/@src').each { |p|
|
27
27
|
@pool.process {
|
28
|
-
Dumper
|
28
|
+
Dumper.get path, p.to_s.gsub('preview/', ''), { referer: url }
|
29
29
|
}
|
30
30
|
}
|
31
31
|
}
|
@@ -25,11 +25,12 @@ module Dumper
|
|
25
25
|
page = 0
|
26
26
|
|
27
27
|
from.upto(to) { |i|
|
28
|
-
|
28
|
+
changed
|
29
|
+
notify_observers status: "--- Page #{i} ---"
|
29
30
|
|
30
31
|
Nokogiri::HTML(open("#{url}&pid=#{page}")).xpath('//span[@class="thumb"]').each { |u|
|
31
32
|
@pool.process {
|
32
|
-
Dumper
|
33
|
+
Dumper.get path, u.child.child['src'].gsub(/thumbs/, 'img').gsub(/thumbnails\//, 'images/').gsub(/thumbnail_/, '')
|
33
34
|
}
|
34
35
|
}
|
35
36
|
|
@@ -28,13 +28,14 @@ module Dumper
|
|
28
28
|
cdn = open(url).read.split('window.params.thumbs')[1].split('\/thumbs\/')[0].gsub(/\\\//m, ?/)[5..-1] + '/images/'
|
29
29
|
|
30
30
|
from.upto(to) { |i|
|
31
|
-
return if errors ==
|
31
|
+
return if errors == 3
|
32
32
|
|
33
33
|
file = "%03d.jpg" % i
|
34
34
|
filename = "#{cdn}#{file}"
|
35
35
|
|
36
36
|
@pool.process {
|
37
|
-
unless Dumper
|
37
|
+
unless Dumper.get path, URI.parse(URI.encode(filename, '[]')), { referer: url }
|
38
|
+
sleep 3
|
38
39
|
errors += 1
|
39
40
|
|
40
41
|
file = File.join(path, file).gsub(File::SEPARATOR, File::ALT_SEPARATOR || File::SEPARATOR)
|
data/lib/dumper/profiles/fc2.rb
CHANGED
@@ -27,7 +27,7 @@ module Dumper
|
|
27
27
|
|
28
28
|
Nokogiri::HTML(open(url)).xpath('//a[@target="_blank"]/@href')[from..to].each { |p|
|
29
29
|
@pool.process {
|
30
|
-
Dumper
|
30
|
+
Dumper.get(path, p) if p.to_s.end_with?('jpg') || p.to_s.end_with?('png')
|
31
31
|
}
|
32
32
|
}
|
33
33
|
end
|
@@ -25,11 +25,12 @@ module Dumper
|
|
25
25
|
page = 0
|
26
26
|
|
27
27
|
from.upto(to) { |i|
|
28
|
-
|
28
|
+
changed
|
29
|
+
notify_observers status: "--- Page #{i} ---"
|
29
30
|
|
30
31
|
Nokogiri::HTML(open("#{url}&pid=#{page}")).xpath('//span[@class="thumb"]').each { |u|
|
31
32
|
@pool.process {
|
32
|
-
Dumper
|
33
|
+
Dumper.get path, u.child.child['src'].gsub(/thumbnails/, 'images').gsub(/thumbnail_/, '')
|
33
34
|
}
|
34
35
|
}
|
35
36
|
|
@@ -33,7 +33,7 @@ module Dumper
|
|
33
33
|
|
34
34
|
page = Nokogiri::HTML open("http://www.mangaeden.com#{p['href']}")
|
35
35
|
|
36
|
-
Dumper
|
36
|
+
Dumper.get dir, page.at_xpath('//img[@id="mainImg"]/@src'), { filename: '1.png' }
|
37
37
|
i += 1
|
38
38
|
|
39
39
|
page.xpath('//a[@class="ui-state-default"]').each { |q|
|
@@ -41,7 +41,7 @@ module Dumper
|
|
41
41
|
|
42
42
|
Nokogiri::HTML(open("http://www.mangaeden.com#{q['href']}")).xpath('//img[@id="mainImg"]/@src').each { |r|
|
43
43
|
@pool.process {
|
44
|
-
Dumper
|
44
|
+
Dumper.get dir, r, { filename: "#{q.text.to_i}.png" }
|
45
45
|
}
|
46
46
|
}
|
47
47
|
}
|
@@ -31,7 +31,7 @@ module Dumper
|
|
31
31
|
url = page.at_xpath('//a[@id="pic_container"]/@href').to_s
|
32
32
|
scan = page.at_xpath('//img[@id="page1"]/@src').to_s[0..-3]
|
33
33
|
|
34
|
-
Dumper
|
34
|
+
Dumper.get path, scan, { referer: url, filename: "#{i}.#{scan.split(?.).last}" }
|
35
35
|
}
|
36
36
|
}
|
37
37
|
end
|
@@ -39,7 +39,7 @@ module Dumper
|
|
39
39
|
Nokogiri::HTML(open(chapter)).xpath('//select[@class="wid60"]/option').each { |q|
|
40
40
|
@pool.process {
|
41
41
|
scan = Nokogiri::HTML(open(q['value'])).xpath('//section[@id="viewer"]/a/img/@src')[0].to_s
|
42
|
-
Dumper
|
42
|
+
Dumper.get dir, scan, { filename: "#{q.text}.png" }
|
43
43
|
}
|
44
44
|
}
|
45
45
|
}
|
@@ -27,7 +27,7 @@ module Dumper
|
|
27
27
|
prefix = url.include?('idol.sankakucomplex') ? 'idol' : 'chan'
|
28
28
|
|
29
29
|
from.upto(to) { |page|
|
30
|
-
u = url
|
30
|
+
u = "#{url}&page=#{page}"
|
31
31
|
begin
|
32
32
|
op = open u
|
33
33
|
rescue Exception => e
|
@@ -37,13 +37,18 @@ module Dumper
|
|
37
37
|
|
38
38
|
Nokogiri::HTML(op).xpath('//a/@href').each { |p|
|
39
39
|
next unless p.to_s.start_with? '/post/show'
|
40
|
+
errors = 0
|
40
41
|
|
41
42
|
@pool.process {
|
42
43
|
begin
|
43
44
|
img = Nokogiri::HTML(open("http://#{prefix}.sankakucomplex.com/#{p}")).at_xpath('//a[@itemprop="contentUrl"]/@href').to_s
|
44
|
-
Dumper
|
45
|
+
Dumper.get path, img, { referer: u }
|
45
46
|
rescue Exception => e
|
46
|
-
|
47
|
+
if errors <= 3
|
48
|
+
sleep 3
|
49
|
+
errors += 1
|
50
|
+
retry
|
51
|
+
end
|
47
52
|
end
|
48
53
|
}
|
49
54
|
}
|
@@ -58,7 +63,7 @@ module Dumper
|
|
58
63
|
}
|
59
64
|
}[from..to].each { |p|
|
60
65
|
@pool.process {
|
61
|
-
Dumper
|
66
|
+
Dumper.get path, p, { referer: url }
|
62
67
|
}
|
63
68
|
}
|
64
69
|
end
|
data/lib/dumper/profiles/teca.rb
CHANGED
@@ -23,12 +23,13 @@ module Dumper
|
|
23
23
|
class YandeRe < Profile
|
24
24
|
def dump(url, path, from, to)
|
25
25
|
from.upto(to) { |i|
|
26
|
-
|
26
|
+
changed
|
27
|
+
notify_observers status: "--- Page #{i} ---"
|
27
28
|
|
28
|
-
Nokogiri::HTML(open("#{url}&page=#{i}", 'User-Agent' => Dumper::
|
29
|
+
Nokogiri::HTML(open("#{url}&page=#{i}", 'User-Agent' => Dumper::USER_AGENT, 'Referer' => url)).xpath('//a[@class="thumb"]/@href').each { |p|
|
29
30
|
@pool.process {
|
30
|
-
img = Nokogiri::HTML(open("https://yande.re#{p}", 'User-Agent' => Dumper::
|
31
|
-
Dumper
|
31
|
+
img = Nokogiri::HTML(open("https://yande.re#{p}", 'User-Agent' => Dumper::USER_AGENT, 'Referer' => url)).at_xpath('//img[@id="image"]/@src').text
|
32
|
+
Dumper.get path, img, { referer: url }
|
32
33
|
}
|
33
34
|
}
|
34
35
|
}
|
data/lib/dumper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: image-dumper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: '0.7'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Giovanni Capuano
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-01-
|
11
|
+
date: 2014-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -145,6 +145,8 @@ extensions: []
|
|
145
145
|
extra_rdoc_files: []
|
146
146
|
files:
|
147
147
|
- lib/dumper/dumper.rb
|
148
|
+
- lib/dumper/logger.rb
|
149
|
+
- lib/dumper/profile.rb
|
148
150
|
- lib/dumper/profiles/4chan.rb
|
149
151
|
- lib/dumper/profiles/behoimi.rb
|
150
152
|
- lib/dumper/profiles/booru.rb
|
@@ -161,6 +163,7 @@ files:
|
|
161
163
|
- lib/dumper/profiles/sankakucomplex.rb
|
162
164
|
- lib/dumper/profiles/teca.rb
|
163
165
|
- lib/dumper/profiles/yande.rb
|
166
|
+
- lib/dumper/profiles.rb
|
164
167
|
- lib/dumper/utils.rb
|
165
168
|
- lib/dumper/version.rb
|
166
169
|
- lib/dumper.rb
|