rypper 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/rypper +6 -0
- data/lib/net/http_client.rb +73 -0
- data/lib/rypper.rb +19 -0
- data/lib/rypper/cli.rb +77 -0
- data/lib/rypper/counter.rb +60 -0
- data/lib/rypper/extractor.rb +32 -0
- data/lib/rypper/loader.rb +29 -0
- data/lib/rypper/uri.rb +99 -0
- metadata +82 -0
data/bin/rypper
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'net/http'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
module Net
  # Minimal HTTP GET client built on Net::HTTP with a class-level cache of
  # client instances. Only plain HTTP is configured here (no TLS setup).
  class HTTPClient
    class << self
      # Cache of clients created through +from_storage+, keyed by
      # [host, port] so that two ports on one host do not share a client.
      def storage
        @storage ||= {}
      end

      # Returns the cached client for +host+ and +port+.
      # A new client is created (and cached) when none exists yet or when
      # +renew+ is truthy.
      def from_storage(host, port=80, renew=false)
        key = [host.to_s.strip, port.to_i]
        storage.delete(key) if renew
        storage[key] ||= new(host, port)
      end
    end

    attr_reader :host, :port, :timeout
    attr_writer :keep_alive
    attr_accessor :user_agent

    # host    - host name to connect to (converted with to_s and stripped).
    # port    - TCP port (converted with to_i).
    # timeout - value used for both the open and the read timeout
    #           (converted with to_i).
    def initialize(host, port=80, timeout=15)
      @host = host.to_s.strip
      @port = port.to_i
      @keep_alive = true
      @timeout = timeout.to_i
      @user_agent = "Net::HTTPClient/0.1 (Ruby #{RUBY_VERSION})"
      @http = build_http
    end

    # True when requests advertise 'Connection: Keep-Alive'.
    def keep_alive?
      @keep_alive ? true : false
    end

    # Performs a GET request and returns the Net::HTTP response object.
    #
    # path    - a URI object or a path string (expanded via +to_uri+).
    # header  - request headers; defaults are filled in on a copy, so the
    #           caller's hash is left untouched.
    # options - :referer_self        send the requested URI as Referer
    #           :follow_redirects    follow a redirect response
    #           :follow_with_header  reuse this request's header on redirect
    #           :follow_header       header to use on redirect otherwise
    #           :follow_with_options reuse these options on redirect
    #           :follow_options      options to use on redirect otherwise
    def get(path, header={}, options={})
      uri = path.is_a?(URI) ? path : to_uri(path)
      header = header.dup
      header['Accept'] ||= '*/*'
      header['Connection'] ||= (keep_alive? ? 'Keep-Alive' : 'Close')
      header['Referer'] = uri.to_s if options[:referer_self]
      header['User-Agent'] ||= user_agent

      response = request(uri, header)
      if response.is_a?(Net::HTTPRedirection) && options[:follow_redirects]
        response = follow_redirect(response, uri, header, options)
      end
      response
    end

    # Builds an http:// URI for +path+ on this client's host and port.
    def to_uri(path=nil)
      URI::HTTP.build({:host => host, :port => port, :path => path})
    end

    private

    # Creates the underlying Net::HTTP object with both timeouts applied.
    def build_http
      http = Net::HTTP.new(host, port)
      http.read_timeout = timeout
      http.open_timeout = timeout
      http
    end

    # Sends the GET request; on EOFError the connection is rebuilt once
    # (with the same timeouts) and the request is repeated.
    def request(uri, header)
      # request_uri keeps the query string and never yields an empty path.
      target = uri.respond_to?(:request_uri) ? uri.request_uri : uri.path
      @http.start unless @http.started?
      begin
        @http.request_get(target, header)
      rescue EOFError
        @http = build_http
        @http.start
        @http.request_get(target, header)
      end
    end

    # Issues the follow-up request for a redirect response. A response
    # without a Location header is returned unchanged.
    def follow_redirect(response, uri, header, options)
      location = response['Location'].to_s.strip
      return response if location.empty?

      # Location may be relative; resolve it against the requested URI.
      base = uri.absolute? ? uri : to_uri(uri.path)
      target = base.merge(location)

      if options[:follow_with_header]
        header_redirect = header.dup
      else
        header_redirect = (options[:follow_header] || {}).dup
      end
      options_redirect = options[:follow_with_options] ? options : options[:follow_options] || {}
      header_redirect['Referer'] ||= uri.to_s

      connection = self.class.from_storage(target.host, target.port)
      connection.get(target, header_redirect, options_redirect)
    end
  end
end
|
data/lib/rypper.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'net/http_client'
|
4
|
+
|
5
|
+
require 'getoptlong'
|
6
|
+
require 'uri'
|
7
|
+
|
8
|
+
require 'rubygems'
|
9
|
+
require 'nokogiri'
|
10
|
+
|
11
|
+
require 'rypper/cli'
|
12
|
+
require 'rypper/counter'
|
13
|
+
require 'rypper/extractor'
|
14
|
+
require 'rypper/loader'
|
15
|
+
require 'rypper/uri'
|
16
|
+
|
17
|
+
# Start the command-line interface only when this file itself is the program
# being run. Both sides are expanded to absolute paths so the check does not
# depend on the directory the script was started from.
if File.expand_path($0) == File.expand_path(__FILE__)
  Rypper::CLI.main()
end
|
data/lib/rypper/cli.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
module Rypper
  # Command-line front end: walks every URI produced by a counter template,
  # extracts image URIs from each page with a selector and saves the images.
  class CLI
    OPTS = [
      ['--help', '-h', GetoptLong::NO_ARGUMENT],
      ['--output', '-o', GetoptLong::REQUIRED_ARGUMENT],
      ['--overwrite', '-w', GetoptLong::NO_ARGUMENT],
    ]

    # Parses the options listed in OPTS out of ARGV.
    # Returns a Hash keyed by option name as a Symbol (without the leading
    # dashes); flag options map to true, others to their argument.
    def self.getopt()
      opts = {}
      GetoptLong.new(*OPTS).each do |opt, arg|
        opt_sym = opt.sub('--', '').to_sym
        opt_type = OPTS.find {|e| e.first == opt}.last
        opts[opt_sym] = (opt_type == GetoptLong::NO_ARGUMENT) ? true : arg
      end
      opts
    end

    # Entry point. Expects two positional arguments, <uri> and <selector>,
    # and terminates the process via exit (0 on success, 1 on bad usage).
    # NOTE(review): --output is parsed but not used anywhere in this class.
    def self.main()
      opts = self.getopt()
      argv = ARGV
      if opts.has_key?(:help)
        puts usage
        exit 0
      end
      if argv.count != 2
        puts usage
        exit 1
      end

      uri = Rypper::URI.new(argv[0])
      uri.parse!
      uri.first!
      extractor = Rypper::Extractor.new(argv[1])
      # Innermost counter; nil when the URI template contains no counters.
      counter = uri.counter[uri.order.last]

      puts "Processing #{uri.uri} ..."
      loop do
        html_uri = uri.to_uri
        puts " * #{html_uri} ..."
        html = Rypper::Loader.get(html_uri)
        if html.is_a?(String)
          extractor.extract!(html).each do |image_uri|
            fetch_image(uri, image_uri, opts)
          end
        else
          # The page could not be loaded: push the innermost counter to its
          # upper bound so the next! below rolls over to the outer counter.
          counter.last! if counter
          puts ' * Last'
        end
        uri.next!
        break if uri.first?
      end

      puts 'OK'

      exit 0
    end

    # Usage line shown for --help and for a wrong argument count.
    def self.usage
      "USAGE: ruby #{$0} <uri> <selector>"
    end
    private_class_method :usage

    # Downloads one extracted image to the path derived from the current
    # counter state. Existing files are skipped unless --overwrite was given.
    def self.fetch_image(uri, image_uri, opts)
      unless image_uri.is_a?(String)
        puts ' * Imageless'
        return
      end

      print " * #{image_uri} ..."
      image_path = uri.to_path(File.extname(image_uri))
      if File.exist?(image_path) && !opts.has_key?(:overwrite)
        puts ' Exists: Skipping'
        return
      end

      # Download before touching the disk: Loader.get returns the numeric
      # status code instead of a String when the request did not succeed.
      data = Rypper::Loader.get(image_uri)
      unless data.is_a?(String)
        puts " Failed (HTTP #{data})"
        return
      end

      Rypper::Loader.mkdir!(File.dirname(image_path))
      # Block form closes the file even when the write raises.
      File.open(image_path, 'wb') { |image_file| image_file.write(data) }
      puts ' OK'
    end
    private_class_method :fetch_image
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
module Rypper
  # Integer counter bounded by +lower+ and +upper+ that wraps around at
  # either end. +match+ is stored as given and not interpreted here.
  class Counter
    attr_reader :match, :lower, :upper, :digits, :state

    # match  - value kept for the caller (exposed through #match).
    # lower  - smallest state.
    # upper  - largest state.
    # digits - default zero-padded width used by #to_s.
    # state  - initial state; falls back to +lower+ when nil.
    def initialize(match, lower, upper, digits=1, state=nil)
      @match, @lower, @upper, @digits = match, lower, upper, digits
      @state = state || @lower
    end

    # Resets the state to the lower bound. Returns self.
    def first!
      @state = @lower
      self
    end

    # True when the state equals the lower bound.
    def first?
      @lower == @state
    end

    # Steps one down, wrapping from the lower to the upper bound.
    # Returns self.
    def prev!
      @state = (@state > @lower) ? @state - 1 : @upper
      self
    end

    # Steps one up, wrapping from the upper to the lower bound.
    # Returns self.
    def next!
      @state = (@state < @upper) ? @state + 1 : @lower
      self
    end

    # Sets the state to the upper bound. Returns self.
    def last!
      @state = @upper
      self
    end

    # True when the state equals the upper bound.
    def last?
      @upper == @state
    end

    # State as a String, left-padded with '0' to +digits+ characters
    # (the counter's own #digits when none is given).
    def to_s(digits=nil)
      width = digits || self.digits
      @state.to_s.rjust(width, '0')
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
module Rypper
  # Pulls link targets out of an HTML document using a selector.
  class Extractor
    attr_reader :selector

    # selector - selector string handed to the document's #search.
    def initialize(selector)
      @selector = selector
    end

    # Searches +html+ for the selector and returns an Array with one entry
    # per matched element: the +src+ attribute for <img>, the +href+
    # attribute for <a>, and the element itself for anything else.
    # Every match is treated the same way, however many there are.
    #
    # html - a Nokogiri::HTML::Document, or anything Nokogiri::HTML() parses.
    def extract!(html)
      unless html.kind_of?(Nokogiri::HTML::Document)
        html = Nokogiri::HTML(html)
      end
      html.search(self.selector).map { |elem| target_of(elem) }
    end

    private

    # Maps one matched element to the value reported for it.
    def target_of(elem)
      case elem.name
      when 'img' then elem[:src]
      when 'a' then elem[:href]
      else elem
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'fileutils'

module Rypper
  # Helpers for creating target directories and fetching resources via
  # Net::HTTPClient.
  class Loader
    # Creates +path+ including all missing parent directories; works for
    # relative and absolute paths alike.
    # Returns true when +path+ is a directory afterwards, false otherwise
    # (an empty path is never created and yields false).
    def self.mkdir!(path)
      path = path.to_s
      FileUtils.mkdir_p(path) unless path.empty?
      File.directory?(path)
    end

    # Fetches +uri+ with a GET request.
    #
    # uri - a ::URI object, or anything whose to_s is a parseable URI string.
    #
    # Returns the response body when the status code is 200, otherwise the
    # status code as an Integer.
    def self.get(uri)
      # ::URI is spelled out because a bare URI inside this module resolves
      # to Rypper::URI.
      uri = ::URI.parse(uri.to_s) unless uri.kind_of?(::URI)
      client = Net::HTTPClient.from_storage(uri.host, uri.port || 80)
      response = client.get(uri)
      status = response.code.to_i
      status == 200 ? response.body : status
    end
  end
end
|
data/lib/rypper/uri.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
module Rypper
  # URI template containing named counters written as [lower-upper:name],
  # e.g. "http://host/v[01-45:vol]/[1-99:pic].html". The counters are
  # stepped together like the digits of an odometer, the last counter in
  # the template moving fastest.
  class URI
    REGEXP_COUNTER = /\[(\d+)\-(\d+)\:(\w+)\]/
    REGEXP_NAME = /\[\:(\w+)\]/

    attr_accessor :uri
    attr_reader :counter
    attr_reader :order

    # uri - the template (converted with to_s). Counters are only
    #       discovered once #parse! is called; until then there are none.
    def initialize(uri)
      self.uri = uri.to_s
      @counter = {}
      @order = []
    end

    # Scans the template for counter definitions and rebuilds #counter
    # (name => Counter) and #order (names in template order).
    # A lower bound written with a leading zero fixes the zero-padded
    # width to its length; otherwise the width is 1. Returns self.
    def parse!
      @counter = {}
      @order = []
      self.uri.scan(REGEXP_COUNTER) do
        match = Regexp.last_match
        lower = match[1].to_i
        upper = match[2].to_i
        digits = match[1].start_with?('0') ? match[1].length : 1
        name = match[3].intern
        self.counter[name] = Counter.new(match.to_s, lower, upper, digits)
        self.order << name
      end
      self
    end

    # Resets every counter to its lower bound. Returns self.
    def first!
      self.counter.each_value(&:first!)
      self
    end

    # True when every counter is at its lower bound (also true when there
    # are no counters).
    def first?
      self.counter.values.all?(&:first?)
    end

    # Steps back by one: decrements the last counter and borrows from the
    # preceding one whenever a counter wraps to its upper bound.
    # Returns self.
    def prev!
      self.order.reverse_each do |name|
        cntr = self.counter[name]
        break if cntr.prev!.state != cntr.upper
      end
      self
    end

    # Steps forward by one: increments the last counter and carries into
    # the preceding one whenever a counter wraps to its lower bound.
    # Returns self.
    def next!
      self.order.reverse_each do |name|
        cntr = self.counter[name]
        break if cntr.next!.state != cntr.lower
      end
      self
    end

    # Sets every counter to its upper bound. Returns self.
    def last!
      self.counter.each_value(&:last!)
      self
    end

    # True when every counter is at its upper bound (also true when there
    # are no counters).
    def last?
      self.counter.values.all?(&:last?)
    end

    # Template with every counter definition replaced by the counter's
    # current value. Back-references to a counter are replaced as well:
    # "[:name]" (the form REGEXP_NAME describes) and ":[name]" (the form
    # earlier versions substituted, kept for compatibility).
    def to_s
      s = self.uri.dup
      self.counter.each do |name, counter|
        value = counter.to_s
        s.gsub!(counter.match, value)
        s.gsub!("[:#{name}]", value)
        s.gsub!(":[#{name}]", value)
      end
      s
    end

    # Current state as a ::URI object.
    def to_uri
      ::URI.parse(self.to_s)
    end

    # Builds a relative file path from the current counter values: one
    # "<name><cntr_sep><value>" segment per counter, in template order,
    # joined with +path_sep+ and followed by +extension+.
    #
    # extension - appended verbatim unless nil.
    # cntr_sep  - separator between name and value (default '_').
    # path_sep  - separator between segments (default File::Separator).
    # cdigits   - zero-padded width passed to Counter#to_s (nil keeps each
    #             counter's own width).
    def to_path(extension=nil, cntr_sep=nil, path_sep=nil, cdigits=nil)
      cntr_sep ||= '_'
      path_sep ||= File::Separator
      segments = self.order.map do |name|
        "#{name}#{cntr_sep}#{self.counter[name].to_s(cdigits)}"
      end
      "#{segments.join(path_sep)}#{extension}"
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rypper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Michael Nowak
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2012-01-15 00:00:00 +01:00
|
18
|
+
default_executable: rypper
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: nokogiri
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ~>
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 1
|
29
|
+
- 5
|
30
|
+
version: "1.5"
|
31
|
+
type: :runtime
|
32
|
+
version_requirements: *id001
|
33
|
+
description: Rypper
|
34
|
+
email: thexsystem@gmail.com
|
35
|
+
executables:
|
36
|
+
- rypper
|
37
|
+
extensions: []
|
38
|
+
|
39
|
+
extra_rdoc_files: []
|
40
|
+
|
41
|
+
files:
|
42
|
+
- lib/net/http_client.rb
|
43
|
+
- lib/rypper/cli.rb
|
44
|
+
- lib/rypper/counter.rb
|
45
|
+
- lib/rypper/extractor.rb
|
46
|
+
- lib/rypper/loader.rb
|
47
|
+
- lib/rypper/uri.rb
|
48
|
+
- lib/rypper.rb
|
49
|
+
has_rdoc: true
|
50
|
+
homepage: https://github.com/THExSYSTEM/rypper
|
51
|
+
licenses: []
|
52
|
+
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options:
|
55
|
+
- --charset=UTF-8
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
segments:
|
63
|
+
- 1
|
64
|
+
- 8
|
65
|
+
- 7
|
66
|
+
version: 1.8.7
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
requirements: []
|
75
|
+
|
76
|
+
rubyforge_project:
|
77
|
+
rubygems_version: 1.3.6
|
78
|
+
signing_key:
|
79
|
+
specification_version: 3
|
80
|
+
summary: Rypper
|
81
|
+
test_files: []
|
82
|
+
|