rypper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/bin/rypper ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- mode: ruby -*-
3
+
4
+ require 'rypper'
5
+
6
+ Rypper::CLI.main
@@ -0,0 +1,73 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'net/http'
4
+ require 'uri'
5
+
6
module Net
  # Small keep-alive HTTP client built on top of Net::HTTP.
  #
  # Instances are bound to one host/port pair. The class keeps a cache of
  # instances (see .from_storage) so that consecutive requests to the same
  # endpoint can reuse one connection.
  #
  # NOTE(review): only plain HTTP is spoken; a redirect to an https:// URL
  # will be attempted without TLS. Confirm whether TLS support is wanted.
  class HTTPClient
    # Maximum number of redirects followed for a single #get call chain
    # unless options[:redirect_limit] says otherwise.
    DEFAULT_REDIRECT_LIMIT = 10

    class << self
      # Cache of client instances, keyed by "host:port".
      def storage
        @storage ||= {}
      end

      # Returns the cached client for host/port, creating one if needed.
      #
      # host  - host name (anything responding to #to_s)
      # port  - TCP port (default 80)
      # renew - when true, always replace the cached client with a new one
      def from_storage(host, port=80, renew=false)
        key = storage_key(host, port)
        # The key includes the port: a host-only key would hand back a
        # connection to the wrong port for a second service on the same host.
        if renew || !storage.has_key?(key)
          storage[key] = new(host, port)
        end
        storage[key]
      end

      private

      # Builds the cache key, normalising host and port the same way
      # #initialize does.
      def storage_key(host, port)
        "#{host.to_s.strip}:#{port.to_i}"
      end
    end

    attr_reader :host, :port, :timeout
    attr_writer :keep_alive
    attr_accessor :user_agent

    # host    - host name to connect to
    # port    - TCP port (default 80)
    # timeout - open and read timeout in seconds (default 15)
    #
    # No connection is opened here; that happens on the first #get.
    def initialize(host, port=80, timeout=15)
      @host = host.to_s.strip
      @port = port.to_i
      @keep_alive = true
      @timeout = timeout.to_i
      @user_agent = "Net::HTTPClient/0.1 (Ruby #{RUBY_VERSION})"
      @http = build_http
    end

    # True when requests ask the server to keep the connection open.
    def keep_alive?
      @keep_alive ? true : false
    end

    # Performs a GET request and returns the Net::HTTPResponse.
    #
    # path    - a URI object or a request path (optionally with "?query")
    # header  - request headers; defaults are filled in on a copy, the
    #           caller's hash is left untouched
    # options - :referer_self        send the requested URI as Referer
    #           :follow_redirects    follow a 3xx response
    #           :follow_with_header  reuse this request's headers when following
    #           :follow_header       headers to use when following otherwise
    #           :follow_with_options reuse these options when following
    #           :follow_options      options to use when following otherwise
    #           :redirect_limit      remaining redirects allowed
    #                                (default DEFAULT_REDIRECT_LIMIT)
    def get(path, header={}, options={})
      uri = path.is_a?(URI) ? path : to_uri(path)
      header = (header || {}).dup
      options ||= {}
      header['Accept'] ||= '*/*'
      header['Connection'] ||= (keep_alive? ? 'Keep-Alive' : 'Close')
      header['Referer'] = uri.to_s if options[:referer_self]
      header['User-Agent'] ||= user_agent

      response = request_with_retry(request_target(uri), header)
      if response.is_a?(Net::HTTPRedirection) && options[:follow_redirects]
        response = follow_redirect(response, uri, header, options)
      end
      response
    end

    # Builds an absolute http:// URI for this client's host and port.
    #
    # path - absolute path, optionally followed by "?query"; nil for none
    def to_uri(path=nil)
      # URI::HTTP.build rejects a "?" inside :path, so split the query off.
      path, query = path.to_s.split('?', 2)
      URI::HTTP.build({:host => host, :port => port, :path => path, :query => query})
    end

    private

    # Creates an unstarted Net::HTTP object carrying this client's timeouts.
    def build_http
      http = Net::HTTP.new(host, port)
      http.read_timeout = timeout
      http.open_timeout = timeout
      http
    end

    # Request target (path plus query) for the request line; never empty.
    def request_target(uri)
      target = if uri.respond_to?(:request_uri)
        uri.request_uri
      else
        [uri.path, uri.query].compact.join('?')
      end
      target.to_s.empty? ? '/' : target
    end

    # Sends the GET; if the kept-alive connection turns out to be dead,
    # reconnects once (with the configured timeouts) and tries again.
    def request_with_retry(target, header)
      @http.start unless @http.started?
      begin
        @http.request_get(target, header)
      rescue EOFError, Errno::ECONNRESET, Errno::EPIPE
        begin
          @http.finish if @http.started?
        rescue IOError
          # connection was already torn down
        end
        @http = build_http
        @http.start
        @http.request_get(target, header)
      end
    end

    # Follows one redirect response. Returns the response unchanged when
    # there is no Location header or the redirect budget is used up.
    def follow_redirect(response, uri, header, options)
      limit = (options[:redirect_limit] || DEFAULT_REDIRECT_LIMIT).to_i
      location = response['Location'].to_s.strip
      return response if limit <= 0 || location.empty?

      target = URI.parse(location)
      # Servers may send a relative Location; resolve it against the request.
      target = uri.merge(target) if target.relative? && uri.absolute?

      next_header = (options[:follow_with_header] ? header : (options[:follow_header] || {})).dup
      next_options = (options[:follow_with_options] ? options : (options[:follow_options] || {})).dup
      # The Referer belongs in the headers, not in the options hash.
      next_header['Referer'] ||= uri.to_s
      next_options[:redirect_limit] = limit - 1

      connection = self.class.from_storage(target.host || host, target.port || port)
      connection.get(target, next_header, next_options)
    end
  end
end
data/lib/rypper.rb ADDED
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'net/http_client'
4
+
5
+ require 'getoptlong'
6
+ require 'uri'
7
+
8
+ require 'rubygems'
9
+ require 'nokogiri'
10
+
11
+ require 'rypper/cli'
12
+ require 'rypper/counter'
13
+ require 'rypper/extractor'
14
+ require 'rypper/loader'
15
+ require 'rypper/uri'
16
+
17
# Run the command-line interface when this file is executed directly.
# Both sides are expanded to absolute paths: comparing the basename of $0
# with __FILE__ only matches when the script is started from its own
# directory.
if File.expand_path($0) == File.expand_path(__FILE__)
  Rypper::CLI.main
end
data/lib/rypper/cli.rb ADDED
@@ -0,0 +1,77 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
module Rypper
  # Command-line front end: walks every URI produced by a counter template,
  # extracts image links from each page with a CSS selector and stores the
  # images on disk.
  class CLI
    OPTS = [
      ['--help', '-h', GetoptLong::NO_ARGUMENT],
      ['--output', '-o', GetoptLong::REQUIRED_ARGUMENT],
      ['--overwrite', '-w', GetoptLong::NO_ARGUMENT],
    ]

    # Parses the options declared in OPTS out of ARGV (GetoptLong removes
    # them from ARGV) and returns a Hash keyed by option name as a Symbol.
    # Flags map to true, options with an argument map to that argument.
    #
    # NOTE(review): --output is parsed but nothing in this class reads it yet.
    def self.getopt
      opts = {}
      GetoptLong.new(*OPTS).each do |opt, arg|
        name = opt.sub(/\A--/, '').to_sym
        type = OPTS.find { |entry| entry.first == opt }.last
        opts[name] = (type == GetoptLong::NO_ARGUMENT) ? true : arg
      end
      opts
    end

    # Entry point. Expects "<uri> <selector>" to remain in ARGV after option
    # parsing. Terminates the process: status 0 on success or --help, 1 on a
    # usage error.
    def self.main
      begin
        opts = getopt
      rescue GetoptLong::Error
        # GetoptLong has already printed what was wrong with the option.
        $stderr.puts usage
        exit 1
      end
      if opts[:help]
        puts usage
        exit 0
      end
      if ARGV.count != 2
        $stderr.puts usage
        exit 1
      end

      # e.g. 'http://host/manga/v[01-45:vol]/c[001-459:chap]/[1-99:pic].html'
      uri = Rypper::URI.new(ARGV[0])
      uri.parse!
      uri.first!
      extractor = Rypper::Extractor.new(ARGV[1]) # e.g. '#image'
      # Innermost counter; nil when the template contains no counter at all.
      counter = uri.counter[uri.order.last]

      puts "Processing #{uri.uri} ..."
      loop do
        html_uri = uri.to_uri
        puts " * #{html_uri} ..."
        html = Rypper::Loader.get(html_uri)
        if html.is_a?(String)
          extractor.extract!(html).each do |image_uri|
            save_image(uri, html_uri, image_uri, opts)
          end
        else
          # The page could not be loaded: jump the innermost counter to its
          # end so the next! below rolls over to the next outer value.
          counter.last! if counter
          puts ' * Last'
        end
        uri.next!
        break if uri.first?
      end

      puts 'OK'

      exit 0
    end

    # Usage line shown for --help and on argument errors.
    def self.usage
      "USAGE: ruby #{$0} <uri> <selector>"
    end

    # Downloads one extracted image and writes it to the path derived from
    # the current counter state. Nothing is written when the download fails,
    # so an HTTP status code never ends up inside an image file.
    def self.save_image(uri, page_uri, image_uri, opts)
      unless image_uri.is_a?(String)
        puts ' * Imageless'
        return
      end

      print " * #{image_uri} ..."
      # Image links are frequently relative to the page they appear on.
      resolved = ::URI.join(page_uri.to_s, image_uri)
      image_path = uri.to_path(File.extname(resolved.path.to_s))
      if File.exist?(image_path) && !opts.has_key?(:overwrite)
        puts ' Exists: Skipping'
        return
      end

      data = Rypper::Loader.get(resolved.to_s)
      unless data.is_a?(String)
        puts " Failed (HTTP #{data})"
        return
      end

      Rypper::Loader.mkdir!(File.dirname(image_path))
      # Block form closes the file even if the write raises.
      File.open(image_path, 'wb') { |file| file.write(data) }
      puts ' OK'
    end

    private_class_method :usage, :save_image
  end
end
@@ -0,0 +1,60 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
module Rypper
  # A wrapping integer counter bounded by +lower+ and +upper+ (inclusive).
  # +match+ is stored as given and exposed through its reader; +digits+ is
  # the default zero-padding width used by #to_s.
  class Counter
    attr_reader :match, :lower, :upper, :digits, :state

    # state defaults to lower when not given.
    def initialize(match, lower, upper, digits=1, state=nil)
      @match, @lower, @upper, @digits = match, lower, upper, digits
      @state = state || @lower
    end

    # Resets the counter to its lower bound. Returns self.
    def first!
      @state = @lower
      self
    end

    # True while the counter sits on its lower bound.
    def first?
      @lower == @state
    end

    # Steps back by one, wrapping from the lower bound to the upper bound.
    # Returns self.
    def prev!
      @state = @state > @lower ? @state - 1 : @upper
      self
    end

    # Steps forward by one, wrapping from the upper bound to the lower bound.
    # Returns self.
    def next!
      @state = @state < @upper ? @state + 1 : @lower
      self
    end

    # Moves the counter to its upper bound. Returns self.
    def last!
      @state = @upper
      self
    end

    # True while the counter sits on its upper bound.
    def last?
      @upper == @state
    end

    # Current value as a String, left-padded with zeros to +digits+
    # characters (the counter's own width when digits is nil).
    def to_s(digits=nil)
      width = digits || self.digits
      @state.to_s.rjust(width, '0')
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
module Rypper
  # Pulls link targets out of an HTML page using a CSS/XPath selector.
  class Extractor
    attr_reader :selector

    # selector - selector string handed to Nokogiri's #search
    def initialize(selector)
      @selector = selector
    end

    # Returns an Array with one entry per element matched by the selector:
    # the +src+ of an <img>, the +href+ of an <a>, or the element itself for
    # any other tag.
    #
    # Every match is mapped the same way regardless of how many there are,
    # so a selector matching several images yields several URLs rather than
    # raw nodes.
    #
    # html - an HTML String or an already parsed Nokogiri::HTML::Document
    def extract!(html)
      html = Nokogiri::HTML(html) unless html.kind_of?(Nokogiri::HTML::Document)
      html.search(selector).map do |elem|
        case elem.name
        when 'img' then elem[:src]
        when 'a' then elem[:href]
        else elem
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
module Rypper
  # File-system and HTTP helpers used by the command-line interface.
  class Loader
    # Creates +path+ and any missing parent directories.
    # Works for relative and absolute paths. Returns true when +path+ is a
    # directory afterwards.
    def self.mkdir!(path)
      path = path.to_s
      # An absolute path splits into a leading empty segment; start from the
      # root instead of trying to create a directory named "".
      current = path.start_with?(File::Separator) ? File::Separator : nil
      path.split(File::Separator).each do |part|
        next if part.empty?
        current = current ? File.join(current, part) : part
        begin
          Dir.mkdir(current) unless File.directory?(current)
        rescue Errno::EEXIST
          # Something created it between the check and the mkdir; that is
          # only a problem if it is not a directory.
          raise unless File.directory?(current)
        end
      end
      File.directory?(path)
    end

    # Fetches +uri+ with a GET request.
    #
    # uri - a ::URI, an object responding to #to_uri (such as Rypper::URI),
    #       or anything whose #to_s is a URI string
    #
    # Returns the response body (String) for status 200, otherwise the
    # status code as an Integer.
    def self.get(uri)
      uri = uri.to_uri if uri.respond_to?(:to_uri)
      # Inside this module a bare URI would resolve to Rypper::URI, so the
      # standard library class is referenced explicitly.
      uri = ::URI.parse(uri.to_s) unless uri.kind_of?(::URI::Generic)
      # Pass the port along so non-default ports reach the right server.
      client = Net::HTTPClient.from_storage(uri.host, uri.port || 80)
      response = client.get(uri)
      status = response.code.to_i
      status == 200 ? response.body : status
    end
  end
end
data/lib/rypper/uri.rb ADDED
@@ -0,0 +1,99 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
module Rypper
  # A URI template containing counters.
  #
  # A counter is written as "[<lower>-<upper>:<name>]", e.g. "[001-120:chap]".
  # A leading zero in <lower> fixes the zero-padded width. The current value
  # of a counter can be repeated elsewhere in the template with "[:<name>]".
  class URI
    REGEXP_COUNTER = /\[(\d+)\-(\d+)\:(\w+)\]/
    REGEXP_NAME = /\[\:(\w+)\]/

    attr_accessor :uri
    attr_reader :counter
    attr_reader :order

    # uri - the template string
    def initialize(uri)
      self.uri = uri.to_s
      # Start out empty so the other methods are safe before parse!.
      @counter = {}
      @order = []
    end

    # Scans the template and (re)builds the counters. +counter+ maps a name
    # (Symbol) to its Counter; +order+ lists the names as they appear, left
    # to right. Returns self.
    def parse!
      @counter = {}
      @order = []
      uri.scan(REGEXP_COUNTER) do
        match = Regexp.last_match
        lower = match[1].to_i
        upper = match[2].to_i
        digits = match[1].start_with?('0') ? match[1].length : 1
        name = match[3].intern
        counter[name] = Counter.new(match.to_s, lower, upper, digits)
        # A repeated name must not be stepped twice per next!/prev!.
        order << name unless order.include?(name)
      end
      self
    end

    # Resets every counter to its lower bound. Returns self.
    def first!
      counter.each_value(&:first!)
      self
    end

    # True when every counter is on its lower bound.
    def first?
      counter.values.all?(&:first?)
    end

    # Steps back by one, borrowing from right to left. Returns self.
    def prev!
      order.reverse_each do |name|
        cntr = counter[name]
        # Stop unless this counter wrapped around to its upper bound.
        break if cntr.prev!.state != cntr.upper
      end
      self
    end

    # Steps forward by one, carrying from right to left. Returns self.
    def next!
      order.reverse_each do |name|
        cntr = counter[name]
        # Stop unless this counter wrapped around to its lower bound.
        break if cntr.next!.state != cntr.lower
      end
      self
    end

    # Moves every counter to its upper bound. Returns self.
    def last!
      counter.each_value(&:last!)
      self
    end

    # True when every counter is on its upper bound.
    def last?
      counter.values.all?(&:last?)
    end

    # The template with every counter definition and every "[:name]"
    # back-reference replaced by the counter's current value.
    def to_s
      result = uri.dup
      counter.each do |name, cntr|
        value = cntr.to_s
        result = result.gsub(cntr.match, value)
        result = result.gsub("[:#{name}]", value)
        # Spelling accepted by earlier versions; kept for compatibility.
        result = result.gsub(":[#{name}]", value)
      end
      result
    end

    # The current state as a standard library URI object.
    def to_uri
      ::URI.parse(to_s)
    end

    # Builds a relative file path from the counter state, one segment per
    # counter in template order: "<name><cntr_sep><value>" joined by
    # path_sep, with extension appended.
    #
    # extension - suffix such as ".jpg" (default: none)
    # cntr_sep  - separator between name and value (default "_")
    # path_sep  - separator between segments (default File::Separator)
    # cdigits   - zero-padding width override passed to Counter#to_s
    def to_path(extension=nil, cntr_sep=nil, path_sep=nil, cdigits=nil)
      cntr_sep ||= '_'
      path_sep ||= File::Separator
      segments = order.map do |name|
        "#{name}#{cntr_sep}#{counter[name].to_s(cdigits)}"
      end
      "#{segments.join(path_sep)}#{extension}"
    end
  end
end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rypper
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Michael Nowak
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2012-01-15 00:00:00 +01:00
18
+ default_executable: rypper
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: nokogiri
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 5
30
+ version: "1.5"
31
+ type: :runtime
32
+ version_requirements: *id001
33
+ description: Rypper
34
+ email: thexsystem@gmail.com
35
+ executables:
36
+ - rypper
37
+ extensions: []
38
+
39
+ extra_rdoc_files: []
40
+
41
+ files:
42
+ - lib/net/http_client.rb
43
+ - lib/rypper/cli.rb
44
+ - lib/rypper/counter.rb
45
+ - lib/rypper/extractor.rb
46
+ - lib/rypper/loader.rb
47
+ - lib/rypper/uri.rb
48
+ - lib/rypper.rb
49
+ has_rdoc: true
50
+ homepage: https://github.com/THExSYSTEM/rypper
51
+ licenses: []
52
+
53
+ post_install_message:
54
+ rdoc_options:
55
+ - --charset=UTF-8
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ segments:
63
+ - 1
64
+ - 8
65
+ - 7
66
+ version: 1.8.7
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ requirements: []
75
+
76
+ rubyforge_project:
77
+ rubygems_version: 1.3.6
78
+ signing_key:
79
+ specification_version: 3
80
+ summary: Rypper
81
+ test_files: []
82
+