rypper 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/rypper ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- mode: ruby -*-
3
+
4
+ require 'rypper'
5
+
6
+ Rypper::CLI.main
@@ -0,0 +1,73 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'net/http'
4
+ require 'uri'
5
+
6
module Net
  # Thin convenience wrapper around Net::HTTP that keeps one reusable
  # connection object per host/port pair and can optionally follow redirects.
  class HTTPClient
    # Maximum number of redirects followed by a single #get call chain when
    # the caller does not pass options[:redirect_limit].
    DEFAULT_REDIRECT_LIMIT = 10

    class << self
      # Cache of already created clients, keyed by "host:port".
      #
      # @return [Hash{String => Net::HTTPClient}]
      def storage
        @storage ||= {}
      end

      # Returns the cached client for host/port, creating (or, when +renew+
      # is true, re-creating) it on demand.
      #
      # The cache key includes the port so that clients for different ports
      # of the same host are never confused with each other.
      #
      # @param host [#to_s] host name to connect to
      # @param port [#to_i] TCP port, defaults to 80
      # @param renew [Boolean] discard any cached client and build a new one
      # @return [Net::HTTPClient]
      def from_storage(host, port=80, renew=false)
        key = storage_key(host, port)
        connection = storage[key]
        if connection.nil? || renew
          connection = new(host, port)
          storage[key] = connection
        end
        connection
      end

      private

      # Normalises host and port the same way #initialize does.
      def storage_key(host, port)
        "#{host.to_s.strip}:#{port.to_i}"
      end
    end

    attr_reader :host, :port, :timeout
    attr_writer :keep_alive
    attr_accessor :user_agent

    # @param host [#to_s] host name to connect to
    # @param port [#to_i] TCP port, defaults to 80
    # @param timeout [#to_i] open and read timeout in seconds, defaults to 15
    def initialize(host, port=80, timeout=15)
      @host = host.to_s.strip
      @port = port.to_i
      @keep_alive = true
      @timeout = timeout.to_i
      @user_agent = "Net::HTTPClient/0.1 (Ruby #{RUBY_VERSION})"
      @http = build_http
    end

    # @return [Boolean] whether requests ask the server to keep the
    #   connection open
    def keep_alive?
      @keep_alive ? true : false
    end

    # Performs a GET request on this client's connection.
    #
    # The +header+ hash passed by the caller is never modified; defaults are
    # applied to a copy.
    #
    # Recognised options:
    # * :referer_self        - send the requested URI as the Referer header
    # * :follow_redirects    - follow a redirect response
    # * :follow_with_header  - reuse this request's headers for the redirect
    # * :follow_header       - headers for the redirect (when not reusing)
    # * :follow_with_options - reuse these options for the redirect
    # * :follow_options      - options for the redirect (when not reusing)
    # * :redirect_limit      - maximum redirects left to follow
    #                          (default DEFAULT_REDIRECT_LIMIT)
    #
    # @param path [String, URI] request path or full URI
    # @param header [Hash] request headers
    # @param options [Hash] see above
    # @return [Net::HTTPResponse]
    def get(path, header={}, options={})
      uri = path.is_a?(URI) ? path : to_uri(path)
      header = header.dup
      header['Accept'] ||= '*/*'
      header['Connection'] ||= (keep_alive? ? 'Keep-Alive' : 'Close')
      header['Referer'] = uri.to_s if options[:referer_self]
      header['User-Agent'] ||= user_agent
      response = request(uri, header)
      follow_redirect(uri, response, header, options)
    end

    # Builds an absolute HTTP URI for +path+ on this client's host and port.
    #
    # @param path [String, nil]
    # @return [URI::HTTP]
    def to_uri(path=nil)
      URI::HTTP.build({:host => host, :port => port, :path => path})
    end

    private

    # Creates an unstarted Net::HTTP object carrying this client's timeouts.
    def build_http
      http = Net::HTTP.new(host, port)
      http.read_timeout = timeout
      http.open_timeout = timeout
      http
    end

    # Sends the GET request, reconnecting once if the connection was closed
    # by the peer (EOFError).
    def request(uri, header)
      # request_uri keeps the query string and never yields an empty path.
      target = uri.respond_to?(:request_uri) ? uri.request_uri : uri.path
      @http.start unless @http.started?
      begin
        @http.request_get(target, header)
      rescue EOFError
        @http = build_http
        @http.start
        @http.request_get(target, header)
      end
    end

    # Follows +response+ if it is a redirect and the options ask for it;
    # otherwise returns +response+ unchanged.
    def follow_redirect(uri, response, header, options)
      return response unless response.is_a?(Net::HTTPRedirection) && options[:follow_redirects]

      limit = (options[:redirect_limit] || DEFAULT_REDIRECT_LIMIT).to_i
      location = response['Location']
      # Redirection responses without a Location header cannot be followed.
      return response if limit <= 0 || location.nil?

      # merge resolves relative Location values against the requested URI.
      target = uri.merge(location)
      next_header = options[:follow_with_header] ? header : (options[:follow_header] || {})
      # The redirecting URI is the default Referer; an explicit one wins.
      next_header = {'Referer' => uri.to_s}.merge(next_header)
      next_options = options[:follow_with_options] ? options : (options[:follow_options] || {})
      next_options = next_options.merge(:redirect_limit => limit - 1)

      connection = self.class.from_storage(target.host, target.port)
      connection.get(target, next_header, next_options)
    end
  end
end
data/lib/rypper.rb ADDED
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'net/http_client'
4
+
5
+ require 'getoptlong'
6
+ require 'uri'
7
+
8
+ require 'rubygems'
9
+ require 'nokogiri'
10
+
11
+ require 'rypper/cli'
12
+ require 'rypper/counter'
13
+ require 'rypper/extractor'
14
+ require 'rypper/loader'
15
+ require 'rypper/uri'
16
+
17
# Run the command-line interface only when this file itself is the program
# being executed (not when it is merely required). Both sides are expanded
# to absolute paths so the check does not depend on the working directory
# or on how the script path was spelled on the command line.
if File.expand_path($0) == File.expand_path(__FILE__)
  Rypper::CLI.main
end
data/lib/rypper/cli.rb ADDED
@@ -0,0 +1,77 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
module Rypper
  # Command-line front end: walks every page described by a counter URI,
  # extracts the image reference from each page and stores the image on disk.
  class CLI
    # Options understood by the parser. Only --overwrite is acted upon by
    # .main; --help and --output are parsed but currently unused there.
    OPTS = [
      ['--help', '-h', GetoptLong::NO_ARGUMENT],
      ['--output', '-o', GetoptLong::REQUIRED_ARGUMENT],
      ['--overwrite', '-w', GetoptLong::NO_ARGUMENT],
    ]

    # Parses the options out of ARGV (GetoptLong removes the options it
    # consumes from ARGV).
    #
    # @return [Hash{Symbol => true, String}] flag options map to true,
    #   options with an argument map to that argument
    def self.getopt
      opts = {}
      GetoptLong.new(*OPTS).each do |opt, arg|
        key = opt.sub(/\A--/, '').to_sym
        spec = OPTS.find { |entry| entry.first == opt }
        opts[key] = spec.last == GetoptLong::NO_ARGUMENT ? true : arg
      end
      opts
    end

    # Program entry point. Expects exactly two positional arguments, the
    # counter URI and the selector, and terminates the process via exit.
    def self.main
      opts = getopt
      argv = ARGV
      if argv.count != 2
        puts "USAGE: ruby #{$0} <uri> <selector>"
        exit 1
      end
      uri = Rypper::URI.new(argv[0]) # 'http://www.mangafox.com/manga/history_s_strongest_disciple_kenichi/v[01-45:vol]/c[001-459:chap]/[1-99:pic].html'
      uri.parse!
      uri.first!

      extractor = Rypper::Extractor.new(argv[1]) # '#image'
      # Innermost counter; nil when the URI contains no counters at all.
      counter = uri.counter[uri.order.last]
      overwrite = opts.key?(:overwrite)

      puts "Processing #{uri.uri} ..."
      loop do
        html_uri = uri.to_uri
        puts " * #{html_uri} ..."
        html = Rypper::Loader.get(html_uri)
        if html.is_a?(String)
          extractor.extract!(html).each do |image_uri|
            if image_uri.is_a?(String)
              print " * #{image_uri} ..."
              image_path = uri.to_path(File.extname(image_uri))
              puts fetch_image(image_uri, image_path, overwrite)
            else
              puts ' * Imageless'
            end
          end
        else
          # The page could not be loaded: jump the innermost counter to its
          # end so the next! below rolls over to the next outer value.
          counter.last! if counter
          puts ' * Last'
        end
        uri.next!
        break if uri.first?
      end

      puts 'OK'

      exit 0
    end

    # Downloads +image_uri+ to +image_path+ unless the file already exists
    # and overwriting was not requested. The image is fetched before anything
    # is created on disk, so a failed download leaves no file behind, and the
    # block form of File.open closes the handle even if writing raises.
    #
    # @return [String] status text to print after the progress line
    def self.fetch_image(image_uri, image_path, overwrite)
      return ' Exists: Skipping' if File.exist?(image_path) && !overwrite

      data = Rypper::Loader.get(image_uri)
      # Loader.get yields the numeric status code instead of a body on failure.
      return " Failed (#{data})" unless data.is_a?(String)

      Rypper::Loader.mkdir!(File.dirname(image_path))
      File.open(image_path, 'wb') { |file| file.write(data) }
      ' OK'
    end
    private_class_method :fetch_image
  end
end
@@ -0,0 +1,60 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
module Rypper
  # A bounded integer counter that wraps around at both ends and renders
  # itself as a zero-padded string.
  class Counter
    attr_reader :match, :lower, :upper, :digits, :state

    # @param match  the placeholder text this counter was created from
    # @param lower  smallest value of the counter
    # @param upper  largest value of the counter
    # @param digits minimum width used by #to_s
    # @param state  initial value; falls back to +lower+ when nil/false
    def initialize(match, lower, upper, digits=1, state=nil)
      @match, @lower, @upper, @digits = match, lower, upper, digits
      @state = state || lower
    end

    # Resets the counter to its lower bound.
    # @return [Counter] self
    def first!
      @state = lower
      self
    end

    # @return [Boolean] true when the counter sits on its lower bound
    def first?
      state == lower
    end

    # Steps one down, wrapping to the upper bound at the lower bound.
    # @return [Counter] self
    def prev!
      @state = state > lower ? state - 1 : upper
      self
    end

    # Steps one up, wrapping to the lower bound at the upper bound.
    # @return [Counter] self
    def next!
      @state = state < upper ? state + 1 : lower
      self
    end

    # Moves the counter to its upper bound.
    # @return [Counter] self
    def last!
      @state = upper
      self
    end

    # @return [Boolean] true when the counter sits on its upper bound
    def last?
      state == upper
    end

    # @param digits minimum width; defaults to the counter's own +digits+
    # @return [String] current value, left-padded with zeros
    def to_s(digits=nil)
      width = digits || self.digits
      state.to_s.rjust(width, '0')
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
module Rypper
  # Looks up a selector in an HTML document and returns what it points to.
  class Extractor
    attr_reader :selector

    # @param selector the selector handed to the document's +search+
    def initialize(selector)
      @selector = selector
    end

    # Searches +html+ for the selector.
    #
    # When exactly one element matches, a one-element Array is returned
    # holding the +src+ of an <img>, the +href+ of an <a>, or the element
    # itself for any other tag. For any other number of matches the search
    # result is returned as is.
    #
    # @param html a Nokogiri::HTML::Document, or anything Nokogiri::HTML()
    #   accepts
    def extract!(html)
      document = html.kind_of?(Nokogiri::HTML::Document) ? html : Nokogiri::HTML(html)
      matches = document.search(selector)
      return matches unless matches.count == 1

      node = matches.first
      case node.name
      when 'img' then [node[:src]]
      when 'a'   then [node[:href]]
      else            [node]
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
require 'fileutils'

module Rypper
  # Helpers for creating target directories and downloading resources.
  class Loader
    # Creates +path+ including all missing parent directories. Works for
    # relative as well as absolute paths.
    #
    # @param path [#to_s] directory to create
    # @return [Boolean] whether +path+ is a directory afterwards
    def self.mkdir!(path)
      path = path.to_s
      return false if path.empty?

      FileUtils.mkdir_p(path)
      File.directory?(path)
    end

    # Fetches +uri+ with a GET request through the shared Net::HTTPClient
    # for the URI's host and port.
    #
    # @param uri [URI::Generic, #to_s] address to fetch
    # @return [String, Integer] the response body for status 200, otherwise
    #   the numeric status code
    def self.get(uri)
      # ::URI is spelled out because a bare URI inside this module resolves
      # to Rypper::URI once that class has been loaded.
      uri = ::URI.parse(uri.to_s) unless uri.kind_of?(::URI::Generic)
      client = Net::HTTPClient.from_storage(uri.host, uri.port || 80)
      response = client.get(uri)
      status = response.code.to_i
      status == 200 ? response.body : status
    end
  end
end
data/lib/rypper/uri.rb ADDED
@@ -0,0 +1,99 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
module Rypper
  # A URI template containing counter placeholders such as "[01-45:vol]".
  # Every placeholder becomes a Counter; together the counters behave like
  # an odometer, the last placeholder in the template moving fastest.
  class URI
    # "[<lower>-<upper>:<name>]" defines a counter.
    REGEXP_COUNTER = /\[(\d+)\-(\d+)\:(\w+)\]/
    # "[:<name>]" refers back to a counter defined elsewhere in the template.
    REGEXP_NAME = /\[\:(\w+)\]/

    attr_accessor :uri
    attr_reader :counter
    attr_reader :order

    # @param uri [#to_s] the template
    def initialize(uri)
      self.uri = uri.to_s
    end

    # Scans the template and (re)builds +counter+ (name => Counter) and
    # +order+ (names in order of appearance).
    #
    # A lower bound written with a leading zero fixes the rendered width to
    # the length of that bound; otherwise values are not padded.
    #
    # @return [Rypper::URI] self
    def parse!
      @counter = {}
      @order = []
      uri.scan(REGEXP_COUNTER) do
        match = Regexp.last_match
        lower = match[1].to_i
        upper = match[2].to_i
        digits = match[1].start_with?('0') ? match[1].length : 1
        name = match[3].intern
        counter[name] = Counter.new(match.to_s, lower, upper, digits)
        order << name
      end
      self
    end

    # Resets every counter to its lower bound.
    # @return [Rypper::URI] self
    def first!
      counter.each_value(&:first!)
      self
    end

    # @return [Boolean] whether every counter is on its lower bound
    def first?
      counter.values.all?(&:first?)
    end

    # Steps back by one, borrowing from the next outer counter whenever a
    # counter wraps around to its upper bound.
    # @return [Rypper::URI] self
    def prev!
      order.reverse.each do |name|
        cntr = counter[name]
        break if cntr.prev!.state != cntr.upper
      end
      self
    end

    # Steps forward by one, carrying into the next outer counter whenever a
    # counter wraps around to its lower bound.
    # @return [Rypper::URI] self
    def next!
      order.reverse.each do |name|
        cntr = counter[name]
        break if cntr.next!.state != cntr.lower
      end
      self
    end

    # Moves every counter to its upper bound.
    # @return [Rypper::URI] self
    def last!
      counter.each_value(&:last!)
      self
    end

    # @return [Boolean] whether every counter is on its upper bound
    def last?
      counter.values.all?(&:last?)
    end

    # Renders the template with the current counter values.
    #
    # Besides the counter definitions themselves, back-references are
    # replaced: "[:name]" (the form REGEXP_NAME describes) and ":[name]"
    # (the only form earlier code substituted, kept for compatibility).
    #
    # @return [String]
    def to_s
      text = uri.dup
      counter.each do |name, cntr|
        value = cntr.to_s
        text.gsub!(cntr.match, value)
        text.gsub!("[:#{name}]", value)
        text.gsub!(":[#{name}]", value)
      end
      text
    end

    # @return [::URI::Generic] the rendered template parsed by the standard
    #   library
    def to_uri
      ::URI.parse(to_s)
    end

    # Builds a relative file path from the current counter values, one path
    # segment "<name><cntr_sep><value>" per counter.
    #
    # @param extension appended verbatim to the result unless nil
    # @param cntr_sep  separator between name and value, defaults to "_"
    # @param path_sep  separator between segments, defaults to
    #   File::Separator
    # @param cdigits   width passed to Counter#to_s (nil = counter default)
    # @return [String]
    def to_path(extension=nil, cntr_sep=nil, path_sep=nil, cdigits=nil)
      cntr_sep ||= '_'
      path_sep ||= File::Separator
      segments = order.map do |name|
        "#{name}#{cntr_sep}#{counter[name].to_s(cdigits)}"
      end
      path = segments.join(path_sep.to_s)
      extension.nil? ? path : path + extension.to_s
    end
  end
end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rypper
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Michael Nowak
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2012-01-15 00:00:00 +01:00
18
+ default_executable: rypper
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: nokogiri
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 5
30
+ version: "1.5"
31
+ type: :runtime
32
+ version_requirements: *id001
33
+ description: Rypper
34
+ email: thexsystem@gmail.com
35
+ executables:
36
+ - rypper
37
+ extensions: []
38
+
39
+ extra_rdoc_files: []
40
+
41
+ files:
42
+ - lib/net/http_client.rb
43
+ - lib/rypper/cli.rb
44
+ - lib/rypper/counter.rb
45
+ - lib/rypper/extractor.rb
46
+ - lib/rypper/loader.rb
47
+ - lib/rypper/uri.rb
48
+ - lib/rypper.rb
49
+ has_rdoc: true
50
+ homepage: https://github.com/THExSYSTEM/rypper
51
+ licenses: []
52
+
53
+ post_install_message:
54
+ rdoc_options:
55
+ - --charset=UTF-8
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ segments:
63
+ - 1
64
+ - 8
65
+ - 7
66
+ version: 1.8.7
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ requirements: []
75
+
76
+ rubyforge_project:
77
+ rubygems_version: 1.3.6
78
+ signing_key:
79
+ specification_version: 3
80
+ summary: Rypper
81
+ test_files: []
82
+