spidr_epg_gem 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,79 @@
1
module Spidr
  #
  # The {Rules} class represents collections of acceptance and rejection
  # rules, which are used to filter data.
  #
  # A rule may be a Proc, a Regexp, or any other object (compared with `==`).
  #
  class Rules

    # Accept rules
    attr_reader :accept

    # Reject rules
    attr_reader :reject

    #
    # Creates a new Rules object.
    #
    # @param [Hash] options
    #   Additional options.
    #
    # @option options [Array<String, Regexp, Proc>, String, Regexp, Proc] :accept
    #   The patterns to accept data with. A single pattern is wrapped in
    #   an Array.
    #
    # @option options [Array<String, Regexp, Proc>, String, Regexp, Proc] :reject
    #   The patterns to reject data with. A single pattern is wrapped in
    #   an Array.
    #
    def initialize(options={})
      @accept = normalize_rules(options[:accept])
      @reject = normalize_rules(options[:reject])
    end

    #
    # Determines whether the data should be accepted or rejected.
    #
    # When at least one accept rule exists, only the accept rules are
    # consulted; the reject rules are used solely when there are no accept
    # rules.
    #
    # @param [Object] data
    #   The data to test against the rules.
    #
    # @return [Boolean]
    #   Specifies whether the given data was accepted, using the rules
    #   acceptance patterns.
    #
    def accept?(data)
      if @accept.empty?
        @reject.none? { |rule| test_data(data,rule) }
      else
        @accept.any? { |rule| test_data(data,rule) }
      end
    end

    #
    # Determines whether the data should be rejected or accepted.
    #
    # @param [Object] data
    #   The data to test against the rules.
    #
    # @return [Boolean]
    #   Specifies whether the given data was rejected; always the inverse
    #   of {#accept?}.
    #
    def reject?(data)
      !accept?(data)
    end

    protected

    #
    # Converts the value of a rules option into a fresh Array.
    #
    # @param [Array, Object, nil] rules
    #   The raw option value.
    #
    # @return [Array]
    #   An empty Array for nil/false, a copy of an Array-like value, or a
    #   one element Array wrapping a single rule.
    #
    def normalize_rules(rules)
      return [] unless rules

      # copy, so later changes to the caller's Array do not leak into the rules
      rules.respond_to?(:to_ary) ? rules.to_ary.dup : [rules]
    end

    #
    # Tests the given data against a given pattern.
    #
    # @param [Object] data
    #   The data to test.
    #
    # @param [Proc, Regexp, Object] rule
    #   The pattern to test the data with.
    #
    # @return [Boolean]
    #   Specifies whether the given data matched the pattern.
    #
    def test_data(data,rule)
      case rule
      when Proc
        # only a literal `true` result counts as a match
        rule.call(data) == true
      when Regexp
        !(data.to_s =~ rule).nil?
      else
        data == rule
      end
    end

  end
end
@@ -0,0 +1,56 @@
1
+ require 'uri'
2
+
3
module Spidr
  #
  # The {Sanitizers} module adds methods to {Agent} which control the
  # sanitation of incoming links.
  #
  module Sanitizers
    # Specifies whether the Agent will strip URI fragments
    attr_accessor :strip_fragments

    # Specifies whether the Agent will strip URI queries
    attr_accessor :strip_query

    #
    # Sanitizes a URL based on filtering options.
    #
    # The given object is never modified: a URI argument is copied before
    # the fragment/query components are removed.
    #
    # @param [URI::HTTP, URI::HTTPS, String] url
    #   The URL to be sanitized
    #
    # @return [URI::HTTP, URI::HTTPS]
    #   The new sanitized URL.
    #
    # @since 0.2.2
    #
    def sanitize_url(url)
      sanitized = if url.kind_of?(URI)
                    url.dup
                  else
                    URI(url.to_s)
                  end

      sanitized.fragment = nil if @strip_fragments
      sanitized.query    = nil if @strip_query

      sanitized
    end

    protected

    #
    # Initializes the Sanitizer rules.
    #
    # @param [Hash] options
    #   Additional options.
    #
    # @option options [Boolean] :strip_fragments (true)
    #   Specifies whether or not to strip the fragment component from URLs.
    #
    # @option options [Boolean] :strip_query (false)
    #   Specifies whether or not to strip the query component from URLs.
    #
    # @since 0.2.2
    #
    def initialize_sanitizers(options={})
      @strip_fragments = options.fetch(:strip_fragments,true)
      @strip_query     = options.fetch(:strip_query,false)
    end
  end
end
@@ -0,0 +1,145 @@
1
+ require 'spidr/spidr'
2
+
3
+ require 'net/http'
4
+
5
module Spidr
  #
  # Stores active HTTP Sessions organized by scheme, host-name and port.
  #
  class SessionCache

    # Proxy to use
    attr_accessor :proxy

    #
    # Creates a new session cache.
    #
    # @param [Hash, nil] proxy (Spidr.proxy)
    #   Proxy options. `nil` is treated as "no proxy".
    #
    # @option proxy [String] :host
    #   The host the proxy is running on.
    #
    # @option proxy [Integer] :port
    #   The port the proxy is running on.
    #
    # @option proxy [String] :user
    #   The user to authenticate as with the proxy.
    #
    # @option proxy [String] :password
    #   The password to authenticate with.
    #
    # @since 0.2.2
    #
    def initialize(proxy=Spidr.proxy)
      @proxy    = proxy
      @sessions = {}
    end

    #
    # Determines if there is an active HTTP session for a given URL.
    #
    # @param [URI::HTTP, String] url
    #   The URL that represents a session.
    #
    # @return [Boolean]
    #   Specifies whether there is an active HTTP session.
    #
    # @since 0.2.3
    #
    def active?(url)
      @sessions.has_key?(session_key(url))
    end

    #
    # Provides an active HTTP session for a given URL.
    #
    # A session is created and cached on first use. Only `https` sessions
    # are started here; other sessions are returned unstarted.
    #
    # @param [URI::HTTP, String] url
    #   The URL which will be requested later.
    #
    # @return [Net::HTTP]
    #   The active HTTP session object.
    #
    def [](url)
      url = normalize_url(url)
      key = session_key(url)

      unless @sessions[key]
        # a nil proxy means "connect directly"
        settings = @proxy || {}

        session = Net::HTTP::Proxy(
          settings[:host],
          settings[:port],
          settings[:user],
          settings[:password]
        ).new(url.host,url.port)

        if url.scheme == 'https'
          session.use_ssl = true
          # NOTE(review): VERIFY_NONE disables certificate verification,
          # so the peer's identity is not checked. Kept for compatibility.
          session.verify_mode = OpenSSL::SSL::VERIFY_NONE
          session.start
        end

        @sessions[key] = session
      end

      @sessions[key]
    end

    #
    # Destroys an HTTP session for the given scheme, host and port.
    #
    # @param [URI::HTTP, String] url
    #   The URL of the requested session.
    #
    # @return [nil]
    #
    # @since 0.2.2
    #
    def kill!(url)
      key = session_key(url)

      if (sess = @sessions[key])
        begin
          sess.finish
        rescue IOError
          # the session was not started; nothing to close
        end

        @sessions.delete(key)
      end

      nil
    end

    #
    # Clears the session cache.
    #
    # @return [SessionCache]
    #   The cleared session cache.
    #
    # @since 0.2.2
    #
    def clear
      @sessions.each_value do |sess|
        begin
          sess.finish
        rescue IOError
          # the session was not started; nothing to close
        end
      end

      @sessions.clear
      self
    end

    private

    #
    # Coerces the given URL into a URI object.
    #
    # @param [URI::HTTP, String] url
    #   The URL to normalize.
    #
    # @return [URI]
    #   The URL as a URI object.
    #
    def normalize_url(url)
      url.kind_of?(URI) ? url : URI(url.to_s)
    end

    #
    # Builds the cache key for the given URL.
    #
    # @param [URI::HTTP, String] url
    #   The URL that represents a session.
    #
    # @return [Array]
    #   The scheme, host and port of the URL.
    #
    def session_key(url)
      url = normalize_url(url)

      [url.scheme, url.host, url.port]
    end

  end
end
@@ -0,0 +1,98 @@
1
+ require 'spidr/agent'
2
+
3
module Spidr
  # Common proxy port.
  COMMON_PROXY_PORT = 8080

  # Default proxy information. Frozen so that it cannot be altered through
  # a Hash handed out by {Spidr.proxy}.
  DEFAULT_PROXY = {
    :host => nil,
    :port => COMMON_PROXY_PORT,
    :user => nil,
    :password => nil
  }.freeze

  #
  # Proxy information used by all newly created Agent objects by default.
  #
  # @return [Hash]
  #   The Spidr proxy information. Until a proxy is set, this is a
  #   modifiable copy of {DEFAULT_PROXY}.
  #
  def Spidr.proxy
    # hand out a copy, so edits to the returned Hash never touch the constant
    @@spidr_proxy ||= DEFAULT_PROXY.dup
  end

  #
  # Sets the proxy information used by Agent objects.
  #
  # @param [Hash] new_proxy
  #   The new proxy information.
  #
  # @option new_proxy [String] :host
  #   The host-name of the proxy.
  #
  # @option new_proxy [Integer] :port (COMMON_PROXY_PORT)
  #   The port of the proxy.
  #
  # @option new_proxy [String] :user
  #   The user to authenticate with the proxy as.
  #
  # @option new_proxy [String] :password
  #   The password to authenticate with the proxy.
  #
  # @return [Hash]
  #   The new proxy information.
  #
  def Spidr.proxy=(new_proxy)
    @@spidr_proxy = {:port => COMMON_PROXY_PORT}.merge(new_proxy)
  end

  #
  # Disables the proxy settings used by all newly created Agent objects,
  # by resetting them to a fresh copy of {DEFAULT_PROXY}.
  #
  # @return [true]
  #
  def Spidr.disable_proxy!
    @@spidr_proxy = DEFAULT_PROXY.dup
    true
  end

  #
  # The User-Agent string used by all Agent objects by default.
  #
  # @return [String, nil]
  #   The Spidr User-Agent string, or `nil` when none has been set.
  #
  def Spidr.user_agent
    @@spidr_user_agent ||= nil
  end

  #
  # Sets the Spidr User-Agent string.
  #
  # @param [String] new_agent
  #   The new User-Agent string.
  #
  def Spidr.user_agent=(new_agent)
    @@spidr_user_agent = new_agent
  end

  #
  # @see Agent.start_at
  #
  def Spidr.start_at(url,options={},&block)
    Agent.start_at(url,options,&block)
  end

  #
  # @see Agent.host
  #
  def Spidr.host(name,options={},&block)
    Agent.host(name,options,&block)
  end

  #
  # @see Agent.site
  #
  def Spidr.site(url,options={},&block)
    Agent.site(url,options,&block)
  end
end
@@ -0,0 +1,4 @@
1
module Spidr
  # Spidr version (frozen so the constant cannot be modified in place)
  VERSION = '0.4.1'.freeze
end
data/lib/spidr_epg.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'spidr_epg/agent'
2
+ require 'spidr_epg/spidr'
3
+ require 'spidr_epg/version'
@@ -0,0 +1,3 @@
1
+ require 'spidr_epg_gem/agent'
2
+ require 'spidr_epg_gem/spidr'
3
+ require 'spidr_epg_gem/version'
@@ -0,0 +1,7 @@
1
module SpidrEpgGem
  # Placeholder class exposing a single class-level predicate.
  class WhoIs
    #
    # Prints a fixed message to standard output.
    #
    # @return [true]
    #   Always true, so the predicate is usable in a condition.
    #
    def self.awesome?
      puts "YOU ARE AWESOME!!"
      true
    end
  end
end
@@ -0,0 +1,17 @@
1
# Gem specification for spidr_epg_gem.
Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = 'spidr_epg_gem'
  s.version = '0.0.0'
  s.date = %q{2013-04-15}
  s.summary = 'Use for crawling EPG'
  s.description = 'Use for crawling EPG'
  s.required_ruby_version = '>= 1.9.3'

  s.authors = ["zql"]
  s.email = ''

  # Package the files tracked by git, minus editor backup files (names
  # ending in "~"), which should not be shipped in the gem.
  s.files = `git ls-files`.split("\n").reject { |path| path.end_with?('~') }
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.require_path = 'lib'
  s.requirements << 'none'
end
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spidr_epg_gem
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - zql
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-04-15 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Use for crawling EPG
14
+ email: ''
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/spidr.rb
20
+ - lib/spidr_epg.rb
21
+ - lib/spidr_epg/actions.rb
22
+ - lib/spidr_epg/actions/actions.rb
23
+ - lib/spidr_epg/actions/exceptions.rb
24
+ - lib/spidr_epg/actions/exceptions/action.rb
25
+ - lib/spidr_epg/actions/exceptions/paused.rb
26
+ - lib/spidr_epg/actions/exceptions/skip_link.rb
27
+ - lib/spidr_epg/actions/exceptions/skip_page.rb
28
+ - lib/spidr_epg/agent.rb
29
+ - lib/spidr_epg/auth_credential.rb
30
+ - lib/spidr_epg/auth_store.rb
31
+ - lib/spidr_epg/body.rb
32
+ - lib/spidr_epg/cookie_jar.rb
33
+ - lib/spidr_epg/events.rb
34
+ - lib/spidr_epg/extensions.rb
35
+ - lib/spidr_epg/extensions/uri.rb
36
+ - lib/spidr_epg/filters.rb
37
+ - lib/spidr_epg/headers.rb
38
+ - lib/spidr_epg/links.rb
39
+ - lib/spidr_epg/page.rb
40
+ - lib/spidr_epg/rules.rb
41
+ - lib/spidr_epg/sanitizers.rb
42
+ - lib/spidr_epg/session_cache.rb
43
+ - lib/spidr_epg/spidr.rb
44
+ - lib/spidr_epg/version.rb
45
+ - lib/spidr_epg_gem.rb~
46
+ - lib/spidr_epg_gem~
47
+ - spidr_epg_gem.gemspec
48
+ homepage:
49
+ licenses: []
50
+ metadata: {}
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: 1.9.3
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements:
66
+ - none
67
+ rubyforge_project:
68
+ rubygems_version: 2.0.3
69
+ signing_key:
70
+ specification_version: 4
71
+ summary: Use for crawling EPG
72
+ test_files: []