pincers 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/pincers/backend/base.rb +4 -0
- data/lib/pincers/backend/webdriver.rb +35 -2
- data/lib/pincers/core/cookies.rb +16 -0
- data/lib/pincers/core/download.rb +13 -0
- data/lib/pincers/core/root_context.rb +6 -2
- data/lib/pincers/extension/queries.rb +6 -0
- data/lib/pincers/support/cookie.rb +3 -0
- data/lib/pincers/support/cookie_jar.rb +152 -6
- data/lib/pincers/support/http_client.rb +123 -0
- data/lib/pincers/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 51cc155a36f380781aa91b00a53f5996aae5b9b1
|
4
|
+
data.tar.gz: b7ef59a62aa573b6be45914692fc603ef91195c0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 78e3f46f2d2a4bd2bcb82d6ca2c5eda70f2683f5bc769eb086204eb82e7168b7b02a6687550b47870187f2172319184d6f47d1b98a33984f7fc50860f40a0868
|
7
|
+
data.tar.gz: 95d1bbf4b2cc3ccea044ecc0c8bfea63477b56d489487edf2d9f110943e5b231d955f25c47e25a0e8c661a593d696c9003642cacdceaace35ad26f1ae7943c5b
|
data/lib/pincers/backend/base.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "selenium-webdriver"
|
2
|
+
require "pincers/backend/base"
|
3
|
+
require "pincers/support/http_client"
|
4
|
+
require "pincers/core/download"
|
3
5
|
|
4
6
|
module Pincers::Backend
|
5
7
|
|
@@ -149,6 +151,12 @@ module Pincers::Backend
|
|
149
151
|
driver.switch_to.default_content
|
150
152
|
end
|
151
153
|
|
154
|
+
# Fetches a resource through a separate HTTP client instead of the browser.
#
# +_url+ is resolved against the driver's current URL, so relative links
# taken from the page work. The HTTP response is wrapped with
# Pincers::Core::Download.from_http_response and returned.
def fetch_resource(_url)
  absolute_url = URI.join(driver.current_url, _url)
  Pincers::Core::Download.from_http_response(as_http_client.get(absolute_url))
end
|
159
|
+
|
152
160
|
private
|
153
161
|
|
154
162
|
def search(_element, _query, _limit)
|
@@ -192,6 +200,31 @@ module Pincers::Backend
|
|
192
200
|
_element
|
193
201
|
end
|
194
202
|
|
203
|
+
# Builds a Pincers::Support::HttpClient configured from the browser session:
# the driver's proxy address, a cookie jar filled from the driver's cookies
# and the browser's User-Agent header.
def as_http_client
  client_options = {
    proxy: proxy_address,
    cookies: cookie_jar,
    headers: { 'User-Agent' => user_agent }
  }

  Pincers::Support::HttpClient.new(client_options)
end
|
212
|
+
|
213
|
+
# Returns whatever the driver yields for `navigator.userAgent`, evaluated
# as a script in the current page.
def user_agent
  script = "return navigator.userAgent;"
  driver.execute_script(script)
end
|
216
|
+
|
217
|
+
# Returns the proxy configured in the driver capabilities, preferring the
# HTTP proxy and falling back to the SSL one. Returns nil when the
# capabilities carry no proxy settings.
def proxy_address
  settings = driver.capabilities.proxy
  return nil if settings.nil?

  settings.http || settings.ssl
end
|
221
|
+
|
222
|
+
# Creates a fresh Pincers::Support::CookieJar and feeds it every cookie the
# driver currently reports, then returns the jar.
def cookie_jar
  Pincers::Support::CookieJar.new.tap do |jar|
    driver.manage.all_cookies.each { |browser_cookie| jar.set(browser_cookie) }
  end
end
|
227
|
+
|
195
228
|
end
|
196
229
|
|
197
230
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'pincers/
|
1
|
+
require 'pincers/core/cookies'
|
2
2
|
require 'pincers/core/search_context'
|
3
3
|
|
4
4
|
module Pincers::Core
|
@@ -41,7 +41,7 @@ module Pincers::Core
|
|
41
41
|
end
|
42
42
|
|
43
43
|
# Lazily builds the Cookies wrapper around the backend and memoizes it, so
# repeated calls return the same object.
def cookies
  @cookies = Cookies.new(backend) unless @cookies
  @cookies
end
|
46
46
|
|
47
47
|
def goto(_urlOrOptions)
|
@@ -61,6 +61,10 @@ module Pincers::Core
|
|
61
61
|
self
|
62
62
|
end
|
63
63
|
|
64
|
+
# Downloads the resource at +_url+ by delegating to the backend's
# fetch_resource, with the call run inside wrap_errors.
def download(_url)
  wrap_errors do
    backend.fetch_resource(_url)
  end
end
|
67
|
+
|
64
68
|
def forward(_steps=1)
|
65
69
|
wrap_errors { backend.navigate_forward _steps }
|
66
70
|
self
|
@@ -58,5 +58,11 @@ module Pincers::Extension
|
|
58
58
|
type.to_sym
|
59
59
|
end
|
60
60
|
|
61
|
+
# Downloads the resource this element points to.
#
# The URL is taken from the element's +href+ attribute, falling back to
# +src+. The resolved value is reused for the actual download so the
# attributes are read only once and the nil check and the request always
# agree on the same URL.
#
# Raises Pincers::NavigationError when neither attribute is present.
def download
  url = attribute(:href) || attribute(:src)
  raise Pincers::NavigationError.new(self, 'No resource url was found') if url.nil?

  root.download(url)
end
|
66
|
+
|
61
67
|
end
|
62
68
|
end
|
@@ -1,15 +1,161 @@
|
|
1
|
-
require
|
1
|
+
require "pincers/support/cookie"
|
2
2
|
|
3
3
|
module Pincers::Support
|
4
4
|
class CookieJar
|
5
|
-
include Enumerable
|
6
5
|
|
7
|
-
|
8
|
-
|
6
|
+
BAD_VALUE_CHARS = /([\x00-\x20\x7F",;\\])/ # RFC 6265 - 4.1.1
|
7
|
+
|
8
|
+
attr_reader :cookies
|
9
|
+
|
10
|
+
# Creates a jar backed by +_cookies+ (used as-is, not copied) or by a new
# empty array when none is given.
def initialize(_cookies=nil)
  @cookies = _cookies ? _cookies : []
end
|
13
|
+
|
14
|
+
# Returns a new jar of the same class holding a shallow clone of the cookie
# list: the array is duplicated, the cookie objects inside it are shared.
def copy
  duplicated_list = @cookies.clone
  self.class.new(duplicated_list)
end
|
17
|
+
|
18
|
+
# Looks up the first cookie named +_name+ among the cookies that apply to
# +_url+ (a string, parsed with URI.parse). Returns nil when none matches.
def get(_url, _name)
  origin = URI.parse(_url)
  candidates = for_origin(origin)
  candidates.find { |candidate| candidate.name == _name }
end
|
21
|
+
|
22
|
+
# Stores a cookie described by the +_parts+ hash.
#
# Requires truthy :domain, :name and :value entries; returns nil without
# storing anything when one is missing. A leading dot is removed from the
# domain and the path defaults to '/'. :expires and :secure are passed
# through untouched. An equivalent stored cookie is replaced (see
# replace_cookie). Returns the new cookie.
def set(_parts)
  return nil unless _parts[:domain] && _parts[:name] && _parts[:value]

  bare_domain = _parts[:domain].gsub(/^\./,'')
  cookie_path = _parts[:path] || '/'

  cookie = Cookie.new(
    _parts[:name], _parts[:value], bare_domain, cookie_path, _parts[:expires], _parts[:secure]
  )

  replace_cookie(cookie)
  cookie
end
|
39
|
+
|
40
|
+
# Stores a cookie received as a raw Set-Cookie header value.
#
# The header is decoded, then its domain and path are resolved against the
# host and path of +_uri+ (the URL that produced the header). When either
# resolution yields nil the cookie is discarded and nil is returned;
# otherwise the cookie is stored (replacing an equivalent one) and returned.
def set_raw(_uri, _raw)
  decoded = decode_cookie(_raw)

  decoded.domain = compute_domain(decoded.domain, _uri.host)
  decoded.path = compute_path(decoded.path, _uri.path)

  return nil if decoded.domain.nil? || decoded.path.nil?

  replace_cookie(decoded)
  decoded
end
|
51
|
+
|
52
|
+
# Returns the stored cookies whose domain matches the host of +_uri+ and
# whose path matches its path.
# NOTE(review): only domain and path are checked here; the secure flag and
# expiration are not considered -- confirm that is intended.
def for_origin(_uri)
  @cookies.select do |stored|
    domains_match(stored.domain, _uri.host) && paths_match(stored.path, _uri.path)
  end
end
|
57
|
+
|
58
|
+
# Renders the cookies applicable to +_uri+ as a Cookie header value:
# `name=value` pairs (values passed through quote) joined with '; '.
def for_origin_as_header(_uri)
  pairs = for_origin(_uri).map do |cookie|
    "#{cookie.name}=#{quote(cookie.value)}"
  end

  pairs.join('; ')
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
# Parses a raw Set-Cookie header value into a Cookie.
#
# Adapted from the WEBrick implementation. The first `name=value` segment
# gives the cookie name and (dequoted) value; the remaining segments are
# attributes, of which Domain, Path, Expires and Secure are recognized and
# everything else is ignored.
#
# Differences from a naive port, all to avoid crashes or lost cookies:
# * empty segments (`a=b;;c`) and attributes without a value (`Domain`)
#   are skipped instead of raising NoMethodError;
# * empty Domain/Path values are treated as absent so the caller falls back
#   to its defaults;
# * the Path is kept as sent -- only the domain is lowercased, because
#   paths are compared case-sensitively.
def decode_cookie(_raw)
  name_value, *attributes = _raw.to_s.split(/;/)
  key, value = name_value.to_s.strip.split(/\=/, 2)

  cookie = Cookie.new(key, dequote(value.to_s))

  attributes.each do |segment|
    attr_name, attr_value = segment.strip.split(/\=/, 2)
    next if attr_name.nil? # blank segment, nothing to read

    attr_value = dequote(attr_value.strip) if attr_value
    has_value = !(attr_value.nil? || attr_value.empty?)

    case attr_name.strip.downcase
    when "domain" then cookie.domain = attr_value.downcase if has_value
    when "path" then cookie.path = attr_value if has_value
    when "expires" then cookie.expires = attr_value
    # when "max-age" then cookie.max_age = Integer(value)
    # when "comment" then cookie.comment = value
    # when "version" then cookie.version = Integer(value)
    when "secure" then cookie.secure = true
    end
  end

  cookie
end
|
90
|
+
|
91
|
+
# Domain matching as described by RFC 6265 - 5.1.3: true when the request
# domain equals the cookie domain or is a subdomain of it.
# TODO: ensure request domain is not an IP
def domains_match(_cookie_domain, _request_domain)
  _cookie_domain == _request_domain ||
    _request_domain.end_with?(".#{_cookie_domain}")
end
|
98
|
+
|
99
|
+
# Path matching as described by RFC 6265 - 5.1.4. An empty request path is
# treated as '/'. Matches when the paths are identical, or when the cookie
# path is a prefix of the request path that ends on a '/' boundary.
def paths_match(_cookie_path, _request_path)
  request_path = _request_path.empty? ? '/' : _request_path

  _cookie_path == request_path ||
    (_cookie_path[-1] == '/' && request_path.start_with?(_cookie_path)) ||
    request_path.start_with?("#{_cookie_path}/")
end
|
107
|
+
|
108
|
+
# Resolves the domain a received cookie should be stored under.
#
# * No (or empty) Domain attribute: the cookie belongs to the request host.
# * Otherwise the leading dot is dropped and the request host must
#   domain-match the attribute (RFC 6265 - 5.3 step 6): it has to be that
#   domain or one of its subdomains. Cookies for any other domain are
#   discarded by returning nil.
#
# The previous check compared the two the wrong way around, rejecting
# `Domain=example.com` sent by `www.example.com` while accepting
# `Domain=sub.www.example.com` from `www.example.com`.
def compute_domain(_cookie_domain, _request_domain)
  return _request_domain if _cookie_domain.nil? || _cookie_domain.empty?

  domain = _cookie_domain.sub(/\A\./, '').downcase # remove leading dot
  request_host = _request_domain.to_s.downcase     # host names are case-insensitive

  return domain if request_host == domain || request_host.end_with?(".#{domain}")

  nil # cookies with different domain are discarded
end
|
114
|
+
|
115
|
+
# Resolves the path a received cookie should be stored under
# (RFC 6265 - 5.2.4).
#
# A Path attribute that starts with '/' is used as sent. When the attribute
# is missing, empty or not absolute, the default path derived from the
# request path is used instead.
#
# The attribute is deliberately NOT required to sit under the request's
# default path: servers routinely send `Path=/` from nested URLs (e.g. a
# login form), and rejecting those cookies loses the session.
def compute_path(_cookie_path, _request_path)
  return _cookie_path if _cookie_path && _cookie_path.start_with?('/')

  compute_default_path(_request_path)
end
|
121
|
+
|
122
|
+
# Computes a cookie's default path from the request path, following
# RFC 6265 - 5.1.4:
#
# * a missing, empty or non-absolute path gives '/';
# * a path with a single '/' (e.g. '/index.html') gives '/';
# * otherwise everything up to, but NOT including, the right-most '/'.
#
# Dropping the trailing slash matters: with '/app/' as the default, a cookie
# set from '/app/login' would not be sent back to '/app' itself.
def compute_default_path(_request_path)
  path = _request_path.to_s
  return '/' unless path.start_with?('/')

  last_slash = path.rindex('/')
  return '/' if last_slash.zero?

  path[0...last_slash]
end
|
129
|
+
|
130
|
+
# Stores +_cookie+: the first stored cookie considered equivalent to it is
# overwritten in place; when there is none the cookie is appended.
def replace_cookie(_cookie)
  position = @cookies.index { |stored| equivalent(stored, _cookie) }

  if position
    @cookies[position] = _cookie
    return
  end

  @cookies << _cookie
end
|
140
|
+
|
141
|
+
# Removes surrounding double quotes from +_str+ and unescapes
# backslash-escaped characters (adapted from the WEBrick implementation).
#
# Returns a new string; the argument is never modified. nil is returned
# unchanged so that a cookie without a value (`name` with no `=`) does not
# raise NoMethodError.
def dequote(_str)
  return nil if _str.nil?

  inner = (/\A"(.*)"\Z/ =~ _str) ? Regexp.last_match(1) : _str
  inner.gsub(/\\(.)/, '\1')
end
|
147
|
+
|
148
|
+
# Prepares a cookie value for a Cookie header (adapted from WEBrick and the
# http-cookie gem).
#
# Values without characters listed in BAD_VALUE_CHARS are returned
# untouched. Anything else is wrapped in double quotes, with embedded
# double quotes and backslashes escaped by a preceding backslash.
#
# The escaping uses a block on purpose: the original replacement string
# "\\\1" is a backslash followed by the octal escape \1 (byte 0x01), which
# replaced the character with garbage instead of escaping it.
def quote(_str)
  return _str unless BAD_VALUE_CHARS === _str

  escaped = _str.gsub(/[\\"]/) { |char| "\\#{char}" }
  "\"#{escaped}\""
end
|
10
153
|
|
11
|
-
def
|
12
|
-
|
154
|
+
# Two cookies are equivalent -- i.e. one replaces the other in the jar --
# when they share the same domain, path and name.
def equivalent(_cookie_a, _cookie_b)
  _cookie_a.domain == _cookie_b.domain &&
    _cookie_a.path == _cookie_b.path &&
    _cookie_a.name == _cookie_b.name
end
|
14
160
|
|
15
161
|
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
require "forwardable"
require "net/http"
require "openssl"
require "uri"
require "pincers/support/cookie_jar"
|
3
|
+
|
4
|
+
module Pincers::Support
|
5
|
+
class HttpClient
|
6
|
+
|
7
|
+
# Raised for HTTP responses that are neither a success nor a redirect.
#
# Keeps the offending response (see #response) and forwards #code and #body
# to it. The exception message is the response's message.
class HttpRequestError < StandardError
  extend Forwardable

  attr_reader :response

  def_delegators :@response, :code, :body

  def initialize(_response)
    @response = _response
    super(_response.message)
  end
end
|
19
|
+
|
20
|
+
# Raised when a request runs out of allowed redirects.
class MaximumRedirectsError < StandardError
  def initialize
    super('Redirection loop detected!')
  end
end
|
25
|
+
|
26
|
+
attr_reader :proxy_addr, :proxy_port, :cookies
|
27
|
+
|
28
|
+
# Creates a client.
#
# Recognized +_options+:
# * :proxy   - "address:port" string, split on ':' into proxy_addr and
#              proxy_port (both kept as strings).
# * :cookies - a cookie jar; a copy of it is used so the caller's jar is
#              left alone. Defaults to a new empty CookieJar.
# * :headers - headers applied to every request before per-request ones.
def initialize(_options={})
  proxy = _options[:proxy]
  @proxy_addr, @proxy_port = proxy.split ':' if proxy

  source_jar = _options[:cookies]
  @cookies = source_jar ? source_jar.copy : CookieJar.new

  @default_headers = _options[:headers]
end
|
41
|
+
|
42
|
+
# Performs a GET request.
#
# _url     - String or URI to fetch.
# _query   - optional query parameters. A Hash (or array of pairs) is
#            form-encoded with URI.encode_www_form; a String is used as-is.
#            The result is appended to any query already present in _url.
#            Previously this argument was accepted but silently ignored.
# _headers - optional per-request headers.
#
# Returns whatever perform_request returns for the request.
def get(_url, _query={}, _headers={})
  uri = URI(_url)

  unless _query.nil? || _query.empty?
    uri = uri.dup # never modify a URI object owned by the caller
    extra = _query.is_a?(String) ? _query : URI.encode_www_form(_query)
    uri.query = [uri.query, extra].compact.reject(&:empty?).join('&')
  end

  perform_request Net::HTTP::Get, uri, _headers
end
|
46
|
+
|
47
|
+
# Performs a POST request to +_url+ whose body is +_data+ run through
# prepare_data. +_headers+ are optional per-request headers.
def post(_url, _data, _headers={})
  target = URI(_url)
  perform_request(Net::HTTP::Post, target, _headers) { |request| request.body = prepare_data(_data) }
end
|
52
|
+
|
53
|
+
# Performs a PUT request to +_url+ whose body is +_data+ run through
# prepare_data. +_headers+ are optional per-request headers.
def put(_url, _data, _headers={})
  target = URI(_url)
  perform_request(Net::HTTP::Put, target, _headers) { |request| request.body = prepare_data(_data) }
end
|
58
|
+
|
59
|
+
# Performs a DELETE request to +_url+.
#
# +_headers+ are optional per-request headers. The parameter is new and
# defaults to none: the method body already referenced `_headers`, which
# did not exist, so every call raised NameError.
def delete(_url, _headers={})
  perform_request Net::HTTP::Delete, URI(_url), _headers
end
|
62
|
+
|
63
|
+
private
|
64
|
+
|
65
|
+
# Builds, sends and post-processes a single request, following redirects.
#
# _req_type - Net::HTTP request class (Net::HTTP::Get, ...).
# _uri      - URI to request.
# _headers  - per-request headers (applied after the default ones).
# _limit    - how many more requests may be issued; raises
#             MaximumRedirectsError once it is exhausted.
#
# Yields the request before it is sent so callers can set a body.
#
# Returns the response on success. Redirects are followed with the same
# request class; any other response goes to handle_error_response.
#
# Redirect handling notes:
# * cookies set by the redirecting response are stored before following
#   it (login flows usually set the session cookie on the 302 itself);
# * the Location header is resolved against the current URI, so relative
#   locations such as "/home" work;
# * a redirection status without a Location header (e.g. 304) is treated
#   as an error response instead of crashing while parsing nil;
# * the block is not re-applied to the follow-up request, so a body set by
#   the caller is not re-sent to the redirect target.
def perform_request(_req_type, _uri, _headers, _limit=10)

  raise MaximumRedirectsError.new if _limit <= 0

  request = _req_type.new(_uri.request_uri.empty? ? '/' : _uri.request_uri)
  build_headers(request, _headers)
  set_cookies(request, _uri)
  yield request if block_given?

  response = build_client(_uri).request request

  case response
  when Net::HTTPSuccess
    update_cookies(_uri, response)
    response
  when Net::HTTPRedirection
    location = response['location']
    return handle_error_response(response) if location.nil? || location.empty?

    update_cookies(_uri, response)
    perform_request(_req_type, _uri.merge(location), _headers, _limit - 1)
  else
    handle_error_response response
  end
end
|
87
|
+
|
88
|
+
# Creates the Net::HTTP connection object for +uri+, going through the
# configured proxy (if any) and enabling SSL for https URLs.
#
# NOTE(review): verify_mode is set to VERIFY_NONE, which turns off
# certificate verification for every HTTPS request made by this client.
# Confirm this is intentional before relying on it for sensitive traffic.
def build_client(uri)
  port = uri.port || 80

  Net::HTTP.new(uri.host, port, proxy_addr, proxy_port).tap do |http|
    http.use_ssl = true if uri.scheme == 'https'
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end
end
|
94
|
+
|
95
|
+
# Turns an unsuccessful response into an HttpRequestError carrying it.
def handle_error_response(_response)
  error = HttpRequestError.new(_response)
  raise error
end
|
98
|
+
|
99
|
+
# Converts request data into a body string.
#
# A Hash is serialized as application/x-www-form-urlencoded using
# URI.encode_www_form, so keys and values containing '&', '=', spaces or
# non-ASCII characters are escaped instead of corrupting the body.
# Anything else is returned untouched and used as the body directly.
def prepare_data(_data)
  return _data unless _data.is_a? Hash

  URI.encode_www_form(_data)
end
|
104
|
+
|
105
|
+
# Applies headers to +_request+: the client's default headers first (when
# any were configured), then +_headers+, so per-request values win.
def build_headers(_request, _headers)
  defaults = @default_headers
  copy_headers(_request, defaults) if defaults
  copy_headers(_request, _headers)
end
|
109
|
+
|
110
|
+
# Adds a Cookie header to +_request+ with the jar's cookies that apply to
# +_uri+. When no cookie applies the header is left unset rather than sent
# with an empty value.
def set_cookies(_request, _uri)
  header = @cookies.for_origin_as_header(_uri)
  return if header.nil? || header.empty?

  _request['Cookie'] = header
end
|
113
|
+
|
114
|
+
# Feeds every Set-Cookie header of +_response+ into the jar, using +_uri+
# as the origin. Does nothing when the response carries no such header.
def update_cookies(_uri, _response)
  raw_cookies = _response.get_fields('set-cookie')
  return nil unless raw_cookies

  raw_cookies.each do |raw|
    @cookies.set_raw(_uri, raw)
  end
end
|
118
|
+
|
119
|
+
# Copies every entry of the +_headers+ hash onto +_request+ using []=.
def copy_headers(_request, _headers)
  header_names = _headers.keys
  header_names.each do |name|
    _request[name] = _headers[name]
  end
end
|
122
|
+
end
|
123
|
+
end
|
data/lib/pincers/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pincers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ignacio Baixas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-09-
|
11
|
+
date: 2015-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -217,6 +217,8 @@ files:
|
|
217
217
|
- lib/pincers/backend/base.rb
|
218
218
|
- lib/pincers/backend/nokogiri.rb
|
219
219
|
- lib/pincers/backend/webdriver.rb
|
220
|
+
- lib/pincers/core/cookies.rb
|
221
|
+
- lib/pincers/core/download.rb
|
220
222
|
- lib/pincers/core/root_context.rb
|
221
223
|
- lib/pincers/core/search_context.rb
|
222
224
|
- lib/pincers/errors.rb
|
@@ -228,7 +230,9 @@ files:
|
|
228
230
|
- lib/pincers/factories/webdriver.rb
|
229
231
|
- lib/pincers/factory.rb
|
230
232
|
- lib/pincers/support/configuration.rb
|
233
|
+
- lib/pincers/support/cookie.rb
|
231
234
|
- lib/pincers/support/cookie_jar.rb
|
235
|
+
- lib/pincers/support/http_client.rb
|
232
236
|
- lib/pincers/support/query.rb
|
233
237
|
- lib/pincers/support/xpath_builder.rb
|
234
238
|
- lib/pincers/version.rb
|