pincers 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/pincers/backend/base.rb +4 -0
- data/lib/pincers/backend/webdriver.rb +35 -2
- data/lib/pincers/core/cookies.rb +16 -0
- data/lib/pincers/core/download.rb +13 -0
- data/lib/pincers/core/root_context.rb +6 -2
- data/lib/pincers/extension/queries.rb +6 -0
- data/lib/pincers/support/cookie.rb +3 -0
- data/lib/pincers/support/cookie_jar.rb +152 -6
- data/lib/pincers/support/http_client.rb +123 -0
- data/lib/pincers/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 51cc155a36f380781aa91b00a53f5996aae5b9b1
|
|
4
|
+
data.tar.gz: b7ef59a62aa573b6be45914692fc603ef91195c0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 78e3f46f2d2a4bd2bcb82d6ca2c5eda70f2683f5bc769eb086204eb82e7168b7b02a6687550b47870187f2172319184d6f47d1b98a33984f7fc50860f40a0868
|
|
7
|
+
data.tar.gz: 95d1bbf4b2cc3ccea044ecc0c8bfea63477b56d489487edf2d9f110943e5b231d955f25c47e25a0e8c661a593d696c9003642cacdceaace35ad26f1ae7943c5b
|
data/lib/pincers/backend/base.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
require
|
|
2
|
-
require
|
|
1
|
+
require "selenium-webdriver"
|
|
2
|
+
require "pincers/backend/base"
|
|
3
|
+
require "pincers/support/http_client"
|
|
4
|
+
require "pincers/core/download"
|
|
3
5
|
|
|
4
6
|
module Pincers::Backend
|
|
5
7
|
|
|
@@ -149,6 +151,12 @@ module Pincers::Backend
|
|
|
149
151
|
driver.switch_to.default_content
|
|
150
152
|
end
|
|
151
153
|
|
|
154
|
+
# Downloads the resource located at +_url+ using a standalone HTTP client.
#
# +_url+ is resolved against the url the driver is currently at, so relative
# references are accepted. The HTTP response is handed to
# Pincers::Core::Download.from_http_response and its result is returned.
def fetch_resource(_url)
  absolute_url = URI.join(driver.current_url, _url)
  http_response = as_http_client.get(absolute_url)
  Pincers::Core::Download.from_http_response(http_response)
end
|
|
159
|
+
|
|
152
160
|
private
|
|
153
161
|
|
|
154
162
|
def search(_element, _query, _limit)
|
|
@@ -192,6 +200,31 @@ module Pincers::Backend
|
|
|
192
200
|
_element
|
|
193
201
|
end
|
|
194
202
|
|
|
203
|
+
# Builds a Pincers::Support::HttpClient configured from the browser session:
# the driver's proxy address, a jar holding the driver's cookies and a
# User-Agent header matching the browser's.
def as_http_client
  client_options = {
    proxy: proxy_address,
    cookies: cookie_jar,
    headers: { 'User-Agent' => user_agent }
  }

  Pincers::Support::HttpClient.new(client_options)
end
|
|
212
|
+
|
|
213
|
+
# Returns whatever the driver yields when evaluating +navigator.userAgent+
# in the current page.
def user_agent
  script = "return navigator.userAgent;"
  driver.execute_script(script)
end
|
|
216
|
+
|
|
217
|
+
# Returns the proxy address declared in the driver capabilities, preferring
# the http proxy over the ssl one, or nil when no proxy is configured.
def proxy_address
  proxy = driver.capabilities.proxy
  return nil if proxy.nil?

  proxy.http || proxy.ssl
end
|
|
221
|
+
|
|
222
|
+
# Returns a fresh Pincers::Support::CookieJar loaded, via CookieJar#set, with
# every cookie reported by the driver.
def cookie_jar
  Pincers::Support::CookieJar.new.tap do |jar|
    driver.manage.all_cookies.each { |browser_cookie| jar.set(browser_cookie) }
  end
end
|
|
227
|
+
|
|
195
228
|
end
|
|
196
229
|
|
|
197
230
|
end
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
require 'pincers/
|
|
1
|
+
require 'pincers/core/cookies'
|
|
2
2
|
require 'pincers/core/search_context'
|
|
3
3
|
|
|
4
4
|
module Pincers::Core
|
|
@@ -41,7 +41,7 @@ module Pincers::Core
|
|
|
41
41
|
end
|
|
42
42
|
|
|
43
43
|
# Returns the Cookies wrapper for this context's backend, building it on
# first use and reusing it afterwards.
def cookies
  return @cookies if @cookies

  @cookies = Cookies.new(backend)
end
|
|
46
46
|
|
|
47
47
|
def goto(_urlOrOptions)
|
|
@@ -61,6 +61,10 @@ module Pincers::Core
|
|
|
61
61
|
self
|
|
62
62
|
end
|
|
63
63
|
|
|
64
|
+
# Fetches the resource at +_url+ through the backend, running the call inside
# wrap_errors, and returns the backend's result.
def download(_url)
  wrap_errors do
    backend.fetch_resource(_url)
  end
end
|
|
67
|
+
|
|
64
68
|
def forward(_steps=1)
|
|
65
69
|
wrap_errors { backend.navigate_forward _steps }
|
|
66
70
|
self
|
|
@@ -58,5 +58,11 @@ module Pincers::Extension
|
|
|
58
58
|
type.to_sym
|
|
59
59
|
end
|
|
60
60
|
|
|
61
|
+
# Downloads the resource referenced by this element.
#
# The url is taken from the +href+ attribute, falling back to +src+, and is
# passed to root.download, whose result is returned.
#
# Raises Pincers::NavigationError when neither attribute holds a value.
def download
  url = attribute(:href) || attribute(:src)
  raise Pincers::NavigationError.new(self, 'No resource url was found') if url.nil?

  # Reuse the url resolved above instead of reading the attributes a second time.
  root.download(url)
end
|
|
66
|
+
|
|
61
67
|
end
|
|
62
68
|
end
|
|
@@ -1,15 +1,161 @@
|
|
|
1
|
-
require
|
|
1
|
+
require "pincers/support/cookie"
|
|
2
2
|
|
|
3
3
|
module Pincers::Support
|
|
4
4
|
class CookieJar
|
|
5
|
-
include Enumerable
|
|
6
5
|
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
BAD_VALUE_CHARS = /([\x00-\x20\x7F",;\\])/ # RFC 6265 - 4.1.1
|
|
7
|
+
|
|
8
|
+
attr_reader :cookies
|
|
9
|
+
|
|
10
|
+
# Creates a jar backed by +_cookies+ when given, or by a new empty array.
# The given array is stored as is (not copied).
def initialize(_cookies=nil)
  @cookies = _cookies ? _cookies : []
end
|
|
13
|
+
|
|
14
|
+
# Returns a new jar of the same class holding a shallow clone of the cookie
# list: the array is duplicated, the cookie objects inside it are shared.
def copy
  duplicate = @cookies.clone
  self.class.new(duplicate)
end
|
|
17
|
+
|
|
18
|
+
# Returns the first cookie named +_name+ among those applicable to +_url+
# (a url string), or nil when there is none.
def get(_url, _name)
  origin = URI.parse(_url)
  for_origin(origin).find { |cookie| cookie.name == _name }
end
|
|
21
|
+
|
|
22
|
+
# Stores a cookie described by the +_parts+ hash.
#
# :domain, :name and :value are required; nil is returned (and nothing is
# stored) when any of them is missing. A leading dot is stripped from the
# domain and the path defaults to '/'. :expires and :secure are copied as
# given. Returns the stored cookie.
def set(_parts)
  return nil unless [:domain, :name, :value].all? { |key| _parts[key] }

  Cookie.new(
    _parts[:name],
    _parts[:value],
    _parts[:domain].gsub(/^\./,''),
    _parts[:path] || '/',
    _parts[:expires],
    _parts[:secure]
  ).tap { |cookie| replace_cookie(cookie) }
end
|
|
39
|
+
|
|
40
|
+
# Stores a cookie received as a raw Set-Cookie header value (+_raw+) in
# response to a request to +_uri+.
#
# The decoded cookie's domain and path are resolved against the request's
# host and path; when either resolves to nil the cookie is discarded and nil
# is returned. Otherwise the cookie is stored and returned.
def set_raw(_uri, _raw)
  parsed = decode_cookie(_raw)

  parsed.domain = compute_domain(parsed.domain, _uri.host)
  parsed.path = compute_path(parsed.path, _uri.path)

  if parsed.domain.nil? || parsed.path.nil?
    nil
  else
    replace_cookie(parsed)
    parsed
  end
end
|
|
51
|
+
|
|
52
|
+
# Returns the stored cookies that apply to a request to +_uri+ (a URI object).
#
# A cookie applies when its domain and path match the request's host and path.
# Cookies flagged as secure are only returned for https origins, so they are
# never exposed over a plain-text connection (RFC 6265 - 5.4).
def for_origin(_uri)
  secure_channel = _uri.scheme == 'https'

  @cookies.select do |c|
    next false if c.secure && !secure_channel

    domains_match(c.domain, _uri.host) && paths_match(c.path, _uri.path)
  end
end
|
|
57
|
+
|
|
58
|
+
# Renders the cookies applicable to +_uri+ as the value of a Cookie request
# header: "name=value" pairs (values passed through quote) joined by "; ".
def for_origin_as_header(_uri)
  pairs = for_origin(_uri).map do |cookie|
    "#{cookie.name}=#{quote(cookie.value)}"
  end

  pairs.join('; ')
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
# Parses a raw Set-Cookie header value into a Cookie.
#
# The first ';' separated segment provides the name and (dequoted) value; the
# remaining segments are attributes. Only Domain, Path, Expires and Secure are
# recognized, every other attribute is ignored. The domain is downcased, the
# path is kept verbatim since cookie paths are case-sensitive.
#
# Malformed input is tolerated: empty segments are skipped, attributes that
# need a value but have none are ignored, and a first segment without '='
# yields a cookie with a nil value.
def decode_cookie(_raw)
  # adapted from the WEBrick implementation
  name_value, *attributes = _raw.split(';')
  key, value = name_value.to_s.strip.split('=', 2)

  cookie = Cookie.new(key, value.nil? ? nil : dequote(value))

  attributes.each do |pair|
    attr_name, attr_value = pair.strip.split('=', 2)
    next if attr_name.nil? # empty segment, e.g. "a=b;;path=/"

    attr_value = dequote(attr_value.strip) if attr_value

    case attr_name.strip.downcase
    when "domain"
      cookie.domain = attr_value.downcase if attr_value
    when "path"
      cookie.path = attr_value if attr_value
    when "expires"
      cookie.expires = attr_value
    # when "max-age" then cookie.max_age = Integer(value)
    # when "comment" then cookie.comment = value
    # when "version" then cookie.version = Integer(value)
    when "secure"
      cookie.secure = true
    end
  end

  cookie
end
|
|
90
|
+
|
|
91
|
+
# Tells whether a request to host +_request_domain+ domain-matches a cookie
# stored for +_cookie_domain+ (RFC 6265 - 5.1.3).
#
# The comparison is case-insensitive. A match is either an exact one or the
# request host being a sub domain of the cookie domain. Sub domain matching is
# never applied when the request host looks like an IP address (dotted IPv4
# or anything containing ':'). Returns false when either argument is nil.
def domains_match(_cookie_domain, _request_domain)
  return false if _cookie_domain.nil? || _request_domain.nil?

  cookie_domain = _cookie_domain.downcase
  request_domain = _request_domain.downcase

  return true if cookie_domain == request_domain

  # IP addresses only match exactly, a numeric suffix is not a parent domain
  return false if request_domain =~ /\A\d{1,3}(\.\d{1,3}){3}\z/
  return false if request_domain.include?(':')

  request_domain.end_with?(".#{cookie_domain}")
end
|
|
98
|
+
|
|
99
|
+
# Tells whether +_request_path+ path-matches +_cookie_path+ (RFC 6265 - 5.1.4).
#
# An empty request path is treated as '/'. The paths match when they are
# identical, or when the cookie path is a prefix of the request path that
# either ends in '/' or is immediately followed by '/' in the request path.
def paths_match(_cookie_path, _request_path)
  request_path = _request_path.empty? ? '/' : _request_path

  _cookie_path == request_path ||
    (_cookie_path[-1] == '/' && request_path.start_with?(_cookie_path)) ||
    request_path.start_with?("#{_cookie_path}/")
end
|
|
107
|
+
|
|
108
|
+
# Resolves the domain a received cookie should be stored under.
#
# _cookie_domain  - value of the cookie's Domain attribute, or nil.
# _request_domain - host the cookie was received from.
#
# Without a Domain attribute (or with an empty one) the request host is used.
# Otherwise the attribute, lowercased and stripped of its leading dot, is
# returned provided the request host is that domain or one of its sub domains
# (RFC 6265 - 5.3 step 6). Cookies for unrelated domains are rejected with
# nil, and so are parent-domain cookies set from an IP address host.
def compute_domain(_cookie_domain, _request_domain)
  return _request_domain if _cookie_domain.nil?

  domain = _cookie_domain.sub(/\A\./, '').downcase # remove leading dot
  return _request_domain if domain.empty?

  host = _request_domain.to_s.downcase
  return domain if host == domain

  # an IP address has no parent domain
  return nil if host =~ /\A\d{1,3}(\.\d{1,3}){3}\z/ || host.include?(':')

  # cookies with different domain are discarded
  host.end_with?(".#{domain}") ? domain : nil
end
|
|
114
|
+
|
|
115
|
+
# Resolves the path a received cookie should be stored under
# (RFC 6265 - 5.2.4).
#
# _cookie_path  - value of the cookie's Path attribute, or nil.
# _request_path - path of the request the cookie was received from.
#
# A Path attribute starting with '/' is used as given, whatever the request
# path is (a page under /a/b may legitimately set a cookie for '/'). A
# missing, empty or relative attribute falls back to the request's default
# path.
def compute_path(_cookie_path, _request_path)
  return _cookie_path if _cookie_path && _cookie_path.start_with?('/')

  compute_default_path(_request_path)
end
|
|
121
|
+
|
|
122
|
+
# Computes the default cookie path for a request path (RFC 6265 - 5.1.4).
#
# Returns '/' when the path is nil, empty, not absolute or contains a single
# '/'. Otherwise returns the path up to, but not including, its right-most
# '/' ("/a/b/c" gives "/a/b").
def compute_default_path(_request_path)
  return '/' if _request_path.nil? || !_request_path.start_with?('/')

  last_slash = _request_path.rindex('/')
  return '/' if last_slash.zero?

  _request_path[0...last_slash]
end
|
|
129
|
+
|
|
130
|
+
# Stores +_cookie+ in the jar: it takes the place of the first stored cookie
# that is equivalent to it, or is appended when there is none.
def replace_cookie(_cookie)
  position = @cookies.index { |stored| equivalent(stored, _cookie) }

  if position
    @cookies[position] = _cookie
    return
  end

  @cookies << _cookie
end
|
|
140
|
+
|
|
141
|
+
# Removes the surrounding double quotes from +_str+, if any, and unescapes
# backslash-escaped characters. The argument itself is not modified.
def dequote(_str)
  # taken from WEBrick implementation
  unwrapped = if /\A"(.*)"\Z/ =~ _str
    Regexp.last_match(1)
  else
    _str.dup
  end

  unwrapped.tap { |text| text.gsub!(/\\(.)/, "\\1") }
end
|
|
147
|
+
|
|
148
|
+
# Prepares a cookie value for use in a Cookie header.
#
# Values without characters matched by BAD_VALUE_CHARS are returned untouched.
# Any other value is wrapped in double quotes, with every backslash and double
# quote it contains escaped by a backslash.
def quote(_str)
  # adapted from the WEBrick implementation and the http-cookie gem
  return _str unless BAD_VALUE_CHARS === _str

  # The block form sidesteps replacement-string escaping rules: each matched
  # character is emitted prefixed by a single backslash.
  escaped = _str.gsub(/[\\"]/) { |char| "\\#{char}" }
  "\"#{escaped}\""
end
|
|
10
153
|
|
|
11
|
-
def
|
|
12
|
-
|
|
154
|
+
# Two cookies are equivalent when they share domain, path and name; the value
# and remaining attributes are not compared.
def equivalent(_cookie_a, _cookie_b)
  _cookie_a.domain == _cookie_b.domain &&
    _cookie_a.path == _cookie_b.path &&
    _cookie_a.name == _cookie_b.name
end
|
|
14
160
|
|
|
15
161
|
end
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
require "forwardable"
require "net/http"
require "uri"
require "pincers/support/cookie_jar"
|
|
3
|
+
|
|
4
|
+
module Pincers::Support
|
|
5
|
+
# Minimal HTTP client built on Net::HTTP that keeps a cookie jar between
# requests, sends a set of default headers and follows redirects.
#
# Options accepted by the constructor:
#
# :proxy   - proxy address as "host:port".
# :cookies - cookie jar to start from; a copy is taken so the given jar is
#            not modified by this client.
# :headers - hash of headers sent with every request.
class HttpClient

  # Raised when a request ends with a response that is neither successful nor
  # a redirect that can be followed. Gives access to the response and
  # delegates +code+ and +body+ to it.
  class HttpRequestError < StandardError
    extend Forwardable

    def_delegators :@response, :code, :body

    attr_reader :response

    def initialize(_response)
      @response = _response
      super _response.message
    end
  end

  # Raised when a request is redirected more times than allowed.
  class MaximumRedirectsError < StandardError
    def initialize
      super 'Redirection loop detected!'
    end
  end

  attr_reader :proxy_addr, :proxy_port, :cookies

  def initialize(_options={})
    if _options[:proxy]
      @proxy_addr, @proxy_port = _options[:proxy].split ':'
    end

    @cookies = if _options[:cookies]
      _options[:cookies].copy
    else
      CookieJar.new
    end

    @default_headers = _options[:headers]
  end

  # Performs a GET request and returns the Net::HTTP response.
  # +_query+ is currently ignored.
  def get(_url, _query={}, _headers={})
    # TODO: append query string?
    perform_request Net::HTTP::Get, URI(_url), _headers
  end

  # Performs a POST request. +_data+ is either a ready to send body or a hash
  # that gets form-encoded.
  def post(_url, _data, _headers={})
    perform_request Net::HTTP::Post, URI(_url), _headers do |req|
      req.body = prepare_data(_data)
    end
  end

  # Performs a PUT request. +_data+ is handled as in #post.
  def put(_url, _data, _headers={})
    perform_request Net::HTTP::Put, URI(_url), _headers do |req|
      req.body = prepare_data(_data)
    end
  end

  # Performs a DELETE request. +_headers+ is optional so existing single
  # argument calls keep working.
  def delete(_url, _headers={})
    perform_request Net::HTTP::Delete, URI(_url), _headers
  end

  private

  # Sends the request and follows up to +_limit+ redirects.
  #
  # Returns the response on success. Raises MaximumRedirectsError when the
  # redirect budget is exhausted and HttpRequestError for any other response.
  # The optional block receives the request before it is sent, so the caller
  # can set its body.
  def perform_request(_req_type, _uri, _headers, _limit=10, &_block)

    raise MaximumRedirectsError.new if _limit == 0

    request = _req_type.new(_uri.request_uri.empty? ? '/' : _uri.request_uri)
    build_headers(request, _headers)
    set_cookies(request, _uri)
    _block.call(request) if _block

    response = build_client(_uri).request request

    case response
    when Net::HTTPSuccess then
      update_cookies(_uri, response)
      response
    when Net::HTTPRedirection then
      location = response['location']
      # redirect-class responses without a target (e.g. 304) cannot be followed
      return handle_error_response(response) if location.nil?

      # cookies set along a redirect chain must reach the following requests
      update_cookies(_uri, response)

      next_type = redirected_request_type(_req_type, response)
      # the body is only replayed when the method is preserved
      body_setter = next_type == _req_type ? _block : nil
      # Location may be relative, resolve it against the current url
      next_uri = URI.join(_uri.to_s, location)

      perform_request(next_type, next_uri, _headers, _limit - 1, &body_setter)
    else
      handle_error_response response
    end
  end

  # Picks the request class used to follow a redirect: 303 always becomes a
  # GET, 301/302 turn a POST into a GET, everything else keeps its method.
  def redirected_request_type(_req_type, _response)
    case _response
    when Net::HTTPSeeOther
      Net::HTTP::Get
    when Net::HTTPMovedPermanently, Net::HTTPFound
      _req_type == Net::HTTP::Post ? Net::HTTP::Get : _req_type
    else
      _req_type
    end
  end

  # Builds the Net::HTTP connection object for +uri+, going through the
  # configured proxy if any.
  def build_client(uri)
    client = Net::HTTP.new uri.host, uri.port || 80, proxy_addr, proxy_port
    if uri.scheme == 'https'
      client.use_ssl = true
      # NOTE(review): certificate verification is disabled, as in the original
      # implementation; https peers are NOT authenticated.
      client.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end
    client
  end

  def handle_error_response(_response)
    raise HttpRequestError.new _response
  end

  # Hashes are form-encoded (keys and values escaped); anything else is
  # returned untouched.
  def prepare_data(_data)
    if _data.is_a? Hash
      URI.encode_www_form(_data)
    else _data end
  end

  # Applies the default headers first so per-request headers can override them.
  def build_headers(_request, _headers)
    copy_headers _request, @default_headers if @default_headers
    copy_headers _request, _headers if _headers
  end

  # Adds the Cookie header, unless no cookie applies to +_uri+.
  def set_cookies(_request, _uri)
    header = @cookies.for_origin_as_header _uri
    _request['Cookie'] = header unless header.nil? || header.empty?
  end

  # Stores every cookie received in the response's Set-Cookie headers.
  def update_cookies(_uri, _response)
    cookies = _response.get_fields('set-cookie')
    cookies.each { |raw| @cookies.set_raw _uri, raw } if cookies
  end

  def copy_headers(_request, _headers)
    _headers.keys.each { |k| _request[k] = _headers[k] }
  end
end
|
|
123
|
+
end
|
data/lib/pincers/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pincers
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.
|
|
4
|
+
version: 0.5.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ignacio Baixas
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-09-
|
|
11
|
+
date: 2015-09-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -217,6 +217,8 @@ files:
|
|
|
217
217
|
- lib/pincers/backend/base.rb
|
|
218
218
|
- lib/pincers/backend/nokogiri.rb
|
|
219
219
|
- lib/pincers/backend/webdriver.rb
|
|
220
|
+
- lib/pincers/core/cookies.rb
|
|
221
|
+
- lib/pincers/core/download.rb
|
|
220
222
|
- lib/pincers/core/root_context.rb
|
|
221
223
|
- lib/pincers/core/search_context.rb
|
|
222
224
|
- lib/pincers/errors.rb
|
|
@@ -228,7 +230,9 @@ files:
|
|
|
228
230
|
- lib/pincers/factories/webdriver.rb
|
|
229
231
|
- lib/pincers/factory.rb
|
|
230
232
|
- lib/pincers/support/configuration.rb
|
|
233
|
+
- lib/pincers/support/cookie.rb
|
|
231
234
|
- lib/pincers/support/cookie_jar.rb
|
|
235
|
+
- lib/pincers/support/http_client.rb
|
|
232
236
|
- lib/pincers/support/query.rb
|
|
233
237
|
- lib/pincers/support/xpath_builder.rb
|
|
234
238
|
- lib/pincers/version.rb
|