pincers 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bb985ceb492cc98c2bfa31c449d2910956a598ce
4
- data.tar.gz: d87917a61c5e611f2e2a83e03ffbe7821b94e127
3
+ metadata.gz: 51cc155a36f380781aa91b00a53f5996aae5b9b1
4
+ data.tar.gz: b7ef59a62aa573b6be45914692fc603ef91195c0
5
5
  SHA512:
6
- metadata.gz: 4175ea99d6f5e46ca5bc742a4d9a4884991d8a92ccca0d5dbf95e86dbcbdea3b6c327cb784557ec2ccd51cd67189093d935efa56bfa9edba776a4ff15a3b76a9
7
- data.tar.gz: 659b15f7fbe7570a1f78eaadc2af572a78fb68e00496e789102732dc935ecdcd317a14cbfb07d6680021463146e67d1e9cab2eaafb39dd90db66e00a727cc08c
6
+ metadata.gz: 78e3f46f2d2a4bd2bcb82d6ca2c5eda70f2683f5bc769eb086204eb82e7168b7b02a6687550b47870187f2172319184d6f47d1b98a33984f7fc50860f40a0868
7
+ data.tar.gz: 95d1bbf4b2cc3ccea044ecc0c8bfea63477b56d489487edf2d9f110943e5b231d955f25c47e25a0e8c661a593d696c9003642cacdceaace35ad26f1ae7943c5b
@@ -120,6 +120,10 @@ module Pincers::Backend
120
120
  ensure_implementation :switch_to_parent_frame
121
121
  end
122
122
 
123
# Abstract hook: fetch the resource at _url using the backend's session.
#
# Concrete backends override this method. The base version only hands the
# method name to ensure_implementation (presumably reporting the missing
# implementation - its body is not visible here).
#
# _url - the url that callers pass in; accepted and ignored here so a call
#        such as `backend.fetch_resource url` reaches ensure_implementation
#        instead of failing with ArgumentError.
def fetch_resource(_url=nil)
  ensure_implementation :fetch_resource
end
126
+
123
127
  private
124
128
 
125
129
  def ensure_implementation(_name)
@@ -1,5 +1,7 @@
1
- require 'selenium-webdriver'
2
- require 'pincers/backend/base'
1
+ require "selenium-webdriver"
2
+ require "pincers/backend/base"
3
+ require "pincers/support/http_client"
4
+ require "pincers/core/download"
3
5
 
4
6
  module Pincers::Backend
5
7
 
@@ -149,6 +151,12 @@ module Pincers::Backend
149
151
  driver.switch_to.default_content
150
152
  end
151
153
 
154
# Downloads the resource at _url with a plain HTTP client instead of the browser.
# _url is joined with the driver's current url, so relative urls are accepted.
# Returns the Pincers::Core::Download built from the HTTP response.
+ def fetch_resource(_url)
155
+ url = URI::join(driver.current_url, _url)
156
# the client is configured with the driver's proxy, cookies and user agent
+ response = as_http_client.get url
157
+ Pincers::Core::Download.from_http_response response
158
+ end
159
+
152
160
  private
153
161
 
154
162
  def search(_element, _query, _limit)
@@ -192,6 +200,31 @@ module Pincers::Backend
192
200
  _element
193
201
  end
194
202
 
203
# Builds a Pincers::Support::HttpClient configured from the browser session:
# proxy address, a cookie jar filled from the driver and the browser's User-Agent header.
# A new client is built on every call.
+ def as_http_client
204
+ Pincers::Support::HttpClient.new({
205
+ proxy: proxy_address,
206
+ cookies: cookie_jar,
207
+ headers: {
208
+ 'User-Agent' => user_agent
209
+ }
210
+ })
211
+ end
212
+
213
# Returns navigator.userAgent as reported by the page, obtained by running a script in the driver.
+ def user_agent
214
+ driver.execute_script("return navigator.userAgent;")
215
+ end
216
+
217
# Returns the proxy taken from the driver capabilities, or nil when none is set.
# The http proxy is preferred; the ssl proxy is used only when no http proxy is given.
+ def proxy_address
218
+ proxy = driver.capabilities.proxy
219
+ proxy.nil? ? nil : (proxy.http || proxy.ssl)
220
+ end
221
+
222
# Builds a new Pincers::Support::CookieJar and feeds it every cookie reported by the driver.
# Returns the jar.
+ def cookie_jar
223
+ jar = Pincers::Support::CookieJar.new
224
+ driver.manage.all_cookies.each { |c| jar.set c }
225
+ jar
226
+ end
227
+
195
228
  end
196
229
 
197
230
  end
@@ -0,0 +1,16 @@
1
+ require 'ostruct'
2
+
3
+ module Pincers::Core
4
+ class Cookies
5
+ include Enumerable
6
+
7
# _backend - object whose fetch_cookies is called by #each to list the cookies.
+ def initialize(_backend)
8
+ @backend = _backend
9
+ end
10
+
11
# Yields every cookie reported by the backend, wrapped in an OpenStruct so
# its fields can be read as methods.
#
# Returns an Enumerator when called without a block, as Enumerable
# collections conventionally do (previously this raised LocalJumpError).
def each
  return enum_for(:each) unless block_given?

  @backend.fetch_cookies.each { |c| yield OpenStruct.new c }
end
14
+
15
+ end
16
+ end
@@ -0,0 +1,13 @@
1
+ module Pincers::Core
2
+ class Download < Struct.new(:mime, :data)
3
+
4
# Builds a Download from an HTTP response.
# mime is the response's Content-Type header, or 'text/plain' when the header is absent;
# data is the response body.
+ def self.from_http_response(_response)
5
+ self.new _response['Content-Type'] || 'text/plain', _response.body
6
+ end
7
+
8
# Writes the downloaded data to the file at _path.
#
# The file is written in binary mode: the payload is whatever bytes the
# response body held, so no newline or encoding conversion may be applied.
#
# Returns the number of bytes written.
def store(_path)
  File.binwrite _path, data
end
11
+
12
+ end
13
+ end
@@ -1,4 +1,4 @@
1
- require 'pincers/support/cookie_jar'
1
+ require 'pincers/core/cookies'
2
2
  require 'pincers/core/search_context'
3
3
 
4
4
  module Pincers::Core
@@ -41,7 +41,7 @@ module Pincers::Core
41
41
  end
42
42
 
43
43
  def cookies
44
- @cookies ||= CookieJar.new backend
44
+ @cookies ||= Cookies.new backend
45
45
  end
46
46
 
47
47
  def goto(_urlOrOptions)
@@ -61,6 +61,10 @@ module Pincers::Core
61
61
  self
62
62
  end
63
63
 
64
# Fetches the resource at _url through the backend's fetch_resource.
# The call runs inside wrap_errors; returns whatever that block evaluates to.
+ def download(_url)
65
+ wrap_errors { backend.fetch_resource _url }
66
+ end
67
+
64
68
  def forward(_steps=1)
65
69
  wrap_errors { backend.navigate_forward _steps }
66
70
  self
@@ -58,5 +58,11 @@ module Pincers::Extension
58
58
  type.to_sym
59
59
  end
60
60
 
61
# Downloads the resource this element points to: its href attribute, or its
# src attribute when there is no href.
#
# Returns the result of root.download for that url.
# Raises Pincers::NavigationError when the element has neither attribute.
def download
  url = attribute(:href) || attribute(:src)
  raise Pincers::NavigationError.new(self, 'No resource url was found') if url.nil?

  # reuse the url that was just validated instead of reading the attributes again
  root.download(url)
end
66
+
61
67
  end
62
68
  end
@@ -0,0 +1,3 @@
1
+ module Pincers::Support
2
# Plain value object for one cookie: name, value, domain, path, expires and secure flag.
+ class Cookie < Struct.new(:name, :value, :domain, :path, :expires, :secure); end
3
+ end
@@ -1,15 +1,161 @@
1
- require 'ostruct'
1
+ require "pincers/support/cookie"
2
2
 
3
3
  module Pincers::Support
4
4
  class CookieJar
5
- include Enumerable
6
5
 
7
- def initialize(_backend)
8
- @backend = _backend
6
+ BAD_VALUE_CHARS = /([\x00-\x20\x7F",;\\])/ # RFC 6265 - 4.1.1
7
+
8
+ attr_reader :cookies
9
+
10
# _cookies - optional array used as the jar's storage; a new empty array is used when omitted.
# The given array is kept by reference, not copied.
+ def initialize(_cookies=nil)
11
+ @cookies = _cookies || []
12
+ end
13
+
14
# Returns a new jar of the same class backed by a clone of the cookie array.
# The clone is shallow: the cookie objects themselves are shared with this jar.
+ def copy
15
+ self.class.new @cookies.clone
16
+ end
17
+
18
# Returns the first cookie named _name among those matching _url (see for_origin),
# or nil when there is none. _url is parsed with URI.parse.
+ def get(_url, _name)
19
+ for_origin(URI.parse(_url)).find { |c| c.name == _name }
20
+ end
21
+
22
# Stores a cookie described by _parts, read with the keys
# :name, :value, :domain, :path, :expires and :secure.
# Returns nil without storing anything when :domain, :name or :value is missing.
# A leading dot is removed from the domain and the path defaults to '/'.
# A stored cookie with the same domain, path and name is replaced.
# Returns the new Cookie.
+ def set(_parts)
23
+ return nil unless _parts[:domain]
24
+ return nil unless _parts[:name]
25
+ return nil unless _parts[:value]
26
+
27
+ cookie = Cookie.new(
28
+ _parts[:name],
29
+ _parts[:value],
30
+ _parts[:domain].gsub(/^\./,''),
31
+ _parts[:path] || '/',
32
+ _parts[:expires],
33
+ _parts[:secure]
34
+ )
35
+
36
+ replace_cookie cookie
37
+ cookie
38
+ end
39
+
40
# Stores a cookie from a raw header value (_raw) received for _uri.
# The cookie's domain and path are resolved against _uri's host and path.
# Returns nil without storing when either resolves to nil; otherwise returns the stored Cookie.
+ def set_raw(_uri, _raw)
41
+ cookie = decode_cookie _raw
42
+
43
+ cookie.domain = compute_domain cookie.domain, _uri.host
44
+ cookie.path = compute_path cookie.path, _uri.path
45
+
# a nil domain or path means the cookie was rejected for this origin
46
+ return nil if cookie.domain.nil? or cookie.path.nil?
47
+
48
+ replace_cookie(cookie)
49
+ cookie
50
+ end
51
+
52
# Returns the stored cookies whose domain matches _uri.host and whose path matches _uri.path.
# Only domain and path are checked here; expires and secure are not consulted.
+ def for_origin(_uri)
53
+ @cookies.select do |c|
54
+ domains_match c.domain, _uri.host and paths_match c.path, _uri.path
55
+ end
56
+ end
57
+
58
# Returns the cookies matching _uri as one string of name=value pairs joined by '; '.
# Values are passed through quote. Returns an empty string when no cookie matches.
+ def for_origin_as_header(_uri)
59
+ for_origin(_uri).map { |c| "#{c.name}=#{quote(c.value)}" }.join('; ')
60
+ end
61
+
62
+ private
63
+
64
# Parses a raw Set-Cookie header value into a Cookie.
#
# The first ';'-separated segment is the name=value pair; the remaining
# segments are attributes. Only domain, path, expires and secure are kept,
# every other attribute (max-age, comment, version, httponly...) is ignored.
#
# Differences from the original WEBrick-derived code:
# * empty segments (for example after a trailing ';') are skipped instead of
#   raising NoMethodError on a nil key,
# * attributes given without a value ("Domain", "Path") no longer crash,
# * a first segment without '=' gives a cookie with a nil value instead of
#   crashing inside dequote,
# * the path keeps its case: cookie paths are compared character by
#   character (RFC 6265 - 5.1.4), only the domain is case-insensitive.
#
# Returns the Cookie; domain and path are left nil when not present.
def decode_cookie(_raw)
  name_value, *attributes = _raw.split(/;/)
  name, value = name_value.to_s.strip.split(/\=/, 2)

  cookie = Cookie.new(name, value.nil? ? nil : dequote(value))

  attributes.each do |pair|
    key, value = pair.strip.split(/\=/, 2)
    next if key.nil? # empty segment

    value = dequote(value.strip) if value
    has_value = !value.nil? && !value.empty?

    case key.strip.downcase
    when "domain" then cookie.domain = value.downcase if has_value
    when "path" then cookie.path = value if has_value
    when "expires" then cookie.expires = value
    when "secure" then cookie.secure = true
    end
  end

  cookie
end
90
+
91
# Tells whether a request to _request_domain may receive a cookie stored for _cookie_domain:
# true when both are equal or when the request domain ends with "." followed by the cookie domain.
+ def domains_match(_cookie_domain, _request_domain)
92
+ # RFC 6265 - 5.1.3
93
+ # TODO: ensure request domain is not an IP
94
+ return true if _cookie_domain == _request_domain
95
+ return true if _request_domain.end_with? ".#{_cookie_domain}"
96
+ return false
97
+ end
98
+
99
# Tells whether _request_path falls under _cookie_path.
# An empty request path is treated as '/'. Matches when the paths are equal, when the
# cookie path ends in '/' and prefixes the request path, or when the request path
# starts with the cookie path followed by '/'.
+ def paths_match(_cookie_path, _request_path)
100
+ # RFC 6265 - 5.1.4
101
+ _request_path = '/' if _request_path.empty?
102
+ return true if _cookie_path == _request_path
103
+ return true if _cookie_path[-1] == '/' and _request_path.start_with? _cookie_path
104
+ return true if _request_path.start_with? "#{_cookie_path}/"
105
+ return false
106
+ end
107
+
108
# Resolves the domain a received cookie is stored under.
#
# _cookie_domain  - value of the cookie's Domain attribute (may be nil).
# _request_domain - host the response came from.
#
# Without a Domain attribute the cookie belongs to the request host.
# Otherwise the request host must domain-match the attribute (RFC 6265 -
# 5.1.3 / 5.3): be equal to it, or be one of its subdomains. The original
# check was inverted (attribute ends with host), which rejected
# ".example.com" set by "www.example.com" and accepted "evil-example.com"
# set by "example.com".
#
# Returns the domain without leading dot, or nil when the cookie must be
# discarded.
def compute_domain(_cookie_domain, _request_domain)
  return _request_domain if _cookie_domain.nil? || _cookie_domain.empty?

  domain = _cookie_domain.sub(/\A\./, '') # remove leading dot
  return domain if _request_domain == domain
  return domain if _request_domain.end_with? ".#{domain}"

  nil # cookies for a different domain are discarded
end
114
+
115
# Resolves the path a received cookie is stored under (RFC 6265 - 5.2.4).
#
# _cookie_path  - value of the cookie's Path attribute (may be nil).
# _request_path - path of the url the response came from.
#
# A Path attribute starting with '/' is used as given; a missing, empty or
# relative one falls back to the default path of the request. The original
# code discarded any cookie whose Path was not under the request's default
# path, which dropped the very common "Path=/" cookies set from any page
# that is not at the root.
#
# Always returns a path string.
def compute_path(_cookie_path, _request_path)
  return _cookie_path if _cookie_path && _cookie_path.start_with?('/')

  compute_default_path(_request_path)
end
121
+
122
# Computes the default cookie path for a request path (RFC 6265 - 5.1.4).
#
# Returns '/' when the request path is nil, empty, does not start with '/'
# or contains no other '/'. Otherwise returns the request path up to, but
# not including, its right-most '/': "/docs/index.html" gives "/docs".
# The original kept the trailing slash ("/docs/"), so the resulting cookie
# did not match a later request to "/docs" itself.
def compute_default_path(_request_path)
  return '/' unless _request_path.to_s.start_with? '/'

  last_slash = _request_path.rindex('/')
  return '/' unless last_slash > 0

  _request_path[0...last_slash]
end
129
+
130
# Stores _cookie: overwrites the first stored cookie that is equivalent to it
# (same domain, path and name), or appends it when there is none.
+ def replace_cookie(_cookie)
131
+ @cookies.each_with_index do |cookie, i|
132
+ if equivalent(cookie, _cookie)
133
+ @cookies[i] = _cookie
134
+ return
135
+ end
136
+ end
137
+
138
+ @cookies << _cookie
139
+ end
140
+
141
# Returns a copy of _str without surrounding double quotes (when present) and with
# backslash escapes resolved: each backslash-character pair becomes the character.
# _str itself is not modified.
+ def dequote(_str)
142
+ # taken from WEBrick implementation
143
+ ret = (/\A"(.*)"\Z/ =~ _str) ? $1 : _str.dup
144
+ ret.gsub!(/\\(.)/, "\\1")
145
+ ret
146
+ end
147
+
148
# Prepares a cookie value for a Cookie header.
#
# Values without characters listed in BAD_VALUE_CHARS are returned as is.
# Other values are wrapped in double quotes, with every double quote and
# backslash inside them escaped by a backslash.
#
# The original replacement string "\\\1" contained the octal escape "\1"
# (byte 0x01) rather than a back-reference, so quotes and backslashes were
# replaced by a backslash followed by 0x01 instead of being escaped.
def quote(_str)
  # taken from WEBrick implementation and the http-cookie gem
  return _str unless BAD_VALUE_CHARS === _str

  escaped = _str.gsub(/[\\"]/) { |char| "\\#{char}" }
  "\"#{escaped}\""
end
10
153
 
11
- def each
12
- @backend.fetch_cookies.each { |c| yield OpenStruct.new c }
154
# Two cookies are equivalent when their domain, path and name are all equal.
# Value, expires and secure are not compared.
+ def equivalent(_cookie_a, _cookie_b)
155
+ return false unless _cookie_a.domain == _cookie_b.domain
156
+ return false unless _cookie_a.path == _cookie_b.path
157
+ return false unless _cookie_a.name == _cookie_b.name
158
+ return true
13
159
  end
14
160
 
15
161
  end
@@ -0,0 +1,123 @@
1
+ require "uri"
2
+ require "pincers/support/cookie_jar"
3
+
4
+ module Pincers::Support
5
+ class HttpClient
6
+
7
# Error carrying the HTTP response it was built from.
# The response is exposed through #response; #code and #body delegate to it and the
# exception message is the response's message.
# NOTE(review): Forwardable is used here but this file only requires "uri" and the
# cookie jar - presumably 'forwardable' is loaded elsewhere; confirm.
+ class HttpRequestError < StandardError
8
+ extend Forwardable
9
+
10
+ def_delegators :@response, :code, :body
11
+
12
+ attr_reader :response
13
+
14
+ def initialize(_response)
15
+ @response = _response
16
+ super _response.message
17
+ end
18
+ end
19
+
20
# Error with a fixed message, raised by perform_request when its redirect limit reaches zero.
+ class MaximumRedirectsError < StandardError
21
+ def initialize
22
+ super 'Redirection loop detected!'
23
+ end
24
+ end
25
+
26
+ attr_reader :proxy_addr, :proxy_port, :cookies
27
+
28
# Options read from _options:
#   :proxy   - "address:port" string, split on ':' into proxy_addr and proxy_port
#              (the port is kept as a string).
#   :cookies - cookie jar to start from; the client works on the result of its #copy.
#              A new empty CookieJar is used when omitted.
#   :headers - hash of headers copied into every request (may be nil).
+ def initialize(_options={})
29
+ if _options[:proxy]
30
+ @proxy_addr, @proxy_port = _options[:proxy].split ':'
31
+ end
32
+
33
+ @cookies = if _options[:cookies]
34
+ _options[:cookies].copy
35
+ else
36
+ CookieJar.new
37
+ end
38
+
39
+ @default_headers = _options[:headers]
40
+ end
41
+
42
# Performs a GET request to _url with the extra _headers and returns the response.
# _query is accepted but currently not used (see TODO below).
+ def get(_url, _query={}, _headers={})
43
+ # TODO: append query string?
44
+ perform_request Net::HTTP::Get, URI(_url), _headers
45
+ end
46
+
47
# Performs a POST request to _url with the extra _headers and returns the response.
# The request body is _data after being passed through prepare_data.
+ def post(_url, _data, _headers={})
48
+ perform_request Net::HTTP::Post, URI(_url), _headers do |req|
49
+ req.body = prepare_data(_data)
50
+ end
51
+ end
52
+
53
# Performs a PUT request to _url with the extra _headers and returns the response.
# The request body is _data after being passed through prepare_data.
+ def put(_url, _data, _headers={})
54
+ perform_request Net::HTTP::Put, URI(_url), _headers do |req|
55
+ req.body = prepare_data(_data)
56
+ end
57
+ end
58
+
59
# Performs a DELETE request to _url and returns the response.
#
# _headers - optional hash of extra request headers, as in get/post/put.
#            The original referenced _headers without declaring it, so every
#            call failed with NameError.
def delete(_url, _headers={})
  perform_request Net::HTTP::Delete, URI(_url), _headers
end
62
+
63
+ private
64
+
65
# Builds and sends one request, following redirects.
#
# _req_type - Net::HTTP request class (Net::HTTP::Get, ...).
# _uri      - URI to request.
# _headers  - hash of extra headers for this request.
# _limit    - number of requests still allowed in the redirect chain.
#
# The request is yielded to the block, when given, before it is sent so the
# caller can set a body. The block is not passed on to redirected requests.
#
# Returns the response of a successful request.
# Raises MaximumRedirectsError when _limit reaches zero; other responses go
# to handle_error_response.
#
# Compared to the original:
# * the Location header is resolved against the current uri, so relative
#   redirects work,
# * cookies set by a redirect response are stored before following it,
# * a redirection without Location header (e.g. 304) is treated as an error
#   response instead of crashing in URI.parse(nil).
def perform_request(_req_type, _uri, _headers, _limit=10)
  raise MaximumRedirectsError.new if _limit == 0

  request = _req_type.new(_uri.request_uri.empty? ? '/' : _uri.request_uri)
  build_headers(request, _headers)
  set_cookies(request, _uri)
  yield request if block_given?

  response = build_client(_uri).request request

  case response
  when Net::HTTPSuccess
    update_cookies(_uri, response)
    response
  when Net::HTTPRedirection
    location = response['location']
    return handle_error_response(response) if location.nil?

    update_cookies(_uri, response)
    perform_request(_req_type, URI.join(_uri.to_s, location), _headers, _limit - 1)
  else
    handle_error_response response
  end
end
87
+
88
# Builds the Net::HTTP connection object for uri, going through the configured proxy
# (proxy_addr / proxy_port, both nil when no proxy was given). SSL is enabled for https urls.
# NOTE(review): verify_mode is set to VERIFY_NONE for every connection, so server
# certificates are not verified - confirm this is intended.
+ def build_client(uri)
89
+ client = Net::HTTP.new uri.host, uri.port || 80, proxy_addr, proxy_port
90
+ client.use_ssl = true if uri.scheme == 'https'
91
+ client.verify_mode = OpenSSL::SSL::VERIFY_NONE
92
+ client
93
+ end
94
+
95
# Raises HttpRequestError wrapping _response. Never returns.
+ def handle_error_response(_response)
96
+ raise HttpRequestError.new _response
97
+ end
98
+
99
# Turns request data into a request body.
#
# A Hash is serialized as application/x-www-form-urlencoded. Keys and values
# are percent-encoded, so characters such as '&', '=' or spaces inside them
# no longer corrupt the body (the original joined them unescaped).
# Anything else is returned untouched.
def prepare_data(_data)
  return _data unless _data.is_a? Hash

  URI.encode_www_form(_data)
end
104
+
105
# Copies the client's default headers (when there are any) and then _headers into _request.
# _headers is applied last, so it overrides default headers with the same name.
+ def build_headers(_request, _headers)
106
+ copy_headers _request, @default_headers if @default_headers
107
+ copy_headers _request, _headers
108
+ end
109
+
110
# Sets the Cookie header of _request from the jar's cookies matching _uri.
#
# The header is left untouched when no cookie matches; the original sent an
# empty "Cookie:" header in that case.
def set_cookies(_request, _uri)
  header = @cookies.for_origin_as_header _uri
  _request['Cookie'] = header unless header.empty?
end
113
+
114
# Stores every set-cookie header of _response in the jar, resolved against _uri.
# Does nothing when the response has no set-cookie header.
+ def update_cookies(_uri, _response)
115
+ cookies = _response.get_fields('set-cookie')
116
+ cookies.each { |raw| @cookies.set_raw _uri, raw } if cookies
117
+ end
118
+
119
# Assigns every key/value pair of the _headers hash to _request through []=.
+ def copy_headers(_request, _headers)
120
+ _headers.keys.each { |k| _request[k] = _headers[k] }
121
+ end
122
+ end
123
+ end
@@ -1,3 +1,3 @@
1
1
  module Pincers
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pincers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-03 00:00:00.000000000 Z
11
+ date: 2015-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -217,6 +217,8 @@ files:
217
217
  - lib/pincers/backend/base.rb
218
218
  - lib/pincers/backend/nokogiri.rb
219
219
  - lib/pincers/backend/webdriver.rb
220
+ - lib/pincers/core/cookies.rb
221
+ - lib/pincers/core/download.rb
220
222
  - lib/pincers/core/root_context.rb
221
223
  - lib/pincers/core/search_context.rb
222
224
  - lib/pincers/errors.rb
@@ -228,7 +230,9 @@ files:
228
230
  - lib/pincers/factories/webdriver.rb
229
231
  - lib/pincers/factory.rb
230
232
  - lib/pincers/support/configuration.rb
233
+ - lib/pincers/support/cookie.rb
231
234
  - lib/pincers/support/cookie_jar.rb
235
+ - lib/pincers/support/http_client.rb
232
236
  - lib/pincers/support/query.rb
233
237
  - lib/pincers/support/xpath_builder.rb
234
238
  - lib/pincers/version.rb