pincers 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a19d33e62f9611053e680e501b3bab925c4279fa
4
- data.tar.gz: 2266ae44ad8606cc8dd7097c91d2d9ae64a8d80f
3
+ metadata.gz: 687162c2ca445449ffdacc2890cd9e94182317da
4
+ data.tar.gz: bf7434146ce2516a3d5b5cae8ac9f9cebd03a4ae
5
5
  SHA512:
6
- metadata.gz: 7764b044ebdfd024d894273fb511573644be4074415acc24fb46b5fc9f577d7dd5d95cc8cdbb90fbd6ac90de203c4852c4b590f837c64a58cd882c39a8a2f5d6
7
- data.tar.gz: 3b35f1c8568797d90d2cb90a87b81997158b76428a3b62a550fa723b2770af8ee05d370d7de1ac4732f4b9feaca2eb4fd73502555d404a2c560f751ed39a82ad
6
+ metadata.gz: c8d1a5595a9b19c1ed239ed081e101e4df882522752d79c096802978d04fb5b6395ef63d628078d8ef81828f9216e794ba338ae3a73716da21987ba174b561a3
7
+ data.tar.gz: 2fa86573efa8cb6ccb0e1d26eb2c4e892d3a8e6fae2009a70f3308ec3d3decc29a5c081312067d542de22ca2898f33cbbb6e527c048088f90bca1771a8762e0b
@@ -2,6 +2,7 @@ require 'pincers/nokogiri/backend'
2
2
  require 'pincers/chenso/browsing_manager'
3
3
  require 'pincers/chenso/html_page_request'
4
4
  require 'pincers/chenso/html_form_request'
5
+ require 'pincers/chenso/html_page_cache'
5
6
  require 'pincers/core/helpers/form'
6
7
 
7
8
  module Pincers::Chenso
@@ -68,7 +69,14 @@ module Pincers::Chenso
68
69
  end
69
70
 
70
71
  def as_http_client
71
- @client.fork(false)
72
+ @client.fork(true)
73
+ end
74
+
75
+ def merge_http_client(_client)
76
+ @client.join _client
77
+ if _client.content and /text\/html/ === _client.content_type
78
+ @browser.push HtmlPageCache.new(_client.uri, _client.content)
79
+ end
72
80
  end
73
81
 
74
82
  def switch_to_frame(_element)
@@ -2,14 +2,12 @@ module Pincers::Chenso
2
2
 
3
3
  class BrowsingContext
4
4
 
5
- attr_reader :document
6
-
7
- def initialize(_http_client)
5
+ def initialize(_http_client, _state=nil)
8
6
  @client = _http_client
9
7
  @history = []
10
8
  @pointer = -1
11
9
  @childs = {}
12
- @document = nil
10
+ @state = _state
13
11
  end
14
12
 
15
13
  def get_child(_id)
@@ -17,24 +15,32 @@ module Pincers::Chenso
17
15
  end
18
16
 
19
17
  def load_child(_id)
20
- @childs[_id] = self.class.new @client
18
+ @childs[_id] = self.class.new(@client, @state)
21
19
  end
22
20
 
23
21
  def current_url
24
- @client.uri.to_s
22
+ @state ? @state.uri.to_s : nil
23
+ end
24
+
25
+ def document
26
+ @state ? @state.document : nil
25
27
  end
26
28
 
27
29
  def refresh
28
30
  if @pointer >= 0
29
31
  navigate @history[@pointer]
30
- else nil end
32
+ end
33
+ self
31
34
  end
32
35
 
33
36
  def push(_request)
37
+ _request.fix_uri @state
38
+
34
39
  @history.slice!(@pointer+1..-1)
35
40
  @history.push _request
36
41
  @pointer += 1
37
42
  navigate _request
43
+ self
38
44
  end
39
45
 
40
46
  def back(_times=1)
@@ -46,6 +52,7 @@ module Pincers::Chenso
46
52
  else
47
53
  change_pointer @pointer - _times
48
54
  end
55
+ self
49
56
  end
50
57
 
51
58
  def forward(_times=1)
@@ -55,6 +62,7 @@ module Pincers::Chenso
55
62
  else
56
63
  change_pointer @pointer + _times
57
64
  end
65
+ self
58
66
  end
59
67
 
60
68
  private
@@ -67,9 +75,9 @@ module Pincers::Chenso
67
75
  end
68
76
 
69
77
  def navigate(_request)
70
- @document = _request.execute @client
78
+ @state = _request.execute @client
71
79
  @childs.clear
72
- @document
80
+ nil
73
81
  end
74
82
 
75
83
  end
@@ -0,0 +1,4 @@
1
+ module Pincers::Chenso
2
+ class BrowsingState < Struct.new(:uri, :document)
3
+ end
4
+ end
@@ -0,0 +1,18 @@
1
+ require 'pincers/chenso/browsing_state'
2
+
3
+ module Pincers::Chenso
4
+ class HtmlPageCache
5
+ def initialize(_uri, _content)
6
+ @uri = _uri
7
+ @content = _content
8
+ end
9
+
10
+ def fix_uri(_current_state)
11
+ # nothing
12
+ end
13
+
14
+ def execute(_client)
15
+ BrowsingState.new @uri, ::Nokogiri::HTML(@content)
16
+ end
17
+ end
18
+ end
@@ -1,3 +1,6 @@
1
+ require 'pincers/http/utils'
2
+ require 'pincers/chenso/browsing_state'
3
+
1
4
  module Pincers::Chenso
2
5
  class HtmlPageRequest
3
6
 
@@ -7,21 +10,28 @@ module Pincers::Chenso
7
10
  }
8
11
 
9
12
  def initialize(_url, _method=:get, _data=nil, _encoding=nil)
10
- @url = _url
13
+ @url = _url
11
14
  @method = _method
12
15
  @data = _data
13
16
  @encoding = _encoding
14
17
  end
15
18
 
16
- def execute(_client)
17
- @uri = _client.absolute_uri_for @url if @uri.nil?
19
+ def fix_uri(_current_state)
20
+ @uri = Pincers::Http::Utils.parse_uri @url
21
+ if _current_state
22
+ @uri = URI.join(_current_state.uri, @uri)
23
+ elsif @uri.relative?
24
+ raise ArgumentError, 'Absolute uri required'
25
+ end
26
+ end
18
27
 
28
+ def execute(_client)
19
29
  _client.send(@method, @uri) do |request|
20
30
  request.headers.merge DEFAULT_HEADERS
21
31
  request.set_form_data(@data, @encoding) unless @data.nil?
22
32
  end
23
33
 
24
- ::Nokogiri::HTML _client.content
34
+ BrowsingState.new _client.uri, ::Nokogiri::HTML(_client.content)
25
35
  end
26
36
  end
27
37
  end
@@ -130,6 +130,10 @@ module Pincers::Core
130
130
  ensure_implementation :as_http_client
131
131
  end
132
132
 
133
+ def merge_http_client(_client)
134
+ # nothing
135
+ end
136
+
133
137
  def fetch_resource
134
138
  ensure_implementation :fetch_resource
135
139
  end
@@ -109,7 +109,7 @@ module Pincers::Core
109
109
  http_client = backend.as_http_client
110
110
  unless _block.nil?
111
111
  r = _block.call http_client
112
- # sync_with http_client # TODO :copy cookies and maybe url?
112
+ backend.merge_http_client http_client
113
113
  r
114
114
  else
115
115
  http_client
@@ -42,6 +42,10 @@ module Pincers::Http
42
42
  end
43
43
 
44
44
  def set_cookie(_cookie)
45
+ if _cookie.is_a? Hash
46
+ _cookie = Cookie.new(_cookie[:name], _cookie[:value], _cookie[:domain], _cookie[:path])
47
+ end
48
+
45
49
  @session.cookie_jar.set _cookie
46
50
  end
47
51
 
@@ -72,11 +76,15 @@ module Pincers::Http
72
76
  perform_in_session request
73
77
  end
74
78
 
75
- def fork(_keep_session = true)
79
+ def fork(_keep_session = false)
76
80
  fork_session = _keep_session ? @session : @session.clone
77
81
  self.class.new fork_session, @document
78
82
  end
79
83
 
84
+ def join(_other_client)
85
+ @session.sync _other_client.session
86
+ end
87
+
80
88
  def absolute_uri_for(_url)
81
89
  uri = _url.is_a?(URI) ? _url : Utils.parse_uri(_url)
82
90
  if uri.relative?
@@ -1,3 +1,24 @@
1
1
  module Pincers::Http
2
- class Cookie < Struct.new(:name, :value, :domain, :path, :expires, :secure); end
2
+ class Cookie
3
+
4
+ attr_reader :name, :value, :domain, :path, :expires, :secure
5
+
6
+ def initialize(_name, _value, _domain, _path=nil, _expires=nil, _secure=nil)
7
+ @name = _name
8
+ @value = _value
9
+ @domain = _domain.gsub(/^\./,'').downcase # RFC 6265 5.2.3
10
+ @path = valid_path?(_path) ? _path.downcase : '/'
11
+ @expires = _expires
12
+ @secure = _secure
13
+ end
14
+
15
+ private
16
+
17
+ def valid_path?(_path)
18
+ !_path.nil? && !_path.empty? && _path[0] == '/'
19
+ end
20
+
21
+ end
22
+
23
+
3
24
  end
@@ -20,38 +20,38 @@ module Pincers::Http
20
20
  for_origin(Utils.parse_uri(_url)).find { |c| c.name == _name }
21
21
  end
22
22
 
23
- def set(_parts)
24
- return nil unless _parts[:domain]
25
- return nil unless _parts[:name]
26
- return nil unless _parts[:value]
27
-
28
- cookie = Cookie.new(
29
- _parts[:name],
30
- _parts[:value],
31
- _parts[:domain].gsub(/^\./,''),
32
- _parts[:path] || '/',
33
- _parts[:expires],
34
- _parts[:secure]
35
- )
36
-
37
- replace_cookie cookie
38
- cookie
39
- end
40
-
41
- def set_raw(_uri, _raw)
42
- cookie = decode_cookie _raw
23
+ def set(_cookie)
24
+ if _cookie.name.nil? or _cookie.value.nil? or _cookie.domain.nil? or _cookie.path.nil?
25
+ raise ArgumentError, 'Invalid cookie'
26
+ end
43
27
 
44
- cookie.domain = compute_domain cookie.domain, _uri.host
45
- cookie.path = compute_path cookie.path, _uri.path
28
+ @cookies.each_with_index do |cookie, i|
29
+ if equivalent(cookie, _cookie)
30
+ @cookies[i] = _cookie
31
+ return _cookie
32
+ end
33
+ end
46
34
 
47
- return nil if cookie.domain.nil? or cookie.path.nil?
35
+ @cookies << _cookie
36
+ _cookie
37
+ end
48
38
 
49
- replace_cookie(cookie)
39
+ def set_raw(_request_uri, _raw)
40
+ cookie = decode_cookie _request_uri, _raw
41
+ set cookie unless cookie.nil?
50
42
  cookie
51
43
  end
52
44
 
45
+ def set_from_header(_uri, _header)
46
+ _header.split(/, (?=\w+=)/).map do |raw_cookie|
47
+ set_raw _uri, raw_cookie.strip
48
+ end
49
+ end
50
+
53
51
  def for_origin(_uri)
52
+ # RFC 6265 5.4.1
54
53
  @cookies.select do |c|
54
+ # TODO: add scheme and host only checks
55
55
  domains_match c.domain, _uri.host and paths_match c.path, _uri.path
56
56
  end
57
57
  end
@@ -62,38 +62,56 @@ module Pincers::Http
62
62
 
63
63
  private
64
64
 
65
- def decode_cookie(_raw)
65
+ def decode_cookie(_request, _raw)
66
66
  # taken from WEBrick implementation
67
67
  cookie_elem = _raw.split(/;/)
68
68
  first_elem = cookie_elem.shift
69
69
  first_elem.strip!
70
- key, value = first_elem.split(/\=/, 2)
71
70
 
72
- cookie = Cookie.new(key, dequote(value))
71
+ name, value = first_elem.split(/\=/, 2)
72
+ domain = nil
73
+ path = nil
74
+ expires = nil
75
+ secure = nil
76
+ # TODO: host_only = true
77
+
73
78
  cookie_elem.each do |pair|
74
79
  pair.strip!
75
- key, value = pair.split(/\=/, 2)
76
- value = dequote(value.strip) if value
77
-
78
- case key.downcase
79
- when "domain" then cookie.domain = value.downcase
80
- when "path" then cookie.path = value.downcase
81
- when "expires" then cookie.expires = value
82
- # when "max-age" then cookie.max_age = Integer(value)
83
- # when "comment" then cookie.comment = value
84
- # when "version" then cookie.version = Integer(value)
85
- when "secure" then cookie.secure = true
80
+ opt_key, opt_value = pair.split(/\=/, 2)
81
+ opt_value = dequote(opt_value.strip) if opt_value
82
+
83
+ case opt_key.downcase
84
+ when "domain"
85
+ domain = opt_value.downcase
86
+ # TODO: host_only = false
87
+ return nil unless domains_match(domain, _request.host) # RFC 6265 5.3.6
88
+ when "path"
89
+ path = opt_value.downcase if opt_value[0] == '/' # RFC 6265 5.2.4
90
+ when "expires" then expires = opt_value
91
+ # when "max-age" then max_age = Integer(value)
92
+ # when "comment" then comment = value
93
+ # when "version" then version = Integer(value)
94
+ when "secure" then secure = true
86
95
  end
87
96
  end
88
97
 
89
- cookie
98
+ Cookie.new(
99
+ name,
100
+ dequote(value),
101
+ domain || _request.host,
102
+ path || default_path(_request.path),
103
+ expires,
104
+ secure
105
+ )
90
106
  end
91
107
 
92
108
  def domains_match(_cookie_domain, _request_domain)
93
109
  # RFC 6265 - 5.1.3
94
110
  # TODO: ensure request domain is not an IP
95
111
  return true if _cookie_domain == _request_domain
96
- return true if _request_domain.end_with? ".#{_cookie_domain}"
112
+ if _request_domain.end_with? "#{_cookie_domain}"
113
+ return true if _cookie_domain[0] == '.' or _request_domain.end_with? ".#{_cookie_domain}"
114
+ end
97
115
  return false
98
116
  end
99
117
 
@@ -101,26 +119,13 @@ module Pincers::Http
101
119
  # RFC 6265 - 5.1.4
102
120
  _request_path = '/' if _request_path.empty?
103
121
  return true if _cookie_path == _request_path
104
- return true if _cookie_path[-1] == '/' and _request_path.start_with? _cookie_path
105
- return true if _request_path.start_with? "#{_cookie_path}/"
122
+ if _request_path.start_with? _cookie_path
123
+ return true if _cookie_path[-1] == '/' or _request_path.start_with? "#{_cookie_path}/"
124
+ end
106
125
  return false
107
126
  end
108
127
 
109
- def compute_domain(_cookie_domain, _request_domain)
110
- return _request_domain if _cookie_domain.nil?
111
- # cookies with different domain are discarded
112
- return nil unless _cookie_domain.end_with? _request_domain
113
- return _cookie_domain.gsub(/^\./,'') # remove leading dot
114
- end
115
-
116
- def compute_path(_cookie_path, _request_path)
117
- default_path = compute_default_path(_request_path)
118
- return default_path if _cookie_path.nil?
119
- return nil unless _cookie_path.start_with? default_path
120
- return _cookie_path
121
- end
122
-
123
- def compute_default_path(_request_path)
128
+ def default_path(_request_path)
124
129
  # RFC 6265 - 5.1.4
125
130
  return '/' unless _request_path[0] === '/'
126
131
  ls_idx = _request_path.rindex('/')
@@ -128,17 +133,6 @@ module Pincers::Http
128
133
  _request_path[0..ls_idx]
129
134
  end
130
135
 
131
- def replace_cookie(_cookie)
132
- @cookies.each_with_index do |cookie, i|
133
- if equivalent(cookie, _cookie)
134
- @cookies[i] = _cookie
135
- return
136
- end
137
- end
138
-
139
- @cookies << _cookie
140
- end
141
-
142
136
  def dequote(_str)
143
137
  # taken from WEBrick implementation
144
138
  ret = (/\A"(.*)"\Z/ =~ _str) ? $1 : _str.dup
@@ -38,6 +38,11 @@ module Pincers::Http
38
38
  self.class.new self
39
39
  end
40
40
 
41
+ def sync(_other)
42
+ @headers.merge! _other.headers
43
+ _other.cookie_jar.cookies.each { |c| cookie_jar.set c }
44
+ end
45
+
41
46
  def perform(_request)
42
47
  perform_recursive _request, @redirect_limit, nil
43
48
  end
@@ -57,16 +62,16 @@ module Pincers::Http
57
62
  copy_headers http_request, _request.headers
58
63
  set_cookies http_request, uri
59
64
 
60
- http_response = connect(uri).request http_request
65
+ http_response = connect(uri).request(http_request)
66
+ update_cookies(uri, http_response)
61
67
 
62
68
  case http_response
63
69
  when Net::HTTPSuccess then
64
- update_cookies(uri, http_response)
65
70
  http_response.uri = uri # uri is not always set by net/http
66
71
  http_response
67
72
  when Net::HTTPRedirection then
68
73
  location = Utils.parse_uri(http_response['location'])
69
- perform(_request, _limit - 1, location)
74
+ perform_recursive(_request, _limit - 1, location)
70
75
  else
71
76
  handle_error_response http_response
72
77
  end
@@ -92,8 +97,8 @@ module Pincers::Http
92
97
  end
93
98
 
94
99
  def update_cookies(_uri, _response)
95
- cookies = _response.get_fields('set-cookie')
96
- cookies.each { |raw| @cookie_jar.set_raw _uri, raw } if cookies
100
+ fields = _response.get_fields('set-cookie')
101
+ fields.each { |field| cookie_jar.set_from_header _uri, field } if fields
97
102
  end
98
103
  end
99
104
  end
@@ -1,3 +1,3 @@
1
1
  module Pincers
2
- VERSION = "0.7.0"
2
+ VERSION = "0.7.1"
3
3
  end
@@ -1,7 +1,8 @@
1
1
  require 'selenium-webdriver'
2
2
  require 'pincers/core/base_backend'
3
- require 'pincers/http/client'
4
3
  require 'pincers/webdriver/http_document'
4
+ require 'pincers/http/client'
5
+ require 'pincers/http/cookie'
5
6
 
6
7
  module Pincers::Webdriver
7
8
  class Backend < Pincers::Core::BaseBackend
@@ -161,7 +162,7 @@ module Pincers::Webdriver
161
162
  session = Pincers::Http::Session.new
162
163
  session.headers['User-Agent'] = user_agent
163
164
  session.proxy = proxy_address
164
- driver.manage.all_cookies.each { |c| session.cookie_jar.set c }
165
+ load_cookies_in_session session
165
166
 
166
167
  Pincers::Http::Client.new session, HttpDocument.new(self)
167
168
  end
@@ -217,5 +218,20 @@ module Pincers::Webdriver
217
218
  proxy = driver.capabilities.proxy
218
219
  proxy.nil? ? nil : (proxy.http || proxy.ssl)
219
220
  end
221
+
222
+ def load_cookies_in_session(_session)
223
+ driver.manage.all_cookies.each do |wd_cookie|
224
+ if wd_cookie[:domain] and wd_cookie[:name] and wd_cookie[:value]
225
+ _session.cookie_jar.set Pincers::Http::Cookie.new(
226
+ wd_cookie[:name],
227
+ wd_cookie[:value],
228
+ wd_cookie[:domain],
229
+ wd_cookie[:path],
230
+ wd_cookie[:expires],
231
+ wd_cookie[:secure]
232
+ )
233
+ end
234
+ end
235
+ end
220
236
  end
221
237
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pincers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-21 00:00:00.000000000 Z
11
+ date: 2015-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -231,8 +231,10 @@ files:
231
231
  - lib/pincers/chenso/backend.rb
232
232
  - lib/pincers/chenso/browsing_context.rb
233
233
  - lib/pincers/chenso/browsing_manager.rb
234
+ - lib/pincers/chenso/browsing_state.rb
234
235
  - lib/pincers/chenso/factory.rb
235
236
  - lib/pincers/chenso/html_form_request.rb
237
+ - lib/pincers/chenso/html_page_cache.rb
236
238
  - lib/pincers/chenso/html_page_request.rb
237
239
  - lib/pincers/core/base_backend.rb
238
240
  - lib/pincers/core/base_factory.rb