pincers 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a19d33e62f9611053e680e501b3bab925c4279fa
4
- data.tar.gz: 2266ae44ad8606cc8dd7097c91d2d9ae64a8d80f
3
+ metadata.gz: 687162c2ca445449ffdacc2890cd9e94182317da
4
+ data.tar.gz: bf7434146ce2516a3d5b5cae8ac9f9cebd03a4ae
5
5
  SHA512:
6
- metadata.gz: 7764b044ebdfd024d894273fb511573644be4074415acc24fb46b5fc9f577d7dd5d95cc8cdbb90fbd6ac90de203c4852c4b590f837c64a58cd882c39a8a2f5d6
7
- data.tar.gz: 3b35f1c8568797d90d2cb90a87b81997158b76428a3b62a550fa723b2770af8ee05d370d7de1ac4732f4b9feaca2eb4fd73502555d404a2c560f751ed39a82ad
6
+ metadata.gz: c8d1a5595a9b19c1ed239ed081e101e4df882522752d79c096802978d04fb5b6395ef63d628078d8ef81828f9216e794ba338ae3a73716da21987ba174b561a3
7
+ data.tar.gz: 2fa86573efa8cb6ccb0e1d26eb2c4e892d3a8e6fae2009a70f3308ec3d3decc29a5c081312067d542de22ca2898f33cbbb6e527c048088f90bca1771a8762e0b
@@ -2,6 +2,7 @@ require 'pincers/nokogiri/backend'
2
2
  require 'pincers/chenso/browsing_manager'
3
3
  require 'pincers/chenso/html_page_request'
4
4
  require 'pincers/chenso/html_form_request'
5
+ require 'pincers/chenso/html_page_cache'
5
6
  require 'pincers/core/helpers/form'
6
7
 
7
8
  module Pincers::Chenso
@@ -68,7 +69,14 @@ module Pincers::Chenso
68
69
  end
69
70
 
70
71
  def as_http_client
71
- @client.fork(false)
72
+ @client.fork(true)
73
+ end
74
+
75
+ def merge_http_client(_client)
76
+ @client.join _client
77
+ if _client.content and /text\/html/ === _client.content_type
78
+ @browser.push HtmlPageCache.new(_client.uri, _client.content)
79
+ end
72
80
  end
73
81
 
74
82
  def switch_to_frame(_element)
@@ -2,14 +2,12 @@ module Pincers::Chenso
2
2
 
3
3
  class BrowsingContext
4
4
 
5
- attr_reader :document
6
-
7
- def initialize(_http_client)
5
+ def initialize(_http_client, _state=nil)
8
6
  @client = _http_client
9
7
  @history = []
10
8
  @pointer = -1
11
9
  @childs = {}
12
- @document = nil
10
+ @state = _state
13
11
  end
14
12
 
15
13
  def get_child(_id)
@@ -17,24 +15,32 @@ module Pincers::Chenso
17
15
  end
18
16
 
19
17
  def load_child(_id)
20
- @childs[_id] = self.class.new @client
18
+ @childs[_id] = self.class.new(@client, @state)
21
19
  end
22
20
 
23
21
  def current_url
24
- @client.uri.to_s
22
+ @state ? @state.uri.to_s : nil
23
+ end
24
+
25
+ def document
26
+ @state ? @state.document : nil
25
27
  end
26
28
 
27
29
  def refresh
28
30
  if @pointer >= 0
29
31
  navigate @history[@pointer]
30
- else nil end
32
+ end
33
+ self
31
34
  end
32
35
 
33
36
  def push(_request)
37
+ _request.fix_uri @state
38
+
34
39
  @history.slice!(@pointer+1..-1)
35
40
  @history.push _request
36
41
  @pointer += 1
37
42
  navigate _request
43
+ self
38
44
  end
39
45
 
40
46
  def back(_times=1)
@@ -46,6 +52,7 @@ module Pincers::Chenso
46
52
  else
47
53
  change_pointer @pointer - _times
48
54
  end
55
+ self
49
56
  end
50
57
 
51
58
  def forward(_times=1)
@@ -55,6 +62,7 @@ module Pincers::Chenso
55
62
  else
56
63
  change_pointer @pointer + _times
57
64
  end
65
+ self
58
66
  end
59
67
 
60
68
  private
@@ -67,9 +75,9 @@ module Pincers::Chenso
67
75
  end
68
76
 
69
77
  def navigate(_request)
70
- @document = _request.execute @client
78
+ @state = _request.execute @client
71
79
  @childs.clear
72
- @document
80
+ nil
73
81
  end
74
82
 
75
83
  end
@@ -0,0 +1,4 @@
1
+ module Pincers::Chenso
2
+ class BrowsingState < Struct.new(:uri, :document)
3
+ end
4
+ end
@@ -0,0 +1,18 @@
1
+ require 'pincers/chenso/browsing_state'
2
+
3
+ module Pincers::Chenso
4
+ class HtmlPageCache
5
+ def initialize(_uri, _content)
6
+ @uri = _uri
7
+ @content = _content
8
+ end
9
+
10
+ def fix_uri(_current_state)
11
+ # nothing
12
+ end
13
+
14
+ def execute(_client)
15
+ BrowsingState.new @uri, ::Nokogiri::HTML(@content)
16
+ end
17
+ end
18
+ end
@@ -1,3 +1,6 @@
1
+ require 'pincers/http/utils'
2
+ require 'pincers/chenso/browsing_state'
3
+
1
4
  module Pincers::Chenso
2
5
  class HtmlPageRequest
3
6
 
@@ -7,21 +10,28 @@ module Pincers::Chenso
7
10
  }
8
11
 
9
12
  def initialize(_url, _method=:get, _data=nil, _encoding=nil)
10
- @url = _url
13
+ @url = _url
11
14
  @method = _method
12
15
  @data = _data
13
16
  @encoding = _encoding
14
17
  end
15
18
 
16
- def execute(_client)
17
- @uri = _client.absolute_uri_for @url if @uri.nil?
19
+ def fix_uri(_current_state)
20
+ @uri = Pincers::Http::Utils.parse_uri @url
21
+ if _current_state
22
+ @uri = URI.join(_current_state.uri, @uri)
23
+ elsif @uri.relative?
24
+ raise ArgumentError, 'Absolute uri required'
25
+ end
26
+ end
18
27
 
28
+ def execute(_client)
19
29
  _client.send(@method, @uri) do |request|
20
30
  request.headers.merge DEFAULT_HEADERS
21
31
  request.set_form_data(@data, @encoding) unless @data.nil?
22
32
  end
23
33
 
24
- ::Nokogiri::HTML _client.content
34
+ BrowsingState.new _client.uri, ::Nokogiri::HTML(_client.content)
25
35
  end
26
36
  end
27
37
  end
@@ -130,6 +130,10 @@ module Pincers::Core
130
130
  ensure_implementation :as_http_client
131
131
  end
132
132
 
133
+ def merge_http_client(_client)
134
+ # nothing
135
+ end
136
+
133
137
  def fetch_resource
134
138
  ensure_implementation :fetch_resource
135
139
  end
@@ -109,7 +109,7 @@ module Pincers::Core
109
109
  http_client = backend.as_http_client
110
110
  unless _block.nil?
111
111
  r = _block.call http_client
112
- # sync_with http_client # TODO :copy cookies and maybe url?
112
+ backend.merge_http_client http_client
113
113
  r
114
114
  else
115
115
  http_client
@@ -42,6 +42,10 @@ module Pincers::Http
42
42
  end
43
43
 
44
44
  def set_cookie(_cookie)
45
+ if _cookie.is_a? Hash
46
+ _cookie = Cookie.new(_cookie[:name], _cookie[:value], _cookie[:domain], _cookie[:path])
47
+ end
48
+
45
49
  @session.cookie_jar.set _cookie
46
50
  end
47
51
 
@@ -72,11 +76,15 @@ module Pincers::Http
72
76
  perform_in_session request
73
77
  end
74
78
 
75
- def fork(_keep_session = true)
79
+ def fork(_keep_session = false)
76
80
  fork_session = _keep_session ? @session : @session.clone
77
81
  self.class.new fork_session, @document
78
82
  end
79
83
 
84
+ def join(_other_client)
85
+ @session.sync _other_client.session
86
+ end
87
+
80
88
  def absolute_uri_for(_url)
81
89
  uri = _url.is_a?(URI) ? _url : Utils.parse_uri(_url)
82
90
  if uri.relative?
@@ -1,3 +1,24 @@
1
1
  module Pincers::Http
2
- class Cookie < Struct.new(:name, :value, :domain, :path, :expires, :secure); end
2
+ class Cookie
3
+
4
+ attr_reader :name, :value, :domain, :path, :expires, :secure
5
+
6
+ def initialize(_name, _value, _domain, _path=nil, _expires=nil, _secure=nil)
7
+ @name = _name
8
+ @value = _value
9
+ @domain = _domain.gsub(/^\./,'').downcase # RFC 6265 5.2.3
10
+ @path = valid_path?(_path) ? _path.downcase : '/'
11
+ @expires = _expires
12
+ @secure = _secure
13
+ end
14
+
15
+ private
16
+
17
+ def valid_path?(_path)
18
+ !_path.nil? && !_path.empty? && _path[0] == '/'
19
+ end
20
+
21
+ end
22
+
23
+
3
24
  end
@@ -20,38 +20,38 @@ module Pincers::Http
20
20
  for_origin(Utils.parse_uri(_url)).find { |c| c.name == _name }
21
21
  end
22
22
 
23
- def set(_parts)
24
- return nil unless _parts[:domain]
25
- return nil unless _parts[:name]
26
- return nil unless _parts[:value]
27
-
28
- cookie = Cookie.new(
29
- _parts[:name],
30
- _parts[:value],
31
- _parts[:domain].gsub(/^\./,''),
32
- _parts[:path] || '/',
33
- _parts[:expires],
34
- _parts[:secure]
35
- )
36
-
37
- replace_cookie cookie
38
- cookie
39
- end
40
-
41
- def set_raw(_uri, _raw)
42
- cookie = decode_cookie _raw
23
+ def set(_cookie)
24
+ if _cookie.name.nil? or _cookie.value.nil? or _cookie.domain.nil? or _cookie.path.nil?
25
+ raise ArgumentError, 'Invalid cookie'
26
+ end
43
27
 
44
- cookie.domain = compute_domain cookie.domain, _uri.host
45
- cookie.path = compute_path cookie.path, _uri.path
28
+ @cookies.each_with_index do |cookie, i|
29
+ if equivalent(cookie, _cookie)
30
+ @cookies[i] = _cookie
31
+ return _cookie
32
+ end
33
+ end
46
34
 
47
- return nil if cookie.domain.nil? or cookie.path.nil?
35
+ @cookies << _cookie
36
+ _cookie
37
+ end
48
38
 
49
- replace_cookie(cookie)
39
+ def set_raw(_request_uri, _raw)
40
+ cookie = decode_cookie _request_uri, _raw
41
+ set cookie unless cookie.nil?
50
42
  cookie
51
43
  end
52
44
 
45
+ def set_from_header(_uri, _header)
46
+ _header.split(/, (?=\w+=)/).map do |raw_cookie|
47
+ set_raw _uri, raw_cookie.strip
48
+ end
49
+ end
50
+
53
51
  def for_origin(_uri)
52
+ # RFC 6265 5.4.1
54
53
  @cookies.select do |c|
54
+ # TODO: add scheme and host only checks
55
55
  domains_match c.domain, _uri.host and paths_match c.path, _uri.path
56
56
  end
57
57
  end
@@ -62,38 +62,56 @@ module Pincers::Http
62
62
 
63
63
  private
64
64
 
65
- def decode_cookie(_raw)
65
+ def decode_cookie(_request, _raw)
66
66
  # taken from WEBrick implementation
67
67
  cookie_elem = _raw.split(/;/)
68
68
  first_elem = cookie_elem.shift
69
69
  first_elem.strip!
70
- key, value = first_elem.split(/\=/, 2)
71
70
 
72
- cookie = Cookie.new(key, dequote(value))
71
+ name, value = first_elem.split(/\=/, 2)
72
+ domain = nil
73
+ path = nil
74
+ expires = nil
75
+ secure = nil
76
+ # TODO: host_only = true
77
+
73
78
  cookie_elem.each do |pair|
74
79
  pair.strip!
75
- key, value = pair.split(/\=/, 2)
76
- value = dequote(value.strip) if value
77
-
78
- case key.downcase
79
- when "domain" then cookie.domain = value.downcase
80
- when "path" then cookie.path = value.downcase
81
- when "expires" then cookie.expires = value
82
- # when "max-age" then cookie.max_age = Integer(value)
83
- # when "comment" then cookie.comment = value
84
- # when "version" then cookie.version = Integer(value)
85
- when "secure" then cookie.secure = true
80
+ opt_key, opt_value = pair.split(/\=/, 2)
81
+ opt_value = dequote(opt_value.strip) if opt_value
82
+
83
+ case opt_key.downcase
84
+ when "domain"
85
+ domain = opt_value.downcase
86
+ # TODO: host_only = false
87
+ return nil unless domains_match(domain, _request.host) # RFC 6265 5.3.6
88
+ when "path"
89
+ path = opt_value.downcase if opt_value[0] == '/' # RFC 6265 5.2.4
90
+ when "expires" then expires = opt_value
91
+ # when "max-age" then max_age = Integer(value)
92
+ # when "comment" then comment = value
93
+ # when "version" then version = Integer(value)
94
+ when "secure" then secure = true
86
95
  end
87
96
  end
88
97
 
89
- cookie
98
+ Cookie.new(
99
+ name,
100
+ dequote(value),
101
+ domain || _request.host,
102
+ path || default_path(_request.path),
103
+ expires,
104
+ secure
105
+ )
90
106
  end
91
107
 
92
108
  def domains_match(_cookie_domain, _request_domain)
93
109
  # RFC 6265 - 5.1.3
94
110
  # TODO: ensure request domain is not an IP
95
111
  return true if _cookie_domain == _request_domain
96
- return true if _request_domain.end_with? ".#{_cookie_domain}"
112
+ if _request_domain.end_with? "#{_cookie_domain}"
113
+ return true if _cookie_domain[0] == '.' or _request_domain.end_with? ".#{_cookie_domain}"
114
+ end
97
115
  return false
98
116
  end
99
117
 
@@ -101,26 +119,13 @@ module Pincers::Http
101
119
  # RFC 6265 - 5.1.4
102
120
  _request_path = '/' if _request_path.empty?
103
121
  return true if _cookie_path == _request_path
104
- return true if _cookie_path[-1] == '/' and _request_path.start_with? _cookie_path
105
- return true if _request_path.start_with? "#{_cookie_path}/"
122
+ if _request_path.start_with? _cookie_path
123
+ return true if _cookie_path[-1] == '/' or _request_path.start_with? "#{_cookie_path}/"
124
+ end
106
125
  return false
107
126
  end
108
127
 
109
- def compute_domain(_cookie_domain, _request_domain)
110
- return _request_domain if _cookie_domain.nil?
111
- # cookies with different domain are discarded
112
- return nil unless _cookie_domain.end_with? _request_domain
113
- return _cookie_domain.gsub(/^\./,'') # remove leading dot
114
- end
115
-
116
- def compute_path(_cookie_path, _request_path)
117
- default_path = compute_default_path(_request_path)
118
- return default_path if _cookie_path.nil?
119
- return nil unless _cookie_path.start_with? default_path
120
- return _cookie_path
121
- end
122
-
123
- def compute_default_path(_request_path)
128
+ def default_path(_request_path)
124
129
  # RFC 6265 - 5.1.4
125
130
  return '/' unless _request_path[0] === '/'
126
131
  ls_idx = _request_path.rindex('/')
@@ -128,17 +133,6 @@ module Pincers::Http
128
133
  _request_path[0..ls_idx]
129
134
  end
130
135
 
131
- def replace_cookie(_cookie)
132
- @cookies.each_with_index do |cookie, i|
133
- if equivalent(cookie, _cookie)
134
- @cookies[i] = _cookie
135
- return
136
- end
137
- end
138
-
139
- @cookies << _cookie
140
- end
141
-
142
136
  def dequote(_str)
143
137
  # taken from WEBrick implementation
144
138
  ret = (/\A"(.*)"\Z/ =~ _str) ? $1 : _str.dup
@@ -38,6 +38,11 @@ module Pincers::Http
38
38
  self.class.new self
39
39
  end
40
40
 
41
+ def sync(_other)
42
+ @headers.merge! _other.headers
43
+ _other.cookie_jar.cookies.each { |c| cookie_jar.set c }
44
+ end
45
+
41
46
  def perform(_request)
42
47
  perform_recursive _request, @redirect_limit, nil
43
48
  end
@@ -57,16 +62,16 @@ module Pincers::Http
57
62
  copy_headers http_request, _request.headers
58
63
  set_cookies http_request, uri
59
64
 
60
- http_response = connect(uri).request http_request
65
+ http_response = connect(uri).request(http_request)
66
+ update_cookies(uri, http_response)
61
67
 
62
68
  case http_response
63
69
  when Net::HTTPSuccess then
64
- update_cookies(uri, http_response)
65
70
  http_response.uri = uri # uri is not always set by net/http
66
71
  http_response
67
72
  when Net::HTTPRedirection then
68
73
  location = Utils.parse_uri(http_response['location'])
69
- perform(_request, _limit - 1, location)
74
+ perform_recursive(_request, _limit - 1, location)
70
75
  else
71
76
  handle_error_response http_response
72
77
  end
@@ -92,8 +97,8 @@ module Pincers::Http
92
97
  end
93
98
 
94
99
  def update_cookies(_uri, _response)
95
- cookies = _response.get_fields('set-cookie')
96
- cookies.each { |raw| @cookie_jar.set_raw _uri, raw } if cookies
100
+ fields = _response.get_fields('set-cookie')
101
+ fields.each { |field| cookie_jar.set_from_header _uri, field } if fields
97
102
  end
98
103
  end
99
104
  end
@@ -1,3 +1,3 @@
1
1
  module Pincers
2
- VERSION = "0.7.0"
2
+ VERSION = "0.7.1"
3
3
  end
@@ -1,7 +1,8 @@
1
1
  require 'selenium-webdriver'
2
2
  require 'pincers/core/base_backend'
3
- require 'pincers/http/client'
4
3
  require 'pincers/webdriver/http_document'
4
+ require 'pincers/http/client'
5
+ require 'pincers/http/cookie'
5
6
 
6
7
  module Pincers::Webdriver
7
8
  class Backend < Pincers::Core::BaseBackend
@@ -161,7 +162,7 @@ module Pincers::Webdriver
161
162
  session = Pincers::Http::Session.new
162
163
  session.headers['User-Agent'] = user_agent
163
164
  session.proxy = proxy_address
164
- driver.manage.all_cookies.each { |c| session.cookie_jar.set c }
165
+ load_cookies_in_session session
165
166
 
166
167
  Pincers::Http::Client.new session, HttpDocument.new(self)
167
168
  end
@@ -217,5 +218,20 @@ module Pincers::Webdriver
217
218
  proxy = driver.capabilities.proxy
218
219
  proxy.nil? ? nil : (proxy.http || proxy.ssl)
219
220
  end
221
+
222
+ def load_cookies_in_session(_session)
223
+ driver.manage.all_cookies.each do |wd_cookie|
224
+ if wd_cookie[:domain] and wd_cookie[:name] and wd_cookie[:value]
225
+ _session.cookie_jar.set Pincers::Http::Cookie.new(
226
+ wd_cookie[:name],
227
+ wd_cookie[:value],
228
+ wd_cookie[:domain],
229
+ wd_cookie[:path],
230
+ wd_cookie[:expires],
231
+ wd_cookie[:secure]
232
+ )
233
+ end
234
+ end
235
+ end
220
236
  end
221
237
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pincers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-21 00:00:00.000000000 Z
11
+ date: 2015-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -231,8 +231,10 @@ files:
231
231
  - lib/pincers/chenso/backend.rb
232
232
  - lib/pincers/chenso/browsing_context.rb
233
233
  - lib/pincers/chenso/browsing_manager.rb
234
+ - lib/pincers/chenso/browsing_state.rb
234
235
  - lib/pincers/chenso/factory.rb
235
236
  - lib/pincers/chenso/html_form_request.rb
237
+ - lib/pincers/chenso/html_page_cache.rb
236
238
  - lib/pincers/chenso/html_page_request.rb
237
239
  - lib/pincers/core/base_backend.rb
238
240
  - lib/pincers/core/base_factory.rb