sunbro 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d1de54ea6b7adab57393a8a220e6ce2715a1875d
4
- data.tar.gz: 6d1b4fa82d615a15320b9a2281dba34c44f6b4f3
3
+ metadata.gz: 0d7f760133a4fab073cd7a7dd5c064ed1744f025
4
+ data.tar.gz: 68b98b70f0e591ca9056b1db73238ea1a578418c
5
5
  SHA512:
6
- metadata.gz: d658cb22b7b57abd492ffac8f2b0b4173c6f806ed93a4d28fa699cbce6178bd97fa87af64778e88eb841bd1f59c1efe0c0e3912377046709104ddc85a48a6ba1
7
- data.tar.gz: e96e191c25f0145f1e540237fc2c59dc8dc390871c23df01466186d15d539bb1d76ef8becc113bce6726fa627799886718f1da0bb36480925fae92ae36ea312e
6
+ metadata.gz: fd0844f9a0157b3dfe7f38f460fe44d023985fbb6cd62d99738f6007fef896499f1fc5439881b85bc37e49a45cdb704af176350f766eab7e3d5d0309f7afdf7f
7
+ data.tar.gz: cef9770fe754b0829452adc5ebb20cc0c5119e819b01a339cb807f3f034e982c78873b9e8414fe480510f99643d7d57945e0a0b5b4198ce5b9c415dc0efed499
@@ -11,43 +11,10 @@ require 'retryable'
11
11
  sunbro/dynamic_http
12
12
  sunbro/http
13
13
  sunbro/page
14
- sunbro/initialize
14
+ sunbro/connection
15
15
  ).each do |f|
16
16
  require f
17
17
  end
18
18
 
19
19
  module Sunbro
20
- MAX_RETRIES = 5
21
-
22
- def get_page(link, opts={})
23
- fetch_with_connection(http, link, opts)
24
- end
25
-
26
- def render_page(link, opts={})
27
- fetch_with_connection(dhttp, link, opts)
28
- end
29
-
30
- def fetch_with_connection(conn, link, opts)
31
- page, tries = nil, MAX_RETRIES
32
- begin
33
- page = conn.fetch_page(link, opts)
34
- sleep 1
35
- end until page.try(:present?) || (tries -= 1).zero?
36
- page.discard_doc! unless page.is_valid?
37
- page
38
- end
39
-
40
- def http
41
- @http ||= HTTP.new
42
- end
43
-
44
- def dhttp
45
- @dhttp ||= DynamicHTTP.new
46
- end
47
-
48
- def close_http_connections
49
- @http.close if @http
50
- @dhttp.close if @dhttp
51
- rescue IOError
52
- end
53
20
  end
@@ -1,9 +1,48 @@
1
- class Connection
2
- extend Sunbro
3
- attr_reader :http, :dhttp
1
+ module Sunbro
2
+ class Connection
3
+ attr_reader :http, :dhttp
4
4
 
5
- def close
6
- close_http_connections
5
+ MAX_RETRIES = 3
6
+
7
+ def fetch_page(link, opts={})
8
+ conn = opts[:force_format] == (:dhtml || 'dhtml') ? dhttp : http
9
+ tries = opts[:tries] || MAX_RETRIES
10
+ sleep_interval = opts[:sleep] || 1
11
+
12
+ page = Retryable.retryable(sleep: sleep_interval, tries: tries) do
13
+ web_retry(opts) do
14
+ conn.fetch_page(link, opts)
15
+ end
16
+ end
17
+ page.discard_doc! unless page.valid?
18
+ page
19
+ end
20
+
21
+ def session
22
+ @dhttp.try(:session)
23
+ end
24
+
25
+ def http
26
+ @http ||= HTTP.new
27
+ end
28
+
29
+ def dhttp
30
+ @dhttp ||= DynamicHTTP.new
31
+ end
32
+
33
+ def close
34
+ @http.try(:close)
35
+ @dhttp.try(:close)
36
+ rescue IOError
37
+ end
38
+
39
+ def web_retry(opts)
40
+ page, tries, sleep_interval = nil, opts[:tries], opts[:sleep]
41
+ begin
42
+ page = yield
43
+ sleep(sleep_interval) unless page.valid?
44
+ end until page.valid? || (tries -= 1).zero?
45
+ page
46
+ end
7
47
  end
8
48
  end
9
-
@@ -74,8 +74,10 @@ module Sunbro
74
74
 
75
75
  def get_page(url, opts)
76
76
  reset = opts.fetch(:reset) rescue true
77
+ start = Time.current.to_i
77
78
  session.visit(url.to_s)
78
79
  page = create_page_from_session(url, session, opts)
80
+ page.response_time = ((Time.now - start) * 1000).round
79
81
  session.reset! if reset
80
82
  page
81
83
  rescue Capybara::Poltergeist::TimeoutError => e
@@ -13,6 +13,7 @@ module Sunbro
13
13
  end
14
14
 
15
15
  def initialize(opts = {})
16
+ RestClient.proxy ||= Sunbro::Settings.proxy_url
16
17
  @connections = {}
17
18
  @opts = opts
18
19
  end
@@ -64,7 +64,7 @@ module Sunbro
64
64
  end
65
65
  end
66
66
 
67
- def is_valid?
67
+ def valid?
68
68
  (url != "about:blank") && !not_found? && present?
69
69
  end
70
70
 
@@ -151,7 +151,7 @@ module Sunbro
151
151
  href = doc.search('//head/base/@href')
152
152
  URI(href.to_s) unless href.nil? rescue nil
153
153
  end unless @base
154
-
154
+
155
155
  return nil if @base && @base.to_s().empty?
156
156
  @base
157
157
  end
@@ -1,3 +1,3 @@
1
1
  module Sunbro
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
metadata CHANGED
@@ -1,153 +1,153 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sunbro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jon Stokes
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-25 00:00:00.000000000 Z
11
+ date: 2015-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
+ name: nokogiri
14
15
  requirement: !ruby/object:Gem::Requirement
15
16
  requirements:
16
- - - '>='
17
+ - - ">="
17
18
  - !ruby/object:Gem::Version
18
19
  version: '0'
19
- name: nokogiri
20
- prerelease: false
21
20
  type: :runtime
21
+ prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
+ name: capybara
28
29
  requirement: !ruby/object:Gem::Requirement
29
30
  requirements:
30
- - - '>='
31
+ - - ">="
31
32
  - !ruby/object:Gem::Version
32
33
  version: '0'
33
- name: capybara
34
- prerelease: false
35
34
  type: :runtime
35
+ prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
+ name: poltergeist
42
43
  requirement: !ruby/object:Gem::Requirement
43
44
  requirements:
44
- - - '>='
45
+ - - ">="
45
46
  - !ruby/object:Gem::Version
46
47
  version: '0'
47
- name: poltergeist
48
- prerelease: false
49
48
  type: :runtime
49
+ prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
+ name: rest-client
56
57
  requirement: !ruby/object:Gem::Requirement
57
58
  requirements:
58
- - - '>='
59
+ - - ">="
59
60
  - !ruby/object:Gem::Version
60
61
  version: '0'
61
- name: rest-client
62
- prerelease: false
63
62
  type: :runtime
63
+ prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
+ name: activesupport
70
71
  requirement: !ruby/object:Gem::Requirement
71
72
  requirements:
72
- - - '>='
73
+ - - ">="
73
74
  - !ruby/object:Gem::Version
74
75
  version: '0'
75
- name: activesupport
76
- prerelease: false
77
76
  type: :runtime
77
+ prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
+ name: retryable
84
85
  requirement: !ruby/object:Gem::Requirement
85
86
  requirements:
86
- - - '>='
87
+ - - ">="
87
88
  - !ruby/object:Gem::Version
88
89
  version: '0'
89
- name: retryable
90
- prerelease: false
91
90
  type: :runtime
91
+ prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - '>='
94
+ - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
+ name: bundler
98
99
  requirement: !ruby/object:Gem::Requirement
99
100
  requirements:
100
- - - ~>
101
+ - - "~>"
101
102
  - !ruby/object:Gem::Version
102
103
  version: '1.5'
103
- name: bundler
104
- prerelease: false
105
104
  type: :development
105
+ prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ~>
108
+ - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '1.5'
111
111
  - !ruby/object:Gem::Dependency
112
+ name: rake
112
113
  requirement: !ruby/object:Gem::Requirement
113
114
  requirements:
114
- - - '>='
115
+ - - ">="
115
116
  - !ruby/object:Gem::Version
116
117
  version: '0'
117
- name: rake
118
- prerelease: false
119
118
  type: :development
119
+ prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - '>='
122
+ - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  - !ruby/object:Gem::Dependency
126
+ name: rspec
126
127
  requirement: !ruby/object:Gem::Requirement
127
128
  requirements:
128
- - - '>='
129
+ - - ">="
129
130
  - !ruby/object:Gem::Version
130
131
  version: '0'
131
- name: rspec
132
- prerelease: false
133
132
  type: :development
133
+ prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
- - - '>='
136
+ - - ">="
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  - !ruby/object:Gem::Dependency
140
+ name: mocktra
140
141
  requirement: !ruby/object:Gem::Requirement
141
142
  requirements:
142
- - - '>='
143
+ - - ">="
143
144
  - !ruby/object:Gem::Version
144
145
  version: '0'
145
- name: mocktra
146
- prerelease: false
147
146
  type: :development
147
+ prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
- - - '>='
150
+ - - ">="
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
153
  description: Requires phantomjs.
@@ -157,7 +157,7 @@ executables: []
157
157
  extensions: []
158
158
  extra_rdoc_files: []
159
159
  files:
160
- - .gitignore
160
+ - ".gitignore"
161
161
  - Gemfile
162
162
  - LICENSE.txt
163
163
  - README.md
@@ -166,7 +166,6 @@ files:
166
166
  - lib/sunbro/connection.rb
167
167
  - lib/sunbro/dynamic_http.rb
168
168
  - lib/sunbro/http.rb
169
- - lib/sunbro/initialize.rb
170
169
  - lib/sunbro/page.rb
171
170
  - lib/sunbro/settings.rb
172
171
  - lib/sunbro/version.rb
@@ -179,26 +178,27 @@ homepage: ''
179
178
  licenses:
180
179
  - MIT
181
180
  metadata: {}
182
- post_install_message:
181
+ post_install_message:
183
182
  rdoc_options: []
184
183
  require_paths:
185
184
  - lib
186
185
  required_ruby_version: !ruby/object:Gem::Requirement
187
186
  requirements:
188
- - - '>='
187
+ - - ">="
189
188
  - !ruby/object:Gem::Version
190
189
  version: '0'
191
190
  required_rubygems_version: !ruby/object:Gem::Requirement
192
191
  requirements:
193
- - - '>='
192
+ - - ">="
194
193
  - !ruby/object:Gem::Version
195
194
  version: '0'
196
195
  requirements: []
197
- rubyforge_project:
198
- rubygems_version: 2.1.9
199
- signing_key:
196
+ rubyforge_project:
197
+ rubygems_version: 2.4.6
198
+ signing_key:
200
199
  specification_version: 4
201
- summary: Some code that I use to crawl the web at scale. Shared in the spirit of jolly cooperation.
200
+ summary: Some code that I use to crawl the web at scale. Shared in the spirit of jolly
201
+ cooperation.
202
202
  test_files:
203
203
  - spec/page_spec.rb
204
204
  - spec/settings_spec.rb
@@ -1,4 +0,0 @@
1
- if Sunbro::Settings.proxy_url
2
- puts "## Setting RestClient proxy to #{Sunbro::Settings.proxy_url}"
3
- RestClient.proxy = Sunbro::Settings.proxy_url
4
- end