sunbro 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d1de54ea6b7adab57393a8a220e6ce2715a1875d
4
- data.tar.gz: 6d1b4fa82d615a15320b9a2281dba34c44f6b4f3
3
+ metadata.gz: 0d7f760133a4fab073cd7a7dd5c064ed1744f025
4
+ data.tar.gz: 68b98b70f0e591ca9056b1db73238ea1a578418c
5
5
  SHA512:
6
- metadata.gz: d658cb22b7b57abd492ffac8f2b0b4173c6f806ed93a4d28fa699cbce6178bd97fa87af64778e88eb841bd1f59c1efe0c0e3912377046709104ddc85a48a6ba1
7
- data.tar.gz: e96e191c25f0145f1e540237fc2c59dc8dc390871c23df01466186d15d539bb1d76ef8becc113bce6726fa627799886718f1da0bb36480925fae92ae36ea312e
6
+ metadata.gz: fd0844f9a0157b3dfe7f38f460fe44d023985fbb6cd62d99738f6007fef896499f1fc5439881b85bc37e49a45cdb704af176350f766eab7e3d5d0309f7afdf7f
7
+ data.tar.gz: cef9770fe754b0829452adc5ebb20cc0c5119e819b01a339cb807f3f034e982c78873b9e8414fe480510f99643d7d57945e0a0b5b4198ce5b9c415dc0efed499
@@ -11,43 +11,10 @@ require 'retryable'
11
11
  sunbro/dynamic_http
12
12
  sunbro/http
13
13
  sunbro/page
14
- sunbro/initialize
14
+ sunbro/connection
15
15
  ).each do |f|
16
16
  require f
17
17
  end
18
18
 
19
19
  module Sunbro
20
- MAX_RETRIES = 5
21
-
22
- def get_page(link, opts={})
23
- fetch_with_connection(http, link, opts)
24
- end
25
-
26
- def render_page(link, opts={})
27
- fetch_with_connection(dhttp, link, opts)
28
- end
29
-
30
- def fetch_with_connection(conn, link, opts)
31
- page, tries = nil, MAX_RETRIES
32
- begin
33
- page = conn.fetch_page(link, opts)
34
- sleep 1
35
- end until page.try(:present?) || (tries -= 1).zero?
36
- page.discard_doc! unless page.is_valid?
37
- page
38
- end
39
-
40
- def http
41
- @http ||= HTTP.new
42
- end
43
-
44
- def dhttp
45
- @dhttp ||= DynamicHTTP.new
46
- end
47
-
48
- def close_http_connections
49
- @http.close if @http
50
- @dhttp.close if @dhttp
51
- rescue IOError
52
- end
53
20
  end
@@ -1,9 +1,48 @@
1
- class Connection
2
- extend Sunbro
3
- attr_reader :http, :dhttp
1
+ module Sunbro
2
+ class Connection
3
+ attr_reader :http, :dhttp
4
4
 
5
- def close
6
- close_http_connections
5
+ MAX_RETRIES = 3
6
+
7
+ def fetch_page(link, opts={})
8
+ conn = opts[:force_format] == (:dhtml || 'dhtml') ? dhttp : http
9
+ tries = opts[:tries] || MAX_RETRIES
10
+ sleep_interval = opts[:sleep] || 1
11
+
12
+ page = Retryable.retryable(sleep: sleep_interval, tries: tries) do
13
+ web_retry(opts) do
14
+ conn.fetch_page(link, opts)
15
+ end
16
+ end
17
+ page.discard_doc! unless page.valid?
18
+ page
19
+ end
20
+
21
+ def session
22
+ @dhttp.try(:session)
23
+ end
24
+
25
+ def http
26
+ @http ||= HTTP.new
27
+ end
28
+
29
+ def dhttp
30
+ @dhttp ||= DynamicHTTP.new
31
+ end
32
+
33
+ def close
34
+ @http.try(:close)
35
+ @dhttp.try(:close)
36
+ rescue IOError
37
+ end
38
+
39
+ def web_retry(opts)
40
+ page, tries, sleep_interval = nil, opts[:tries], opts[:sleep]
41
+ begin
42
+ page = yield
43
+ sleep(sleep_interval) unless page.valid?
44
+ end until page.valid? || (tries -= 1).zero?
45
+ page
46
+ end
7
47
  end
8
48
  end
9
-
@@ -74,8 +74,10 @@ module Sunbro
74
74
 
75
75
  def get_page(url, opts)
76
76
  reset = opts.fetch(:reset) rescue true
77
+ start = Time.current.to_i
77
78
  session.visit(url.to_s)
78
79
  page = create_page_from_session(url, session, opts)
80
+ page.response_time = ((Time.now - start) * 1000).round
79
81
  session.reset! if reset
80
82
  page
81
83
  rescue Capybara::Poltergeist::TimeoutError => e
@@ -13,6 +13,7 @@ module Sunbro
13
13
  end
14
14
 
15
15
  def initialize(opts = {})
16
+ RestClient.proxy ||= Sunbro::Settings.proxy_url
16
17
  @connections = {}
17
18
  @opts = opts
18
19
  end
@@ -64,7 +64,7 @@ module Sunbro
64
64
  end
65
65
  end
66
66
 
67
- def is_valid?
67
+ def valid?
68
68
  (url != "about:blank") && !not_found? && present?
69
69
  end
70
70
 
@@ -151,7 +151,7 @@ module Sunbro
151
151
  href = doc.search('//head/base/@href')
152
152
  URI(href.to_s) unless href.nil? rescue nil
153
153
  end unless @base
154
-
154
+
155
155
  return nil if @base && @base.to_s().empty?
156
156
  @base
157
157
  end
@@ -1,3 +1,3 @@
1
1
  module Sunbro
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
metadata CHANGED
@@ -1,153 +1,153 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sunbro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jon Stokes
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-25 00:00:00.000000000 Z
11
+ date: 2015-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
+ name: nokogiri
14
15
  requirement: !ruby/object:Gem::Requirement
15
16
  requirements:
16
- - - '>='
17
+ - - ">="
17
18
  - !ruby/object:Gem::Version
18
19
  version: '0'
19
- name: nokogiri
20
- prerelease: false
21
20
  type: :runtime
21
+ prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
+ name: capybara
28
29
  requirement: !ruby/object:Gem::Requirement
29
30
  requirements:
30
- - - '>='
31
+ - - ">="
31
32
  - !ruby/object:Gem::Version
32
33
  version: '0'
33
- name: capybara
34
- prerelease: false
35
34
  type: :runtime
35
+ prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
+ name: poltergeist
42
43
  requirement: !ruby/object:Gem::Requirement
43
44
  requirements:
44
- - - '>='
45
+ - - ">="
45
46
  - !ruby/object:Gem::Version
46
47
  version: '0'
47
- name: poltergeist
48
- prerelease: false
49
48
  type: :runtime
49
+ prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
+ name: rest-client
56
57
  requirement: !ruby/object:Gem::Requirement
57
58
  requirements:
58
- - - '>='
59
+ - - ">="
59
60
  - !ruby/object:Gem::Version
60
61
  version: '0'
61
- name: rest-client
62
- prerelease: false
63
62
  type: :runtime
63
+ prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
+ name: activesupport
70
71
  requirement: !ruby/object:Gem::Requirement
71
72
  requirements:
72
- - - '>='
73
+ - - ">="
73
74
  - !ruby/object:Gem::Version
74
75
  version: '0'
75
- name: activesupport
76
- prerelease: false
77
76
  type: :runtime
77
+ prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
+ name: retryable
84
85
  requirement: !ruby/object:Gem::Requirement
85
86
  requirements:
86
- - - '>='
87
+ - - ">="
87
88
  - !ruby/object:Gem::Version
88
89
  version: '0'
89
- name: retryable
90
- prerelease: false
91
90
  type: :runtime
91
+ prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - '>='
94
+ - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
+ name: bundler
98
99
  requirement: !ruby/object:Gem::Requirement
99
100
  requirements:
100
- - - ~>
101
+ - - "~>"
101
102
  - !ruby/object:Gem::Version
102
103
  version: '1.5'
103
- name: bundler
104
- prerelease: false
105
104
  type: :development
105
+ prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ~>
108
+ - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '1.5'
111
111
  - !ruby/object:Gem::Dependency
112
+ name: rake
112
113
  requirement: !ruby/object:Gem::Requirement
113
114
  requirements:
114
- - - '>='
115
+ - - ">="
115
116
  - !ruby/object:Gem::Version
116
117
  version: '0'
117
- name: rake
118
- prerelease: false
119
118
  type: :development
119
+ prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - '>='
122
+ - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  - !ruby/object:Gem::Dependency
126
+ name: rspec
126
127
  requirement: !ruby/object:Gem::Requirement
127
128
  requirements:
128
- - - '>='
129
+ - - ">="
129
130
  - !ruby/object:Gem::Version
130
131
  version: '0'
131
- name: rspec
132
- prerelease: false
133
132
  type: :development
133
+ prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
- - - '>='
136
+ - - ">="
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  - !ruby/object:Gem::Dependency
140
+ name: mocktra
140
141
  requirement: !ruby/object:Gem::Requirement
141
142
  requirements:
142
- - - '>='
143
+ - - ">="
143
144
  - !ruby/object:Gem::Version
144
145
  version: '0'
145
- name: mocktra
146
- prerelease: false
147
146
  type: :development
147
+ prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
- - - '>='
150
+ - - ">="
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
153
  description: Requires phantomjs.
@@ -157,7 +157,7 @@ executables: []
157
157
  extensions: []
158
158
  extra_rdoc_files: []
159
159
  files:
160
- - .gitignore
160
+ - ".gitignore"
161
161
  - Gemfile
162
162
  - LICENSE.txt
163
163
  - README.md
@@ -166,7 +166,6 @@ files:
166
166
  - lib/sunbro/connection.rb
167
167
  - lib/sunbro/dynamic_http.rb
168
168
  - lib/sunbro/http.rb
169
- - lib/sunbro/initialize.rb
170
169
  - lib/sunbro/page.rb
171
170
  - lib/sunbro/settings.rb
172
171
  - lib/sunbro/version.rb
@@ -179,26 +178,27 @@ homepage: ''
179
178
  licenses:
180
179
  - MIT
181
180
  metadata: {}
182
- post_install_message:
181
+ post_install_message:
183
182
  rdoc_options: []
184
183
  require_paths:
185
184
  - lib
186
185
  required_ruby_version: !ruby/object:Gem::Requirement
187
186
  requirements:
188
- - - '>='
187
+ - - ">="
189
188
  - !ruby/object:Gem::Version
190
189
  version: '0'
191
190
  required_rubygems_version: !ruby/object:Gem::Requirement
192
191
  requirements:
193
- - - '>='
192
+ - - ">="
194
193
  - !ruby/object:Gem::Version
195
194
  version: '0'
196
195
  requirements: []
197
- rubyforge_project:
198
- rubygems_version: 2.1.9
199
- signing_key:
196
+ rubyforge_project:
197
+ rubygems_version: 2.4.6
198
+ signing_key:
200
199
  specification_version: 4
201
- summary: Some code that I use to crawl the web at scale. Shared in the spirit of jolly cooperation.
200
+ summary: Some code that I use to crawl the web at scale. Shared in the spirit of jolly
201
+ cooperation.
202
202
  test_files:
203
203
  - spec/page_spec.rb
204
204
  - spec/settings_spec.rb
@@ -1,4 +0,0 @@
1
- if Sunbro::Settings.proxy_url
2
- puts "## Setting RestClient proxy to #{Sunbro::Settings.proxy_url}"
3
- RestClient.proxy = Sunbro::Settings.proxy_url
4
- end