sunbro 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sunbro.rb +1 -34
- data/lib/sunbro/connection.rb +45 -6
- data/lib/sunbro/dynamic_http.rb +2 -0
- data/lib/sunbro/http.rb +1 -0
- data/lib/sunbro/page.rb +2 -2
- data/lib/sunbro/version.rb +1 -1
- metadata +52 -52
- data/lib/sunbro/initialize.rb +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d7f760133a4fab073cd7a7dd5c064ed1744f025
|
4
|
+
data.tar.gz: 68b98b70f0e591ca9056b1db73238ea1a578418c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fd0844f9a0157b3dfe7f38f460fe44d023985fbb6cd62d99738f6007fef896499f1fc5439881b85bc37e49a45cdb704af176350f766eab7e3d5d0309f7afdf7f
|
7
|
+
data.tar.gz: cef9770fe754b0829452adc5ebb20cc0c5119e819b01a339cb807f3f034e982c78873b9e8414fe480510f99643d7d57945e0a0b5b4198ce5b9c415dc0efed499
|
data/lib/sunbro.rb
CHANGED
@@ -11,43 +11,10 @@ require 'retryable'
|
|
11
11
|
sunbro/dynamic_http
|
12
12
|
sunbro/http
|
13
13
|
sunbro/page
|
14
|
-
sunbro/initialize
|
14
|
+
sunbro/connection
|
15
15
|
).each do |f|
|
16
16
|
require f
|
17
17
|
end
|
18
18
|
|
19
19
|
module Sunbro
|
20
|
-
MAX_RETRIES = 5
|
21
|
-
|
22
|
-
def get_page(link, opts={})
|
23
|
-
fetch_with_connection(http, link, opts)
|
24
|
-
end
|
25
|
-
|
26
|
-
def render_page(link, opts={})
|
27
|
-
fetch_with_connection(dhttp, link, opts)
|
28
|
-
end
|
29
|
-
|
30
|
-
def fetch_with_connection(conn, link, opts)
|
31
|
-
page, tries = nil, MAX_RETRIES
|
32
|
-
begin
|
33
|
-
page = conn.fetch_page(link, opts)
|
34
|
-
sleep 1
|
35
|
-
end until page.try(:present?) || (tries -= 1).zero?
|
36
|
-
page.discard_doc! unless page.is_valid?
|
37
|
-
page
|
38
|
-
end
|
39
|
-
|
40
|
-
def http
|
41
|
-
@http ||= HTTP.new
|
42
|
-
end
|
43
|
-
|
44
|
-
def dhttp
|
45
|
-
@dhttp ||= DynamicHTTP.new
|
46
|
-
end
|
47
|
-
|
48
|
-
def close_http_connections
|
49
|
-
@http.close if @http
|
50
|
-
@dhttp.close if @dhttp
|
51
|
-
rescue IOError
|
52
|
-
end
|
53
20
|
end
|
data/lib/sunbro/connection.rb
CHANGED
@@ -1,9 +1,48 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
module Sunbro
|
2
|
+
class Connection
|
3
|
+
attr_reader :http, :dhttp
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
MAX_RETRIES = 3
|
6
|
+
|
7
|
+
def fetch_page(link, opts={})
|
8
|
+
conn = opts[:force_format] == (:dhtml || 'dhtml') ? dhttp : http
|
9
|
+
tries = opts[:tries] || MAX_RETRIES
|
10
|
+
sleep_interval = opts[:sleep] || 1
|
11
|
+
|
12
|
+
page = Retryable.retryable(sleep: sleep_interval, tries: tries) do
|
13
|
+
web_retry(opts) do
|
14
|
+
conn.fetch_page(link, opts)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
page.discard_doc! unless page.valid?
|
18
|
+
page
|
19
|
+
end
|
20
|
+
|
21
|
+
def session
|
22
|
+
@dhttp.try(:session)
|
23
|
+
end
|
24
|
+
|
25
|
+
def http
|
26
|
+
@http ||= HTTP.new
|
27
|
+
end
|
28
|
+
|
29
|
+
def dhttp
|
30
|
+
@dhttp ||= DynamicHTTP.new
|
31
|
+
end
|
32
|
+
|
33
|
+
def close
|
34
|
+
@http.try(:close)
|
35
|
+
@dhttp.try(:close)
|
36
|
+
rescue IOError
|
37
|
+
end
|
38
|
+
|
39
|
+
def web_retry(opts)
|
40
|
+
page, tries, sleep_interval = nil, opts[:tries], opts[:sleep]
|
41
|
+
begin
|
42
|
+
page = yield
|
43
|
+
sleep(sleep_interval) unless page.valid?
|
44
|
+
end until page.valid? || (tries -= 1).zero?
|
45
|
+
page
|
46
|
+
end
|
7
47
|
end
|
8
48
|
end
|
9
|
-
|
data/lib/sunbro/dynamic_http.rb
CHANGED
@@ -74,8 +74,10 @@ module Sunbro
|
|
74
74
|
|
75
75
|
def get_page(url, opts)
|
76
76
|
reset = opts.fetch(:reset) rescue true
|
77
|
+
start = Time.current.to_i
|
77
78
|
session.visit(url.to_s)
|
78
79
|
page = create_page_from_session(url, session, opts)
|
80
|
+
page.response_time = ((Time.now - start) * 1000).round
|
79
81
|
session.reset! if reset
|
80
82
|
page
|
81
83
|
rescue Capybara::Poltergeist::TimeoutError => e
|
data/lib/sunbro/http.rb
CHANGED
data/lib/sunbro/page.rb
CHANGED
@@ -64,7 +64,7 @@ module Sunbro
|
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
67
|
-
def is_valid?
|
67
|
+
def valid?
|
68
68
|
(url != "about:blank") && !not_found? && present?
|
69
69
|
end
|
70
70
|
|
@@ -151,7 +151,7 @@ module Sunbro
|
|
151
151
|
href = doc.search('//head/base/@href')
|
152
152
|
URI(href.to_s) unless href.nil? rescue nil
|
153
153
|
end unless @base
|
154
|
-
|
154
|
+
|
155
155
|
return nil if @base && @base.to_s().empty?
|
156
156
|
@base
|
157
157
|
end
|
data/lib/sunbro/version.rb
CHANGED
metadata
CHANGED
@@ -1,153 +1,153 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sunbro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jon Stokes
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
14
15
|
requirement: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
|
-
- -
|
17
|
+
- - ">="
|
17
18
|
- !ruby/object:Gem::Version
|
18
19
|
version: '0'
|
19
|
-
name: nokogiri
|
20
|
-
prerelease: false
|
21
20
|
type: :runtime
|
21
|
+
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
+
name: capybara
|
28
29
|
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - ">="
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
|
-
name: capybara
|
34
|
-
prerelease: false
|
35
34
|
type: :runtime
|
35
|
+
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
+
name: poltergeist
|
42
43
|
requirement: !ruby/object:Gem::Requirement
|
43
44
|
requirements:
|
44
|
-
- -
|
45
|
+
- - ">="
|
45
46
|
- !ruby/object:Gem::Version
|
46
47
|
version: '0'
|
47
|
-
name: poltergeist
|
48
|
-
prerelease: false
|
49
48
|
type: :runtime
|
49
|
+
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
+
name: rest-client
|
56
57
|
requirement: !ruby/object:Gem::Requirement
|
57
58
|
requirements:
|
58
|
-
- -
|
59
|
+
- - ">="
|
59
60
|
- !ruby/object:Gem::Version
|
60
61
|
version: '0'
|
61
|
-
name: rest-client
|
62
|
-
prerelease: false
|
63
62
|
type: :runtime
|
63
|
+
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
+
name: activesupport
|
70
71
|
requirement: !ruby/object:Gem::Requirement
|
71
72
|
requirements:
|
72
|
-
- -
|
73
|
+
- - ">="
|
73
74
|
- !ruby/object:Gem::Version
|
74
75
|
version: '0'
|
75
|
-
name: activesupport
|
76
|
-
prerelease: false
|
77
76
|
type: :runtime
|
77
|
+
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
+
name: retryable
|
84
85
|
requirement: !ruby/object:Gem::Requirement
|
85
86
|
requirements:
|
86
|
-
- -
|
87
|
+
- - ">="
|
87
88
|
- !ruby/object:Gem::Version
|
88
89
|
version: '0'
|
89
|
-
name: retryable
|
90
|
-
prerelease: false
|
91
90
|
type: :runtime
|
91
|
+
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
+
name: bundler
|
98
99
|
requirement: !ruby/object:Gem::Requirement
|
99
100
|
requirements:
|
100
|
-
- - ~>
|
101
|
+
- - "~>"
|
101
102
|
- !ruby/object:Gem::Version
|
102
103
|
version: '1.5'
|
103
|
-
name: bundler
|
104
|
-
prerelease: false
|
105
104
|
type: :development
|
105
|
+
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- - ~>
|
108
|
+
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '1.5'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
+
name: rake
|
112
113
|
requirement: !ruby/object:Gem::Requirement
|
113
114
|
requirements:
|
114
|
-
- -
|
115
|
+
- - ">="
|
115
116
|
- !ruby/object:Gem::Version
|
116
117
|
version: '0'
|
117
|
-
name: rake
|
118
|
-
prerelease: false
|
119
118
|
type: :development
|
119
|
+
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- -
|
122
|
+
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
|
+
name: rspec
|
126
127
|
requirement: !ruby/object:Gem::Requirement
|
127
128
|
requirements:
|
128
|
-
- -
|
129
|
+
- - ">="
|
129
130
|
- !ruby/object:Gem::Version
|
130
131
|
version: '0'
|
131
|
-
name: rspec
|
132
|
-
prerelease: false
|
133
132
|
type: :development
|
133
|
+
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
|
-
- -
|
136
|
+
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
|
+
name: mocktra
|
140
141
|
requirement: !ruby/object:Gem::Requirement
|
141
142
|
requirements:
|
142
|
-
- -
|
143
|
+
- - ">="
|
143
144
|
- !ruby/object:Gem::Version
|
144
145
|
version: '0'
|
145
|
-
name: mocktra
|
146
|
-
prerelease: false
|
147
146
|
type: :development
|
147
|
+
prerelease: false
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
|
-
- -
|
150
|
+
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0'
|
153
153
|
description: Requires phantomjs.
|
@@ -157,7 +157,7 @@ executables: []
|
|
157
157
|
extensions: []
|
158
158
|
extra_rdoc_files: []
|
159
159
|
files:
|
160
|
-
- .gitignore
|
160
|
+
- ".gitignore"
|
161
161
|
- Gemfile
|
162
162
|
- LICENSE.txt
|
163
163
|
- README.md
|
@@ -166,7 +166,6 @@ files:
|
|
166
166
|
- lib/sunbro/connection.rb
|
167
167
|
- lib/sunbro/dynamic_http.rb
|
168
168
|
- lib/sunbro/http.rb
|
169
|
-
- lib/sunbro/initialize.rb
|
170
169
|
- lib/sunbro/page.rb
|
171
170
|
- lib/sunbro/settings.rb
|
172
171
|
- lib/sunbro/version.rb
|
@@ -179,26 +178,27 @@ homepage: ''
|
|
179
178
|
licenses:
|
180
179
|
- MIT
|
181
180
|
metadata: {}
|
182
|
-
post_install_message:
|
181
|
+
post_install_message:
|
183
182
|
rdoc_options: []
|
184
183
|
require_paths:
|
185
184
|
- lib
|
186
185
|
required_ruby_version: !ruby/object:Gem::Requirement
|
187
186
|
requirements:
|
188
|
-
- -
|
187
|
+
- - ">="
|
189
188
|
- !ruby/object:Gem::Version
|
190
189
|
version: '0'
|
191
190
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
192
191
|
requirements:
|
193
|
-
- -
|
192
|
+
- - ">="
|
194
193
|
- !ruby/object:Gem::Version
|
195
194
|
version: '0'
|
196
195
|
requirements: []
|
197
|
-
rubyforge_project:
|
198
|
-
rubygems_version: 2.
|
199
|
-
signing_key:
|
196
|
+
rubyforge_project:
|
197
|
+
rubygems_version: 2.4.6
|
198
|
+
signing_key:
|
200
199
|
specification_version: 4
|
201
|
-
summary: Some code that I use to crawl the web at scale. Shared in the spirit of jolly
|
200
|
+
summary: Some code that I use to crawl the web at scale. Shared in the spirit of jolly
|
201
|
+
cooperation.
|
202
202
|
test_files:
|
203
203
|
- spec/page_spec.rb
|
204
204
|
- spec/settings_spec.rb
|
data/lib/sunbro/initialize.rb
DELETED