http_reader 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/lib/http_reader/engine.rb +21 -8
- data/lib/http_reader/version.rb +1 -1
- data/spec/unit/lib/http_reader/engine_spec.rb +25 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d7bf3d80028e3d8d30021fa2a5f5e9e8bbc045d
|
4
|
+
data.tar.gz: 0f2d2d21c8406d9c82fd0342360a7140d0bae1fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e21c2d5c8c5d0e2fbe21eff99dd4db77d8a591eb5592d0ccf6ae8985fb9ff23bfa54f5ffa2c3ea7af7bf5a2628b49c4c44b06742cbf6d4cf663b6886cdca948d
|
7
|
+
data.tar.gz: 9fcf04a7fbbb4395b8cd197d90f7d3c21eefeeafdf3c717678574511546df2b6fbc5cb14f84b8d34983c76d7219dcb8616f214af75f39019fc47256e69050736
|
data/README.md
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
# HttpReader
|
2
|
+
[](http://badge.fury.io/rb/http_reader)
|
3
|
+
[](https://travis-ci.org/pniemczyk/http_reader)
|
4
|
+
[](https://gemnasium.com/pniemczyk/http_reader)
|
5
|
+
[](https://codeclimate.com/github/pniemczyk/http_reader)
|
2
6
|
|
3
7
|
Read any document on internet and parse to your own format :D
|
4
8
|
|
@@ -100,9 +104,10 @@ Or install it yourself as:
|
|
100
104
|
- httparty
|
101
105
|
- headless
|
102
106
|
- watir-webdriver
|
107
|
+
|
103
108
|
### System components
|
104
109
|
- xvfb
|
105
|
-
instalation on ubuntu: sudo apt-get install xvfb
|
110
|
+
*instalation on ubuntu: sudo apt-get install xvfb*
|
106
111
|
|
107
112
|
|
108
113
|
## Contributing
|
data/lib/http_reader/engine.rb
CHANGED
@@ -8,7 +8,7 @@ module HttpReader
|
|
8
8
|
class Engine
|
9
9
|
ReadError = Class.new(StandardError)
|
10
10
|
DefaultResponse = Struct.new(:body, :code, :message, :headers)
|
11
|
-
attr_reader :parsers, :default_parser, :http_client, :browser, :logger
|
11
|
+
attr_reader :parsers, :default_parser, :http_client, :browser, :logger, :browser_keep_running
|
12
12
|
|
13
13
|
def initialize(config = {})
|
14
14
|
@parsers = config.fetch(:parsers, [])
|
@@ -16,6 +16,7 @@ module HttpReader
|
|
16
16
|
@http_client = config.fetch(:http_client, HTTParty)
|
17
17
|
@browser = config.fetch(:browser, Watir::Browser)
|
18
18
|
@logger = config.fetch(:logger, Logger.new(STDOUT))
|
19
|
+
@browser_keep_running = config.fetch(:browser_keep_running, true)
|
19
20
|
end
|
20
21
|
|
21
22
|
def read(url, opts = {})
|
@@ -35,6 +36,12 @@ module HttpReader
|
|
35
36
|
raise ReadError.new(e.message)
|
36
37
|
end
|
37
38
|
|
39
|
+
def close_browser
|
40
|
+
active_browser.close
|
41
|
+
headless.destroy
|
42
|
+
@active_browser = nil
|
43
|
+
end
|
44
|
+
|
38
45
|
private
|
39
46
|
|
40
47
|
def find_parser(url)
|
@@ -46,12 +53,9 @@ module HttpReader
|
|
46
53
|
end
|
47
54
|
|
48
55
|
def browse(url, parser, opts = {})
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
html = parser.browse_actions_for_html(b, opts)
|
53
|
-
b.close
|
54
|
-
headless.destroy
|
56
|
+
active_browser.goto(url)
|
57
|
+
html = parser.browse_actions_for_html(active_browser, opts)
|
58
|
+
close_browser unless browser_keep_running
|
55
59
|
DefaultResponse.new(html, 200, opts[:message] || "success")
|
56
60
|
rescue => e
|
57
61
|
log_error('browse', e)
|
@@ -67,8 +71,17 @@ module HttpReader
|
|
67
71
|
DefaultResponse.new(nil, 500, e.message)
|
68
72
|
end
|
69
73
|
|
74
|
+
def active_browser
|
75
|
+
@active_browser ||= new_browser
|
76
|
+
end
|
77
|
+
|
78
|
+
def new_browser
|
79
|
+
headless.start
|
80
|
+
browser.new
|
81
|
+
end
|
82
|
+
|
70
83
|
def headless
|
71
|
-
@headless ||= Headless.new
|
84
|
+
@headless ||= Headless.new(display: 100, reuse: true, destroy_at_exit: true)
|
72
85
|
end
|
73
86
|
|
74
87
|
def log_error(method, ex, info = nil)
|
data/lib/http_reader/version.rb
CHANGED
data/spec/unit/lib/http_reader/engine_spec.rb
CHANGED
@@ -164,25 +164,44 @@ describe HttpReader::Engine do
|
|
164
164
|
subject.read(test_url, request_opts: request_opts)
|
165
165
|
end
|
166
166
|
|
167
|
-
it 'should provide
|
167
|
+
it 'should provide browse_opts to request method' do
|
168
168
|
message = 'done'
|
169
169
|
browse_opts = { process: :continue, message: message}
|
170
170
|
browser_body = "body"
|
171
171
|
response = described_class::DefaultResponse.new(browser_body, 200, message)
|
172
|
-
expect(Headless).to receive(:new).and_return(headless)
|
172
|
+
expect(Headless).to receive(:new).with(display: 100, reuse: true, destroy_at_exit: true).and_return(headless)
|
173
173
|
expect(headless).to receive(:start)
|
174
174
|
expect(default_parser).to receive(:use_browser).and_return(true)
|
175
|
-
expect(browser).to receive(:
|
175
|
+
expect(browser).to receive(:new).and_return(active_browser)
|
176
|
+
expect(active_browser).to receive(:goto).with(test_url)
|
176
177
|
expect(default_parser).to receive(:browse_actions_for_html)
|
177
178
|
.with(active_browser, browse_opts)
|
178
179
|
.and_return(browser_body)
|
179
|
-
expect(active_browser).to receive(:close)
|
180
|
-
expect(headless).to receive(:destroy)
|
181
180
|
expect(default_parser).to receive(:parse).with(response, {})
|
182
181
|
|
183
182
|
subject.read(test_url, browse_opts: browse_opts)
|
184
183
|
end
|
185
184
|
|
185
|
+
it 'should close browser and destroy headless after browse' do
|
186
|
+
message = 'done'
|
187
|
+
browse_opts = { process: :continue, message: message}
|
188
|
+
browser_body = "body"
|
189
|
+
response = described_class::DefaultResponse.new(browser_body, 200, message)
|
190
|
+
expect(Headless).to receive(:new).with(display: 100, reuse: true, destroy_at_exit: true).and_return(headless)
|
191
|
+
expect(headless).to receive(:start)
|
192
|
+
expect(default_parser).to receive(:use_browser).and_return(true)
|
193
|
+
expect(browser).to receive(:new).and_return(active_browser)
|
194
|
+
expect(active_browser).to receive(:goto).with(test_url)
|
195
|
+
expect(default_parser).to receive(:browse_actions_for_html)
|
196
|
+
.with(active_browser, browse_opts)
|
197
|
+
.and_return(browser_body)
|
198
|
+
expect(default_parser).to receive(:parse).with(response, {})
|
199
|
+
subject.instance_variable_set(:@browser_keep_running,false)
|
200
|
+
expect(active_browser).to receive(:close)
|
201
|
+
expect(headless).to receive(:destroy)
|
202
|
+
subject.read(test_url, browse_opts: browse_opts)
|
203
|
+
end
|
204
|
+
|
186
205
|
context 'on raise errors' do
|
187
206
|
|
188
207
|
it 'raise ReadError' do
|
@@ -206,7 +225,7 @@ describe HttpReader::Engine do
|
|
206
225
|
error_msg = 'HttpReader::Engine#browse - Bad '
|
207
226
|
response = described_class::DefaultResponse.new(nil, 500, 'Bad')
|
208
227
|
expect(default_parser).to receive(:use_browser).and_return(true)
|
209
|
-
expect(browser).to receive(:
|
228
|
+
expect(browser).to receive(:new).and_raise('Bad')
|
210
229
|
expect(default_parser).to receive(:parse).with(response, {})
|
211
230
|
expect(logger).to receive(:error).with(error_msg)
|
212
231
|
subject.read(test_url)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paweł Niemczyk
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|