http_reader 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/lib/http_reader/engine.rb +21 -8
- data/lib/http_reader/version.rb +1 -1
- data/spec/unit/lib/http_reader/engine_spec.rb +25 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d7bf3d80028e3d8d30021fa2a5f5e9e8bbc045d
|
4
|
+
data.tar.gz: 0f2d2d21c8406d9c82fd0342360a7140d0bae1fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e21c2d5c8c5d0e2fbe21eff99dd4db77d8a591eb5592d0ccf6ae8985fb9ff23bfa54f5ffa2c3ea7af7bf5a2628b49c4c44b06742cbf6d4cf663b6886cdca948d
|
7
|
+
data.tar.gz: 9fcf04a7fbbb4395b8cd197d90f7d3c21eefeeafdf3c717678574511546df2b6fbc5cb14f84b8d34983c76d7219dcb8616f214af75f39019fc47256e69050736
|
data/README.md
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
# HttpReader
|
2
|
+
[![Gem Version](https://badge.fury.io/rb/http_reader.svg)](http://badge.fury.io/rb/http_reader)
|
3
|
+
[![Build Status](https://secure.travis-ci.org/pniemczyk/http_reader.png?branch=master)](https://travis-ci.org/pniemczyk/http_reader)
|
4
|
+
[![Dependency Status](https://gemnasium.com/pniemczyk/http_reader.png)](https://gemnasium.com/pniemczyk/http_reader)
|
5
|
+
[![Code Climate](https://codeclimate.com/github/pniemczyk/http_reader/badges/gpa.svg)](https://codeclimate.com/github/pniemczyk/http_reader)
|
2
6
|
|
3
7
|
Read any document on internet and parse to your own format :D
|
4
8
|
|
@@ -100,9 +104,10 @@ Or install it yourself as:
|
|
100
104
|
- httparty
|
101
105
|
- headless
|
102
106
|
- watir-webdriver
|
107
|
+
|
103
108
|
### System components
|
104
109
|
- xvfb
|
105
|
-
instalation on ubuntu: sudo apt-get install xvfb
|
110
|
+
*instalation on ubuntu: sudo apt-get install xvfb*
|
106
111
|
|
107
112
|
|
108
113
|
## Contributing
|
data/lib/http_reader/engine.rb
CHANGED
@@ -8,7 +8,7 @@ module HttpReader
|
|
8
8
|
class Engine
|
9
9
|
ReadError = Class.new(StandardError)
|
10
10
|
DefaultResponse = Struct.new(:body, :code, :message, :headers)
|
11
|
-
attr_reader :parsers, :default_parser, :http_client, :browser, :logger
|
11
|
+
attr_reader :parsers, :default_parser, :http_client, :browser, :logger, :browser_keep_running
|
12
12
|
|
13
13
|
def initialize(config = {})
|
14
14
|
@parsers = config.fetch(:parsers, [])
|
@@ -16,6 +16,7 @@ module HttpReader
|
|
16
16
|
@http_client = config.fetch(:http_client, HTTParty)
|
17
17
|
@browser = config.fetch(:browser, Watir::Browser)
|
18
18
|
@logger = config.fetch(:logger, Logger.new(STDOUT))
|
19
|
+
@browser_keep_running = config.fetch(:browser_keep_running, true)
|
19
20
|
end
|
20
21
|
|
21
22
|
def read(url, opts = {})
|
@@ -35,6 +36,12 @@ module HttpReader
|
|
35
36
|
raise ReadError.new(e.message)
|
36
37
|
end
|
37
38
|
|
39
|
+
def close_browser
|
40
|
+
active_browser.close
|
41
|
+
headless.destroy
|
42
|
+
@active_browser = nil
|
43
|
+
end
|
44
|
+
|
38
45
|
private
|
39
46
|
|
40
47
|
def find_parser(url)
|
@@ -46,12 +53,9 @@ module HttpReader
|
|
46
53
|
end
|
47
54
|
|
48
55
|
def browse(url, parser, opts = {})
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
html = parser.browse_actions_for_html(b, opts)
|
53
|
-
b.close
|
54
|
-
headless.destroy
|
56
|
+
active_browser.goto(url)
|
57
|
+
html = parser.browse_actions_for_html(active_browser, opts)
|
58
|
+
close_browser unless browser_keep_running
|
55
59
|
DefaultResponse.new(html, 200, opts[:message] || "success")
|
56
60
|
rescue => e
|
57
61
|
log_error('browse', e)
|
@@ -67,8 +71,17 @@ module HttpReader
|
|
67
71
|
DefaultResponse.new(nil, 500, e.message)
|
68
72
|
end
|
69
73
|
|
74
|
+
def active_browser
|
75
|
+
@active_browser ||= new_browser
|
76
|
+
end
|
77
|
+
|
78
|
+
def new_browser
|
79
|
+
headless.start
|
80
|
+
browser.new
|
81
|
+
end
|
82
|
+
|
70
83
|
def headless
|
71
|
-
@headless ||= Headless.new
|
84
|
+
@headless ||= Headless.new(display: 100, reuse: true, destroy_at_exit: true)
|
72
85
|
end
|
73
86
|
|
74
87
|
def log_error(method, ex, info = nil)
|
data/lib/http_reader/version.rb
CHANGED
data/spec/unit/lib/http_reader/engine_spec.rb
CHANGED
@@ -164,25 +164,44 @@ describe HttpReader::Engine do
|
|
164
164
|
subject.read(test_url, request_opts: request_opts)
|
165
165
|
end
|
166
166
|
|
167
|
-
it 'should provide
|
167
|
+
it 'should provide browse_opts to request method' do
|
168
168
|
message = 'done'
|
169
169
|
browse_opts = { process: :continue, message: message}
|
170
170
|
browser_body = "body"
|
171
171
|
response = described_class::DefaultResponse.new(browser_body, 200, message)
|
172
|
-
expect(Headless).to receive(:new).and_return(headless)
|
172
|
+
expect(Headless).to receive(:new).with(display: 100, reuse: true, destroy_at_exit: true).and_return(headless)
|
173
173
|
expect(headless).to receive(:start)
|
174
174
|
expect(default_parser).to receive(:use_browser).and_return(true)
|
175
|
-
expect(browser).to receive(:
|
175
|
+
expect(browser).to receive(:new).and_return(active_browser)
|
176
|
+
expect(active_browser).to receive(:goto).with(test_url)
|
176
177
|
expect(default_parser).to receive(:browse_actions_for_html)
|
177
178
|
.with(active_browser, browse_opts)
|
178
179
|
.and_return(browser_body)
|
179
|
-
expect(active_browser).to receive(:close)
|
180
|
-
expect(headless).to receive(:destroy)
|
181
180
|
expect(default_parser).to receive(:parse).with(response, {})
|
182
181
|
|
183
182
|
subject.read(test_url, browse_opts: browse_opts)
|
184
183
|
end
|
185
184
|
|
185
|
+
it 'should close browser and destroy headless after browse' do
|
186
|
+
message = 'done'
|
187
|
+
browse_opts = { process: :continue, message: message}
|
188
|
+
browser_body = "body"
|
189
|
+
response = described_class::DefaultResponse.new(browser_body, 200, message)
|
190
|
+
expect(Headless).to receive(:new).with(display: 100, reuse: true, destroy_at_exit: true).and_return(headless)
|
191
|
+
expect(headless).to receive(:start)
|
192
|
+
expect(default_parser).to receive(:use_browser).and_return(true)
|
193
|
+
expect(browser).to receive(:new).and_return(active_browser)
|
194
|
+
expect(active_browser).to receive(:goto).with(test_url)
|
195
|
+
expect(default_parser).to receive(:browse_actions_for_html)
|
196
|
+
.with(active_browser, browse_opts)
|
197
|
+
.and_return(browser_body)
|
198
|
+
expect(default_parser).to receive(:parse).with(response, {})
|
199
|
+
subject.instance_variable_set(:@browser_keep_running,false)
|
200
|
+
expect(active_browser).to receive(:close)
|
201
|
+
expect(headless).to receive(:destroy)
|
202
|
+
subject.read(test_url, browse_opts: browse_opts)
|
203
|
+
end
|
204
|
+
|
186
205
|
context 'on raise errors' do
|
187
206
|
|
188
207
|
it 'raise ReadError' do
|
@@ -206,7 +225,7 @@ describe HttpReader::Engine do
|
|
206
225
|
error_msg = 'HttpReader::Engine#browse - Bad '
|
207
226
|
response = described_class::DefaultResponse.new(nil, 500, 'Bad')
|
208
227
|
expect(default_parser).to receive(:use_browser).and_return(true)
|
209
|
-
expect(browser).to receive(:
|
228
|
+
expect(browser).to receive(:new).and_raise('Bad')
|
210
229
|
expect(default_parser).to receive(:parse).with(response, {})
|
211
230
|
expect(logger).to receive(:error).with(error_msg)
|
212
231
|
subject.read(test_url)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paweł Niemczyk
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|