http_reader 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 342e596f1ce05595886d576288111a8d873e5d06
4
+ data.tar.gz: d1bb9056c0e5ef44fa9185fbb8b97943fffa6639
5
+ SHA512:
6
+ metadata.gz: 9983010f18f2bee05fadc9a7583c4c1d1740a173191a5bab70129e2d55fdc6da6da2d061a70e39c4fa6daed91337e366a058b6ccebc74a2666fb8d0ef2690d6a
7
+ data.tar.gz: af912964ef1658a387989e177c4be500a4938aca5df004fc6d203dfcdf469c4f8fe8e7614ccf89cae4d7f67dd89d13835ee7ef0d9c01258110d36275d53135a3
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,4 @@
1
+ --color
2
+ --require spec_helper
3
+ --format progress
4
+ --no-profile
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ script: bundle exec rake
3
+ rvm:
4
+ - 1.9.3
5
+ - 2.0.0
6
+ - 2.1.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in http_reader.gemspec
4
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,6 @@
1
# Guard configuration: re-run the relevant specs whenever a watched file changes.
guard :rspec, cmd: 'rspec' do
  # A library file maps to its unit spec under spec/unit/lib.
  # The dot before "rb" is escaped so that only real ".rb" files match.
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/unit/lib/#{m[1]}_spec.rb" }
  # A changed spec file re-runs itself.
  watch(%r{^spec/(.+)\.rb$}) { |m| "spec/#{m[1]}.rb" }
  # Changing the shared helper re-runs the whole spec directory.
  watch('spec/spec_helper.rb') { "spec" }
  watch('Gemfile')
end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Paweł Niemczyk
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,114 @@
1
+ # HttpReader
2
+
3
+ Read any document on the internet and parse it into your own format :D
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'http_reader'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install http_reader
18
+
19
+ ## Usage
20
+
21
+ engine = HttpReader::Engine.new(opts)
22
+ engine.read('http://www.google.com')
23
+
24
+ ### Available opts [Hash]
25
+ - **parsers:** list of document parser Classes [ default: [] ]
26
+ - **default_parser:** parser used when no parser matches the URL [default: HashPageParser]
27
+ - **http_client:** HTTP client used to download page sources [default: HTTParty]
28
+ - **browser:** browser client used to process and download the page source [default: Watir::Browser]
29
+ - **logger:** default: Logger
30
+
31
+ ## Examples
32
+
33
+ ### Using HashPageParser as the default_parser
34
+
35
+ engine = HttpReader::Engine.new
36
+ read_opts = {title: 'h1', items: '.content li;array'}
37
+ engine.read('http://example.org', read_opts)
38
+
39
+ **Where page body is:**
40
+
41
+ <h1>Information</h1>
42
+ <p>not importante</p>
43
+ <div class="content">
44
+ Items: <ul><li>A</li><li>B</li><li>C</li></ul>
45
+ </div>
46
+
47
+ **Result should be:**
48
+
49
+ {:title=>"Information", :items=>%w{A B C}}
50
+
51
+
52
+ ### Using your own parser class
53
+
54
+ **Class body:**
55
+
56
+ class TestParser < HttpReader::BasePageParser
57
+ @pattern = /^((http|https):\/\/www.google.com)$/
58
+ class << self
59
+ def browse_actions_for_html(browser, opts = {})
60
+ div = browser.div(id: 'als')
61
+ raise 'Cannot find div' unless div.exists?
62
+ div.html
63
+ end
64
+
65
+ def parse(response, opts = {})
66
+ n_body = Nokogiri::HTML(response.body)
67
+ { text: n_body.css('p').text }
68
+ end
69
+
70
+ def use_browser
71
+ true
72
+ end
73
+ end
74
+ end
75
+
76
+ **Initialization:**
77
+
78
+ engine = HttpReader::Engine.new(default_parser: TestParser)
79
+ engine.read('http://www.google.com')
80
+
81
+ **Or**
82
+
83
+ engine = HttpReader::Engine.new(parsers: [TestParser])
84
+ engine.read('http://www.google.com')
85
+
86
+ **Or**
87
+
88
+ engine = HttpReader::Engine.new
89
+ engine.read('http://www.google.com', parser: TestParser)
90
+
91
+
92
+
93
+ ## More info about syntax
94
+ - [watir-webdriver](https://github.com/watir/watir-webdriver)
95
+ - [nokogiri](http://ruby.bastardsbook.com/chapters/html-parsing/)
96
+
97
+ ## Dependencies
98
+ ### Gems
99
+ - nokogiri
100
+ - httparty
101
+ - headless
102
+ - watir-webdriver
103
+ ### System components
104
+ - xvfb
105
+ installation on Ubuntu: `sudo apt-get install xvfb`
106
+
107
+
108
+ ## Contributing
109
+
110
+ 1. Fork it
111
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
112
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
113
+ 4. Push to the branch (`git push origin my-new-feature`)
114
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
# Define the `spec` rake task, which runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# Plain `rake` runs the specs.
task :default => :spec
@@ -0,0 +1,35 @@
1
+ # coding: utf-8
2
# Put the gem's lib/ directory on the load path so the version file can be
# required while the specification is being evaluated.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'http_reader/version'

Gem::Specification.new do |gem|
  gem.name        = 'http_reader'
  gem.version     = HttpReader::VERSION
  gem.authors     = ['Paweł Niemczyk']
  gem.email       = ['pniemczyk@o2.pl']
  gem.description = 'Read page body and parse to specific data'
  gem.summary     = 'Page parser'
  gem.homepage    = 'https://github.com/pniemczyk/http_reader'
  gem.license     = 'MIT'

  # Package every file tracked by git; bin/ entries become executables and
  # files under test/, spec/ or features/ are listed as test files.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Runtime dependencies.
  [
    ['httparty',        '~> 0.13'],
    ['nokogiri',        '~> 1.6'],
    ['headless',        '~> 1.0'],
    ['watir-webdriver', '~> 0.6']
  ].each { |name, requirement| gem.add_dependency(name, requirement) }

  # Development-only dependencies.
  [
    ['bundler',       '~> 1.3'],
    ['rake',          '~> 0'],
    ['rspec',         '~> 3.1'],
    ['guard-rspec',   '~> 0'],
    ['coveralls',     '~> 0'],
    ['awesome_print', '~> 0']
  ].each { |name, requirement| gem.add_development_dependency(name, requirement) }

  gem.post_install_message = 'Do not forget install xvfb. Have fun !'
end
35
+
@@ -0,0 +1,23 @@
1
module HttpReader
  # Base class for URL matchers.
  #
  # The class-level +pattern+ decides which URLs a matcher applies to; the
  # default pattern accepts any single-line string. Instances expose +read+,
  # which returns the body unchanged.
  class BasePageMatcher
    @pattern = /^.*$/

    class << self
      # Regexp used by +match+.
      #
      # Class-level instance variables are not inherited, so a subclass that
      # does not assign its own +@pattern+ falls back to the nearest
      # ancestor's pattern instead of silently getting +nil+.
      #
      # @return [Regexp, nil]
      def pattern
        return @pattern if instance_variable_defined?(:@pattern)

        superclass.pattern if superclass.respond_to?(:pattern)
      end

      # @param url [String] URL to test against +pattern+
      # @return [Boolean] true when +url+ matches the pattern
      def match(url)
        !(url =~ pattern).nil?
      end
    end

    # Returns the given body untouched.
    #
    # @param body [Object]
    # @return [Object] the same +body+
    def read(body)
      body
    end

    private

    # Instance-level shortcut to the class pattern.
    def pattern
      self.class.pattern
    end
  end
end
@@ -0,0 +1,24 @@
1
module HttpReader
  # Base class for page parsers. All behaviour lives in class methods, so a
  # parser is used as a class and never instantiated.
  #
  # Subclasses typically override +parse+, and optionally +use_browser+ and
  # +browse_actions_for_html+ when the page has to be loaded in a browser.
  class BasePageParser
    # Accepts strings that start with http:// or https://.
    # NOTE: ^ and $ are line anchors in Ruby, so a multi-line string matches
    # when any one of its lines looks like a URL.
    @pattern = /^((http|https):\/\/).*$/

    class << self
      # Regexp used by +match+.
      #
      # Class-level instance variables are not inherited, so a subclass that
      # does not assign its own +@pattern+ falls back to the nearest
      # ancestor's pattern instead of silently getting +nil+.
      #
      # @return [Regexp, nil]
      def pattern
        return @pattern if instance_variable_defined?(:@pattern)

        superclass.pattern if superclass.respond_to?(:pattern)
      end

      # @param url [String] URL to test against +pattern+
      # @return [Boolean] true when +url+ matches the pattern
      def match(url)
        !(url =~ pattern).nil?
      end

      # Default parsing: hand back the raw response body.
      #
      # @param response [#body] response object
      # @param opts [Hash] unused by the base implementation
      # @return [Object] +response.body+
      def parse(response, opts = {})
        response.body
      end

      # Hook for parsers that drive a browser. The base implementation does
      # nothing and returns nil.
      #
      # @param browser [Object] browser handed over by the caller
      # @param opts [Hash] unused by the base implementation
      # @return [nil]
      def browse_actions_for_html(browser, opts = {})
      end

      # @return [Boolean] false - the base parser does not ask for a browser
      def use_browser
        false
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require 'httparty'
2
+ require 'logger'
3
+ require 'watir-webdriver'
4
+ require 'headless'
5
+
6
+
7
module HttpReader
  # Downloads a page - over plain HTTP or through a browser running on a
  # headless display, depending on the selected parser - and hands the
  # response to that parser.
  class Engine
    # Raised by #read for any failure that escapes the fetch/parse steps.
    ReadError = Class.new(StandardError)
    # Minimal response used for browser results and for failed fetches.
    DefaultResponse = Struct.new(:body, :code, :message, :headers)
    attr_reader :parsers, :default_parser, :http_client, :browser, :logger

    # @param config [Hash] optional collaborators:
    #   :parsers        - parser classes tried in order (default: [])
    #   :default_parser - parser used when none matches (default: HashPageParser)
    #   :http_client    - object answering the HTTP verb (default: HTTParty)
    #   :browser        - object answering #start(url) (default: Watir::Browser)
    #   :logger         - object answering #error (default: Logger on STDOUT)
    #
    # Defaults are given as blocks so they are only built when the key is
    # missing (e.g. no Logger is created when one is supplied).
    def initialize(config = {})
      @parsers        = config.fetch(:parsers) { [] }
      @default_parser = config.fetch(:default_parser) { HashPageParser }
      @http_client    = config.fetch(:http_client) { HTTParty }
      @browser        = config.fetch(:browser) { Watir::Browser }
      @logger         = config.fetch(:logger) { Logger.new(STDOUT) }
    end

    # Fetches +url+ and returns whatever the parser produces.
    #
    # @param url [String]
    # @param opts [Hash]
    #   :parser       - parser to use instead of looking one up by URL
    #   :browse_opts  - passed to the parser's browser actions
    #   :request_opts - :method (default :get) and :options for the HTTP client
    #   :parse_opts   - passed to the parser's +parse+
    # @return [Object] result of +parser.parse+
    # @raise [ReadError] wrapping the message of any error raised here
    def read(url, opts = {})
      parser = opts[:parser] || find_parser(url)
      response = if parser.use_browser
        browse(url, parser, opts.fetch(:browse_opts, {}))
      else
        request(url, opts.fetch(:request_opts, {}))
      end

      parser.parse(response, opts.fetch(:parse_opts, {}))
    rescue => e
      log_error('read', e, "url: #{url}, opts: #{opts.to_json}")
      raise ReadError, e.message
    end

    private

    # First configured parser whose +match+ accepts the URL, else the default.
    def find_parser(url)
      parsers.each do |parser|
        return parser if parser.match(url)
      end

      default_parser
    end

    # Loads +url+ in a browser on a headless display and lets the parser
    # extract the HTML. Failures are logged and reported as a 500 response
    # (carrying whatever HTML was obtained) rather than raised.
    #
    # The browser and the display are released in +ensure+, so they are not
    # leaked when starting the browser or the parser's actions raise.
    def browse(url, parser, opts = {})
      html = nil
      window = nil
      display_started = false

      headless.start
      display_started = true
      window = browser.start(url)
      html = parser.browse_actions_for_html(window, opts)
      DefaultResponse.new(html, 200, opts[:message] || "success")
    rescue => e
      log_error('browse', e)
      DefaultResponse.new(html, 500, e.message)
    ensure
      teardown_browser(window, display_started)
    end

    # Closes the browser (if one was started) and destroys the headless
    # display (if it was started). Cleanup errors are logged, not raised, so
    # they cannot mask the response computed by #browse.
    def teardown_browser(window, display_started)
      window.close if window
    rescue => e
      log_error('browse', e, '(closing browser)')
    ensure
      begin
        headless.destroy if display_started
      rescue => e
        log_error('browse', e, '(destroying headless display)')
      end
    end

    # Performs the HTTP call; failures are logged and turned into a 500
    # response with a nil body.
    def request(url, opts = {})
      method = opts.fetch(:method, :get)
      options = opts.fetch(:options, {})
      http_client.public_send(method, url, options)
    rescue => e
      log_error('request', e)
      DefaultResponse.new(nil, 500, e.message)
    end

    # Memoized headless display shared by all #browse calls of this engine.
    def headless
      @headless ||= Headless.new
    end

    def log_error(method, ex, info = nil)
      logger.error("HttpReader::Engine##{method} - #{ex.message} #{info}")
    end
  end
end
@@ -0,0 +1,35 @@
1
+ require 'nokogiri'
2
+
3
module HttpReader
  # Parser driven by a hash of CSS selectors.
  #
  # Given opts such as { title: 'h1', items: '.content li;array' } it returns
  # a hash with the same keys whose values are the text of the matched nodes:
  # the first match by default, or every match when the selector is suffixed
  # with ";array".
  class HashPageParser < BasePageParser
    KEY_IDX = 0
    SELECTOR_IDX = 1
    TYPE_SEPARATOR = ';'

    @pattern = /^((http|https):\/\/).*$/

    # @param response [#body] response whose body is HTML
    # @param opts [Hash] key => "css selector" or "css selector;array"
    # @return [Hash] key => String (first match, nil when none) or Array<String>
    def self.parse(response, opts = {})
      page = Nokogiri::HTML(response.body)
      opts.each_with_object({}) do |item, result|
        key, value = prepare_key_value(page, item)
        result[key] = value
      end
    end

    # The helpers below are implementation details. They stay publicly
    # callable, as before: a bare `private` does not apply to methods defined
    # with `def self.`, so the misleading keyword was dropped.

    # @param page [Nokogiri::HTML::Document]
    # @param item [Array] [key, selector_spec] pair taken from the opts hash
    # @return [Array] [key, extracted value]
    def self.prepare_key_value(page, item)
      key = item[KEY_IDX]
      selector, is_array = prepare_selector(item[SELECTOR_IDX])
      texts = page.css(selector).map(&:text)
      [key, is_array ? texts : texts.first]
    end

    # Splits "selector;type" into the selector and an array flag. Whitespace
    # around the type is ignored, so "li; array" is accepted as well.
    #
    # @param value [String]
    # @return [Array] [selector, Boolean]
    def self.prepare_selector(value)
      selector, type = value.split(TYPE_SEPARATOR)
      [selector, type.to_s.strip.downcase == 'array']
    end
  end
end
+ end
@@ -0,0 +1,3 @@
1
# Gem version constant.
module HttpReader
  VERSION = '0.0.1'
end
@@ -0,0 +1,7 @@
1
+ require 'http_reader/version'
2
+ require 'http_reader/base_page_parser'
3
+ require 'http_reader/hash_page_parser'
4
+ require 'http_reader/engine'
5
+
6
# Top-level namespace of the gem; its contents are defined in the files
# required above.
module HttpReader; end
@@ -0,0 +1,29 @@
1
+ require 'bundler/setup'
2
+ require 'awesome_print'
3
+ Bundler.setup
4
+
5
+ require 'coveralls'
6
+ Coveralls.wear!
7
+
8
+ require 'http_reader'
9
+
10
RSpec.configure do |rspec|
  # Run only examples tagged :focus, or everything when nothing is tagged.
  rspec.filter_run(:focus)
  rspec.run_all_when_everything_filtered = true

  # A run of a single spec file uses the 'doc' formatter.
  rspec.default_formatter = 'doc' if rspec.files_to_run.one?

  # Report the ten slowest examples and randomize execution order.
  rspec.profile_examples = 10
  rspec.order = :random

  # Seed Ruby's random number generator with the RSpec seed.
  Kernel.srand(rspec.seed)

  # Only the expect-style syntax is enabled for expectations and mocks.
  rspec.expect_with(:rspec) { |expectations| expectations.syntax = :expect }

  rspec.mock_with(:rspec) do |mocks|
    mocks.syntax = :expect
    mocks.verify_partial_doubles = true
  end
end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
# Class-level behaviour of the base parser: pattern, match and parse.
describe HttpReader::BasePageParser do
  subject { described_class }

  let(:pattern) { /^((http|https):\/\/).*$/ }

  context 'self' do
    it '#pattern cover every url' do
      expect(subject.pattern).to eq(pattern)
    end

    context '#match' do
      it 'returns true for url string' do
        matched = subject.match('http://some_url')
        expect(matched).to eq(true)
      end

      it 'returns false for non url string' do
        matched = subject.match('some_fake_url')
        expect(matched).to eq(false)
      end
    end

    context '#parse' do
      let(:body)     { 'test_body' }
      let(:response) { double('response', body: body) }

      it 'returns body' do
        parsed = subject.parse(response)
        expect(parsed).to eq(body)
      end
    end
  end
end
@@ -0,0 +1,216 @@
1
+ require 'spec_helper'
2
+
3
+
4
# Unit spec for HttpReader::Engine. Every collaborator (parsers, HTTP client,
# browser, logger, headless display) is replaced by a double.
describe HttpReader::Engine do
  let(:default_parser) { double('default_parser') }
  let(:parsers)        { [] }
  let(:http_client)    { double('HTTParty') }
  let(:browser)        { double('Watir::Browser') }
  let(:logger)         { double('Logger') }
  let(:headless)       { double('Headless') }

  let(:init_opts) do
    {
      parsers: parsers,
      default_parser: default_parser,
      http_client: http_client,
      browser: browser,
      logger: logger
    }
  end

  let(:test_url)       { 'http://localhost/test' }
  let(:active_browser) { double('active_browser') }

  subject { described_class.new(init_opts) }

  context '#initialize' do
    context 'init #parsers' do
      context 'default' do
        let(:init_opts) { {} }
        it 'should eq []' do
          expect(subject.parsers).to eq []
        end
      end
      context 'optional' do
        it 'can be set as array of parsers' do
          expect(subject.parsers).to eq parsers
        end
      end
    end

    context 'init #default_parser' do
      context 'default' do
        let(:init_opts) { {} }
        it 'should eq HashPageParser' do
          expect(subject.default_parser).to eq HttpReader::HashPageParser
        end
      end
      context 'optional' do
        it 'can be set new default_parser' do
          expect(subject.default_parser).to eq default_parser
        end
      end
    end

    context 'init #http_client' do
      context 'default' do
        let(:init_opts) { {} }
        it 'should eq HTTParty' do
          expect(subject.http_client).to eq HTTParty
        end
      end
      context 'optional' do
        it 'can be set new http_client' do
          expect(subject.http_client).to eq http_client
        end
      end
    end

    context 'init #browser' do
      context 'default' do
        let(:init_opts) { {} }
        it 'should eq Watir::Browser' do
          expect(subject.browser).to eq Watir::Browser
        end
      end
      context 'optional' do
        it 'can be set new browser' do
          expect(subject.browser).to eq browser
        end
      end
    end

    context 'init #logger' do
      context 'default' do
        let(:init_opts) { {} }
        it 'should eq Logger' do
          expect(subject.logger).to be_a Logger
        end
      end
      context 'optional' do
        # Description fixed: this example is about the logger, not the browser.
        it 'can be set new logger' do
          expect(subject.logger).to eq logger
        end
      end
    end
  end

  context '#read' do
    context 'should use parser' do
      let(:parser_in_opts) { double('parser_in_opts') }
      let(:response) { double('response', body: 'body') }

      it 'from provided opts' do
        expect(parser_in_opts).to receive(:use_browser).and_return(false)
        expect(http_client).to receive(:get)
          .with(test_url, {})
          .and_return(response)
        expect(parser_in_opts).to receive(:parse).with(response, {})
        subject.read(test_url, parser: parser_in_opts)
      end

      it 'default when no parsers are available' do
        expect(default_parser).to receive(:use_browser).and_return(false)
        expect(http_client).to receive(:get)
          .with(test_url, {})
          .and_return(response)
        expect(default_parser).to receive(:parse).with(response, {})
        subject.read(test_url)
      end

      context 'which' do
        let(:parser_one) { double('parser_one') }
        let(:parser_two) { double('parser_two') }
        let(:parsers) { [parser_one, parser_two] }

        it 'match as first with url' do
          expect(parser_one).to receive(:match).with(test_url).and_return(false)
          expect(parser_two).to receive(:match).with(test_url).and_return(true)
          expect(parser_two).to receive(:use_browser).and_return(false)
          expect(http_client).to receive(:get)
            .with(test_url, {})
            .and_return(response)
          expect(parser_two).to receive(:parse).with(response, {})
          subject.read(test_url)
        end

        it 'is default_parser when no parser match' do
          expect(parser_one).to receive(:match).with(test_url).and_return(false)
          expect(parser_two).to receive(:match).with(test_url).and_return(false)
          expect(default_parser).to receive(:use_browser).and_return(false)
          expect(http_client).to receive(:get)
            .with(test_url, {})
            .and_return(response)
          expect(default_parser).to receive(:parse).with(response, {})
          subject.read(test_url)
        end
      end
    end

    it 'should provide parse_opts to #parser#parse method' do
      parse_opts = { title: 'h1' }
      response = double('response', body: 'body')
      expect(default_parser).to receive(:use_browser).and_return(false)
      expect(http_client).to receive(:get)
        .with(test_url, {})
        .and_return(response)
      expect(default_parser).to receive(:parse).with(response, parse_opts)
      subject.read(test_url, parse_opts: parse_opts)
    end

    it 'should provide request_opts to request method' do
      http_client_method = :post
      request_opts = { method: http_client_method, options: { body: { token: '123' } } }
      response = double('response', body: 'body')
      expect(default_parser).to receive(:use_browser).and_return(false)
      expect(http_client).to receive(http_client_method)
        .with(test_url, request_opts[:options])
        .and_return(response)
      expect(default_parser).to receive(:parse).with(response, {})
      subject.read(test_url, request_opts: request_opts)
    end

    # Description fixed: this example exercises the browser path, and its old
    # title duplicated the request_opts example above.
    it 'should provide browse_opts to browse method' do
      message = 'done'
      browse_opts = { process: :continue, message: message }
      browser_body = "body"
      response = described_class::DefaultResponse.new(browser_body, 200, message)
      expect(Headless).to receive(:new).and_return(headless)
      expect(headless).to receive(:start)
      expect(default_parser).to receive(:use_browser).and_return(true)
      expect(browser).to receive(:start).with(test_url).and_return(active_browser)
      expect(default_parser).to receive(:browse_actions_for_html)
        .with(active_browser, browse_opts)
        .and_return(browser_body)
      expect(active_browser).to receive(:close)
      expect(headless).to receive(:destroy)
      expect(default_parser).to receive(:parse).with(response, {})

      subject.read(test_url, browse_opts: browse_opts)
    end

    context 'on raise errors' do
      it 'raise ReadError' do
        error_msg = 'HttpReader::Engine#read - Bad url: http://localhost/test, opts: {}'
        expect(default_parser).to receive(:use_browser).and_raise('Bad')
        expect(logger).to receive(:error).with(error_msg)
        expect { subject.read(test_url) }.to raise_error(described_class::ReadError, 'Bad')
      end

      it 'in #request' do
        error_msg = 'HttpReader::Engine#request - Bad '
        response = described_class::DefaultResponse.new(nil, 500, 'Bad')
        expect(default_parser).to receive(:use_browser).and_return(false)
        expect(http_client).to receive(:get).with(test_url, {}).and_raise('Bad')
        expect(default_parser).to receive(:parse).with(response, {})
        expect(logger).to receive(:error).with(error_msg)
        subject.read(test_url)
      end

      it 'in #browse' do
        error_msg = 'HttpReader::Engine#browse - Bad '
        response = described_class::DefaultResponse.new(nil, 500, 'Bad')
        # Stub the headless display so the example does not need a real Xvfb.
        # `allow` is used for start/destroy so the example does not dictate
        # whether the display is torn down after a failed browser start.
        allow(Headless).to receive(:new).and_return(headless)
        allow(headless).to receive(:start)
        allow(headless).to receive(:destroy)
        expect(default_parser).to receive(:use_browser).and_return(true)
        expect(browser).to receive(:start).with(test_url).and_raise('Bad')
        expect(default_parser).to receive(:parse).with(response, {})
        expect(logger).to receive(:error).with(error_msg)
        subject.read(test_url)
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'spec_helper'
2
+
3
# Class-level behaviour of the selector-driven parser.
describe HttpReader::HashPageParser do
  subject { described_class }

  let(:pattern) { /^((http|https):\/\/).*$/ }

  context 'self' do
    it '#pattern cover every url' do
      expect(subject.pattern).to eq pattern
    end

    context '#match' do
      it 'returns true for url string' do
        expect(subject.match('http://some_url')).to eq true
      end

      it 'returns false for non url string' do
        expect(subject.match('some_fake_url')).to eq false
      end
    end

    context '#parse' do
      let(:body) { '<h1>Information</h1><p>not importante</p><div class="content">Items: <ul><li>A</li><li>B</li><li>C</li></ul></div>' }
      let(:opts) { {title: 'h1', items: '.content li;array'} }
      let(:response) { double('response', body: body) }
      let(:result) { {:title=>"Information", :items=>%w{A B C}} }

      # Description fixed: the example checks the extracted hash, not the raw
      # body (the old title was copied from the base parser spec).
      it 'returns hash of values extracted by selectors' do
        expect(subject.parse(response, opts)).to eq result
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ require 'spec_helper'
2
+
3
# Placeholder group: there are no examples for the top-level namespace yet.
describe(HttpReader) {}
metadata ADDED
@@ -0,0 +1,210 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: http_reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Paweł Niemczyk
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: httparty
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.13'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.6'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: headless
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: watir-webdriver
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.6'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.6'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3.1'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '3.1'
111
+ - !ruby/object:Gem::Dependency
112
+ name: guard-rspec
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: coveralls
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: awesome_print
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ description: Read page body and parse to specific data
154
+ email:
155
+ - pniemczyk@o2.pl
156
+ executables: []
157
+ extensions: []
158
+ extra_rdoc_files: []
159
+ files:
160
+ - ".gitignore"
161
+ - ".rspec"
162
+ - ".travis.yml"
163
+ - Gemfile
164
+ - Guardfile
165
+ - LICENSE.txt
166
+ - README.md
167
+ - Rakefile
168
+ - http_reader.gemspec
169
+ - lib/http_reader.rb
170
+ - lib/http_reader/base_page_matcher.rb
171
+ - lib/http_reader/base_page_parser.rb
172
+ - lib/http_reader/engine.rb
173
+ - lib/http_reader/hash_page_parser.rb
174
+ - lib/http_reader/version.rb
175
+ - spec/spec_helper.rb
176
+ - spec/unit/lib/http_reader/base_page_parser_spec.rb
177
+ - spec/unit/lib/http_reader/engine_spec.rb
178
+ - spec/unit/lib/http_reader/hash_page_parser_spec.rb
179
+ - spec/unit/lib/http_reader_spec.rb
180
+ homepage: https://github.com/pniemczyk/http_reader
181
+ licenses:
182
+ - MIT
183
+ metadata: {}
184
+ post_install_message: Do not forget install xvfb. Have fun !
185
+ rdoc_options: []
186
+ require_paths:
187
+ - lib
188
+ required_ruby_version: !ruby/object:Gem::Requirement
189
+ requirements:
190
+ - - ">="
191
+ - !ruby/object:Gem::Version
192
+ version: '0'
193
+ required_rubygems_version: !ruby/object:Gem::Requirement
194
+ requirements:
195
+ - - ">="
196
+ - !ruby/object:Gem::Version
197
+ version: '0'
198
+ requirements: []
199
+ rubyforge_project:
200
+ rubygems_version: 2.2.2
201
+ signing_key:
202
+ specification_version: 4
203
+ summary: Page parser
204
+ test_files:
205
+ - spec/spec_helper.rb
206
+ - spec/unit/lib/http_reader/base_page_parser_spec.rb
207
+ - spec/unit/lib/http_reader/engine_spec.rb
208
+ - spec/unit/lib/http_reader/hash_page_parser_spec.rb
209
+ - spec/unit/lib/http_reader_spec.rb
210
+ has_rdoc: