yelp_html_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d307dd8f4fd93a6e978423ea47a217551e0613cc
4
+ data.tar.gz: 70762f489e33191f2d168ec218a2d2229c107ea0
5
+ SHA512:
6
+ metadata.gz: 96ed5e332d5e5a23bd874f7de062e27bc41970bc31141a227c67038a2211915b1d512a6390a44641ad4d3d7ffacc635bcc21546e1e45910b772f1bb1bd3f2eb9
7
+ data.tar.gz: 530f063531f932dfd12b519d1b4516664b7b0d30327111de8e095d6bfc251fdae02c2d8143a8cf54972c85eda54f3d421408923a945fe1972f1cd1501a6030b4
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2015 ACHIINTO
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,32 @@
1
# Rake tasks for the yelp_html_parser gem: RDoc generation, the gem tasks
# provided by Bundler::GemHelper, and the test suite (the default task).

begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc` documents README.rdoc and everything under lib/ into ./rdoc.
RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'YelpHtmlParser'
  rdoc.options << '--line-numbers'
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Bundler is only defined when the require above succeeded. Without this
# guard the file would abort with a NameError right after printing the hint
# above, taking the rdoc and test tasks down with it.
Bundler::GemHelper.install_tasks if defined?(Bundler::GemHelper)

require 'rake/testtask'

# `rake test` runs every test/**/*_test.rb file with lib/ and test/ on the
# load path.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

task default: :test
@@ -0,0 +1,11 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'yelp_html_parser/business'
4
+ require 'yelp_html_parser/errors'
5
+ require 'yelp_html_parser/resource_parsers/business'
6
+ require 'yelp_html_parser/resources/business'
7
+ require 'yelp_html_parser/helpers/secondary_attributes_parser'
8
+
9
# Top-level namespace of the gem.
module YelpHtmlParser
  # Base URL (with trailing slash) that request paths are appended to.
  # Frozen so that no caller can mutate the shared string in place.
  YELP_DOMAIN = "http://www.yelp.com/".freeze
end
@@ -0,0 +1,19 @@
1
module YelpHtmlParser
  # Entry point for fetching businesses from Yelp's HTML search page.
  class Business
    # CSS path selecting the <li> elements of the search results list.
    BUSINESS_SEARCH_RESULTS_CSS_PATH = '#super-container > div.container.indexed-biz-archive > div.clearfix.layout-block.layout-a.scroll-map-container.search-results-block > div.column.column-alpha > div > div.content > ul > li'.freeze

    # Fetches one search results page and builds a business from every
    # `.search-result` node found in it.
    #
    # The four coordinates are interpolated, in the given order, into the
    # `l=g:` query parameter; +start_index+ becomes the `start` parameter.
    #
    # @param sw_latitude [#to_s]
    # @param sw_longitude [#to_s]
    # @param ne_latitude [#to_s]
    # @param ne_longitude [#to_s]
    # @param start_index [#to_s] value of the `start` query parameter
    # @return [Array] one entry per result, as returned by
    #   ResourceParsers::Business#build_business
    # @raise [YelpHtmlParser::Errors::NetworkError] when the server answers
    #   with an HTTP error status
    def self.search(sw_latitude, sw_longitude, ne_latitude, ne_longitude, start_index = 0)
      link = YelpHtmlParser::YELP_DOMAIN +
             "search?start=#{ start_index }&l=g:#{ sw_latitude },#{ sw_longitude },#{ ne_latitude },#{ ne_longitude }"

      # open-uri sends String keys as HTTP header fields and treats Symbol
      # keys as its own options, so the timeout has to be a Symbol key with
      # a numeric value to have any effect.
      options = { 'User-Agent' => 'Googlebot', read_timeout: 20 }

      # URI#open works on every Ruby that ships open-uri (Kernel#open no
      # longer opens URLs on Ruby 3), and the block form closes the
      # response IO once it has been parsed.
      doc = URI.parse(link).open(options) { |io| Nokogiri::HTML(io) }

      content = doc.css(BUSINESS_SEARCH_RESULTS_CSS_PATH)
      content.css('.search-result').map do |business_content|
        YelpHtmlParser::ResourceParsers::Business.new(business_content).build_business
      end
    rescue OpenURI::HTTPError => e
      # Keep the HTTP status text so callers can tell failures apart.
      raise YelpHtmlParser::Errors::NetworkError, e.message
    end
  end
end
@@ -0,0 +1,6 @@
1
module YelpHtmlParser
  # Namespace for the exception classes raised by this gem.
  module Errors
    # Raised when a request to Yelp fails at the HTTP level.
    NetworkError = Class.new(StandardError)
  end
end
@@ -0,0 +1,14 @@
1
module YelpHtmlParser
  module Helpers
    # Cleans up the inner HTML of a secondary attribute (address, phone)
    # into a single line of text.
    class SecondaryAttributesParser
      # First non-whitespace character up to the end of that line. `.` does
      # not match a newline, so the match also ends at the end of the string
      # when no newline follows.
      FIRST_TEXT_LINE = /\S.*/

      # `<br>`, `<br/>` and `<br />` in any letter case.
      LINE_BREAK_TAG = %r{<br\s*/?>}i

      # @param attribute [String, nil] raw inner HTML of the attribute node
      def initialize(attribute)
        @attribute = attribute
      end

      # Returns the first line of the attribute that contains text, without
      # its leading whitespace and with line-break tags turned into ", ".
      #
      # @return [String] the cleaned text; "" when the attribute is nil or
      #   contains only whitespace
      def parse
        first_line = @attribute.to_s[FIRST_TEXT_LINE].to_s
        first_line.gsub(LINE_BREAK_TAG, ', ')
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module YelpHtmlParser
  module ResourceParsers
    # Builds a Resources::Business from the markup of one search result.
    class Business
      # @param business_data [#css, nil] node of a single search result;
      #   when nil nothing is extracted and #build_business returns nil
      def initialize(business_data)
        return unless business_data

        @main_attributes = business_data.css('.main-attributes')
        @secondary_attributes = business_data.css('.secondary-attributes')
      end

      # Copies name, URL, avatar, address and phone out of the markup.
      #
      # Name, URL and avatar stay nil when their element (or attribute) is
      # absent from the markup; address and phone become "" when the
      # secondary block is absent. One incomplete result therefore no longer
      # aborts with NoMethodError on nil.
      #
      # @return [YelpHtmlParser::Resources::Business, nil] nil when the
      #   parser was created without data
      def build_business
        return nil unless @main_attributes

        name_link = @main_attributes.css('.biz-name').first
        photo = @main_attributes.css('.photo-box-img').first

        business = YelpHtmlParser::Resources::Business.new
        if name_link
          business.business_name = name_link.children[0].to_s
          business.url = name_link['href']
        end
        business.avatar = photo['src'] if photo
        business.address = secondary_attribute('address')
        business.phone = secondary_attribute('.biz-phone')
        business
      end

      private

      # Cleaned text of the element matching +selector+ inside the first
      # secondary-attributes node.
      def secondary_attribute(selector)
        container = @secondary_attributes.first
        markup = container ? container.css(selector).children.to_s : ''
        YelpHtmlParser::Helpers::SecondaryAttributesParser.new(markup).parse
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module YelpHtmlParser
  module Resources
    # Plain data holder for one business: all five attributes are readable
    # and writable.
    class Business
      attr_accessor :business_name, :url, :avatar, :address, :phone

      # Every attribute is optional and defaults to nil, so `Business.new`
      # without arguments still yields an empty record.
      #
      # @param business_name [Object, nil]
      # @param url [Object, nil]
      # @param avatar [Object, nil]
      # @param address [Object, nil]
      # @param phone [Object, nil]
      def initialize(business_name: nil, url: nil, avatar: nil, address: nil, phone: nil)
        @business_name = business_name
        @url = url
        @avatar = avatar
        @address = address
        @phone = phone
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module YelpHtmlParser
  # Version of the gem. Frozen so the shared string cannot be mutated in
  # place.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yelp_html_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Achiinto
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.2'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.2'
41
+ - !ruby/object:Gem::Dependency
42
+ name: webmock
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.20'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.20'
55
+ description: This allows fetching and parsing Yelp business data from HTML content
56
+ where it is otherwise not available.
57
+ email: []
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - MIT-LICENSE
63
+ - Rakefile
64
+ - lib/yelp_html_parser.rb
65
+ - lib/yelp_html_parser/business.rb
66
+ - lib/yelp_html_parser/errors.rb
67
+ - lib/yelp_html_parser/helpers/secondary_attributes_parser.rb
68
+ - lib/yelp_html_parser/resource_parsers/business.rb
69
+ - lib/yelp_html_parser/resources/business.rb
70
+ - lib/yelp_html_parser/version.rb
71
+ homepage: ''
72
+ licenses: []
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 2.4.5
91
+ signing_key:
92
+ specification_version: 4
93
+ summary: Parses Yelp HTML content into consumable business objects.
94
+ test_files: []