taiwan_tours 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2e64121574b9a2cbacc8da265b95fdd1921e9671
4
+ data.tar.gz: 3e585b8ba243438ee3b007ffd1d214c2d81f6a02
5
+ SHA512:
6
+ metadata.gz: 55193252cc08899017ac0b8bad0157986b3efc452dcb5dc2789b1522425c513723c3fbdc1fede4a1a8568d18b2f889dcc1a44e9d9febe8070dbba6efa1b072fc
7
+ data.tar.gz: f7109e3c3f637f9598dc1bc8dda6c80efdce811a2d48817ef8cae7aecd949734ef1efdc1ae39760e18d776d46afad287efd7409c8c345fe217f2af8d06a1e855
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.gem
data/.travis.yml ADDED
@@ -0,0 +1,16 @@
1
+ language: ruby
2
+ rvm:
3
+ - ruby-head
4
+ - "2.2.3"
5
+ - "2.1.0"
6
+ - "1.9.3"
7
+ - jruby-head
8
+ - jruby-19mode
9
+ matrix:
10
+ allow_failures:
11
+ - rvm: jruby-head
12
+ branches:
13
+ only:
14
+ - master
15
+ # uncomment this line if your project needs to run something other than `rake`:
16
+ # script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,12 @@
1
# Gem source used to resolve every dependency listed below.
source 'https://rubygems.org'

# Gems required
gem 'oga'
gem 'json'
gem 'minitest'
gem 'webmock'
gem 'vcr'

# rake is the only member of the :setup group.
gem 'rake', :group => :setup
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2015 ZhongMeiZhou https://github.com/ZhongMeiZhou
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # taiwan_tours [![Build Status](https://travis-ci.org/ZhongMeiZhou/scraper_project.svg)](https://travis-ci.org/ZhongMeiZhou/scraper_project)
2
+
3
+ The Lonely Planet web scraper service retrieves the details of Taiwan tour packages in the easiest way, with just a few simple steps.
4
+
5
+ Our company fully respects the privacy policies of our partner Lonely Planet, as declared in its 'robots.txt' file.
6
+
7
+
8
+ ## Gem Usage
9
+
10
+ Install our fantastic gem using the following simple command on your command line:
11
+
12
+ ```sh
13
+ $ gem install taiwan_tours
14
+ ```
15
+
16
+ Alternatively, since our gem is published on RubyGems.org, you can include it in your 'Gemfile':
17
+
18
+ ```ruby
19
+ gem 'taiwan_tours'
20
+ ```
21
+
22
+ ## Try it yourself!
23
+ Run it from the command line as follows:
24
+
25
+ ```sh
26
+ $ taiwan_tours
27
+ ```
28
+
29
+ or include it in your own Ruby code with the following steps:
30
+
31
+ ```ruby
32
+ require 'taiwan_tours'
33
+ taiwan = LonelyPlanetScrape::LonelyPlanetTours.new
34
+ tours = taiwan.tours
35
+ ```
36
+
37
+ ## Want to make improvements?
38
+
39
+ 1. Fork it ( https://github.com/ZhongMeiZhou/scraper_project )
40
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
41
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
42
+ 4. Push to the branch (`git push origin my-new-feature`)
43
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require 'rake/testtask'
2
+
3
# Running bare `rake` runs the :spec task.
task :default => [:spec]

# Defines the :spec task, which runs every file matching spec/*_spec.rb.
desc 'Run specs'
Rake::TestTask.new(:spec) do |t|
  t.pattern = 'spec/*_spec.rb'
end
data/bin/taiwan_tours ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
# Command-line entry point: prints the title and price of every Taiwan tour
# package scraped from LonelyPlanet.
require 'json'
# Load the scraper relative to this file so the executable works from any
# working directory; a './lib/...' path only resolves from the project root.
require_relative '../lib/taiwan_tours/lonelyplanet_scrap'

begin
  taiwan_tours = LonelyPlanetScrape::LonelyPlanetTours.new
  # #tours returns a JSON string, so decode it back into an array of hashes.
  tour_arr = JSON.parse(taiwan_tours.tours)
  puts 'According to LonelyPlanet, these are the best tour packages in Taiwan:'

  tour_arr.each do |tour|
    puts "- #{tour['title']} for $#{tour['price']}"
  end
rescue => e
  # Report the failure on stderr and exit non-zero so callers can detect it.
  warn "Error occurred - see details: #{e}"
  exit 1
end
data/lib/.DS_Store ADDED
Binary file
@@ -0,0 +1,52 @@
1
+ require 'oga'
2
+ require 'open-uri'
3
+ require 'json'
4
+ require './lib/taiwan_tours/lonelyplanet_scrap'
5
# Practice harness for handling errors and for detecting changes in the
# structure of the scraped HTML. It can be used to try out functionality
# before implementing it in the scraper class.
class LonelyPlanetToursTest < LonelyPlanetScrape::LonelyPlanetTours
  # Overrides the superclass initializer so that a failed HTTP request is
  # printed instead of raised.
  def initialize
    test_parse_html
  end

  # Returns the scraped tours as a JSON string. When extraction fails the
  # error is printed and nil is returned, so nothing is memoized and the next
  # call tries again.
  def test_tour
    @tours ||= test_html_extraction
  end

  private

  # Tests the connection to the external site.
  def test_parse_html
    parse_html
  rescue OpenURI::HTTPError => e
    puts "HTTP request error: #{e}"
  end

  # Tests the scraping service and checks for changes in the structure of the
  # elements being traversed. Any StandardError is printed together with its
  # backtrace rather than propagated.
  def test_html_extraction
    verify_structure
    details = @document.xpath(TOUR_XPATH_CARD).map { |card| card_details(card) }
    details.to_json
  rescue StandardError => e
    puts e.message
    puts e.backtrace.inspect
  end

  # Raises as soon as one of the XPaths the scraper relies on yields no text.
  def verify_structure
    require_text(TOUR_XPATH_CARD, 'OOPS, root article may have been changed or removed')
    require_text(CARD_TITLE_XPATH, 'Title h1 tag may have been changed or removed')
    require_text(CARD_PRICE_AMOUNT_XPATH, 'Price span tag may have been changed or removed')
    require_text(CARD_CONTENT_XPATH, 'Content div tag may have been changed or removed')
  end

  # Raises a RuntimeError with +message+ when +xpath+ matches no text in the document.
  def require_text(xpath, message)
    raise message if @document.xpath(xpath).text.empty?
  end

  # Builds the hash describing a single tour card.
  def card_details(card)
    {
      'img' => card.xpath(CARD_IMGLINK_XPATH).text,
      'title' => card.xpath(CARD_TITLE_XPATH).text.strip,
      'content' => card.xpath(CARD_CONTENT_XPATH).text.strip,
      'location' => card.xpath(CARD_LOCATION_XPATH).text,
      'price_currency' => card.xpath(CARD_PRICE_CURRENCY_XPATH).text,
      'price' => card.xpath(CARD_PRICE_AMOUNT_XPATH).text
    }
  end
end

# Run the harness and print whatever it extracted.
puts LonelyPlanetToursTest.new.test_tour
@@ -0,0 +1,52 @@
1
+ require 'oga'
2
+ require 'open-uri'
3
+ require 'json'
4
+
5
# Module defines the LonelyPlanetTours class, which handles scraping of the
# LonelyPlanet Taiwan tours page.
module LonelyPlanetScrape
  # Downloads the tours page on construction and exposes the tour cards found
  # in it as a JSON string.
  class LonelyPlanetTours
    # Constants needed for scraping.
    # If the test for the URL fails, replace these constants manually.
    LONELYPLANET_URL = 'http://www.lonelyplanet.com'.freeze
    TOUR_RELATIVE_DIR = 'taiwan/tours'.freeze

    TOUR_XPATH_CARD = "//article[contains(@class,'card')]".freeze
    CARD_IMGLINK_XPATH = ".//img[contains(@class,'card__figure__img')]/@src".freeze
    CARD_TITLE_XPATH = './/h1'.freeze
    CARD_CONTENT_XPATH = ".//div[contains(@class,'card__content__desc')]//p".freeze
    CARD_LINK_XPATH = ".//div[contains(@class,'card__mask')]//a".freeze
    CARD_LOCATION_XPATH = ".//div[contains(@class,'card__footer__locale')]".freeze
    CARD_PRICE_CURRENCY_XPATH = ".//span[contains(@class,'js-currency')]".freeze
    CARD_PRICE_AMOUNT_XPATH = ".//span[contains(@class,'js-price')]".freeze

    # Fetches and parses the tours page immediately; errors raised by the
    # HTTP request propagate to the caller.
    def initialize
      parse_html
    end

    # Returns a JSON string: an array with one object per tour card, holding
    # the keys img, title, content, location, price_currency and price.
    # The result is computed once and memoized.
    def tours
      @tours ||= extract_tours
    end

    private

    # Downloads the tours page and stores the parsed document in @document.
    def parse_html
      url = "#{LONELYPLANET_URL}/#{TOUR_RELATIVE_DIR}"
      # URI#read (from open-uri) fetches the body and closes the connection.
      # A bare open(url) no longer opens URLs on Ruby 3.0+ and left the
      # handle unclosed.
      @document = Oga.parse_html(URI.parse(url).read)
    end

    # Serializes every tour card in the document to JSON.
    def extract_tours
      @document.xpath(TOUR_XPATH_CARD).map { |card| card_to_hash(card) }.to_json
    end

    # Builds the hash for a single card; title and content are stripped of
    # surrounding whitespace, the other fields are taken verbatim.
    def card_to_hash(card)
      {
        'img' => card.xpath(CARD_IMGLINK_XPATH).text,
        'title' => card.xpath(CARD_TITLE_XPATH).text.strip,
        'content' => card.xpath(CARD_CONTENT_XPATH).text.strip,
        'location' => card.xpath(CARD_LOCATION_XPATH).text,
        'price_currency' => card.xpath(CARD_PRICE_CURRENCY_XPATH).text,
        'price' => card.xpath(CARD_PRICE_AMOUNT_XPATH).text
      }
    end
  end
end
@@ -0,0 +1,5 @@
1
# Versioning
module LonelyPlanetScrape
  # Version string of the gem; frozen so it cannot be mutated in place.
  VERSION = '0.1.0'.freeze
  # Date string that accompanies VERSION (presumably the release date).
  DATE = '2015-10-16'.freeze
end
@@ -0,0 +1,2 @@
1
+ require 'taiwan_tours/lonelyplanet_scrap.rb'
2
+ require 'taiwan_tours/version.rb'