pet_rescue-scraper 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e0869a2e7eca1d31e68d6b428c5c470a94c73958
4
+ data.tar.gz: 7ac8ef13bcdde87b2cd716ab7bf8e3e9835e5429
5
+ SHA512:
6
+ metadata.gz: 0be3f54185d3d0d4fb90aef48ac500be43ea1bf94705350c1998ddb49751e620f1a0470671b09c3bb80fcc85cbf22407009b69daec8c7dd648ae7545ebf6efe5
7
+ data.tar.gz: d059b42dd88c4d0aab3d00313af90d08eed1346088f218246fb29119f50febbe96411da22216b69dde0cd8c389543264fd9abc73e7b77057c2804c87bec2def7
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Resolve gems from the public RubyGems index.
source "https://rubygems.org"

# The dependency list lives in pet_rescue-scraper.gemspec; load it from there.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Alex Smith
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # PetRescue::Scraper
2
+
3
+ Allows scraping of pet listings from petrescue.com.au.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'pet_rescue-scraper'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install pet_rescue-scraper
20
+
21
+ ## Usage
22
+
23
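+ Enumerate `PetRescue::Scraper::DogListings.new(per_page: 48)` to get one listing page per dog, then call `#pet` on a listing page to download and parse it into a `PetRescue::Pet`.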
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/pet_rescue-scraper/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
# Define the `spec` task via RSpec's rake integration.
RSpec::Core::RakeTask.new(:spec)

# `rake` with no arguments runs the `spec` task.
task default: [:spec]
@@ -0,0 +1,18 @@
1
+ require 'open-uri'
2
+
3
module PetRescue
  # A listing page on petrescue.com.au
  #
  # Holds a listing id, the listing URL derived from it, and the parser used
  # to interpret the page. Nothing is downloaded until #pet is called.
  class ListingPage
    # The listing id exactly as it was passed to the constructor.
    attr_reader :id

    # @param id [#to_s] listing identifier; interpolated into the listing URL
    # @param parser [#parse] object whose #parse receives the opened page
    def initialize(id, parser)
      @id = id
      @url = "http://www.petrescue.com.au/listings/#{@id}"
      @parser = parser
    end

    # Downloads the listing page and passes the opened stream to the parser.
    #
    # The page is opened through URI#open (supplied by open-uri) instead of
    # Kernel#open, because Kernel#open no longer accepts URLs as of Ruby 3.0.
    # The block form closes the downloaded body once the parser returns, so
    # the parser must finish reading the stream inside #parse.
    #
    # @return [Object] whatever the parser's #parse returns
    def pet
      URI.parse(@url).open { |page| @parser.parse(page) }
    end
  end
end
@@ -0,0 +1,106 @@
1
+ require 'nokogiri'
2
+ require 'pet_rescue/pet'
3
+
4
module PetRescue
  # Parses an HTML listing page from petrescue.com.au
  #
  # #parse stores the parsed document on the instance and then builds a Pet
  # from the individual pet_* extractors, each of which reads one CSS
  # selector from that document.
  class ListingPageParser
    # Parses the given HTML and returns a Pet populated from it.
    #
    # @param document [String, IO] HTML source handed to Nokogiri::HTML
    # @return [Pet]
    # @raise [NoMethodError] when an element read without a nil guard
    #   (name, species, age, location, ...) is absent from the page
    def parse(document)
      @document = Nokogiri::HTML(document)

      Pet.new(
        name: pet_name,
        size: pet_size,
        gender: pet_gender,
        breed: pet_breed,
        age: pet_age,
        location: pet_location,
        vaccinated: pet_vaccinated?,
        desexed: pet_desexed?,
        biography: pet_biography,
        rescue_group: pet_rescue_group,
        small_photo_url: pet_small_photo_url,
        large_photo_url: pet_large_photo_url,
        adoption_fee: pet_adoption_fee,
        adoption_process: pet_adoption_process,
        contact_name: pet_contact_name,
        contact_number: pet_contact_number
      )
    end

    # @return [String] stripped text of the main heading
    def pet_name
      required_text("#main h1")
    end

    # @return [String, nil] first word of the species line
    def pet_size
      species.split(" ")[0]
    end

    # @return [String, nil] second word of the species line
    def pet_gender
      species.split(" ")[1]
    end

    # @return [String] everything after the first two words of the species line
    def pet_breed
      species.split(" ").drop(2).join(" ")
    end

    # @return [String] stripped text of the age entry
    def pet_age
      required_text("dd.age")
    end

    # @return [String] location text with the "Located in " label removed
    def pet_location
      required_text(".located_in").gsub("Located in ", "").strip
    end

    # @return [Boolean] true only when the entry reads exactly "Yes"
    def pet_vaccinated?
      required_text("dd.vaccinated") == "Yes"
    end

    # @return [Boolean] true only when the entry reads exactly "Yes"
    def pet_desexed?
      required_text("dd.desexed") == "Yes"
    end

    # @return [String] stripped text of the personality section
    def pet_biography
      required_text(".personality")
    end

    # Adoption fee as whole units.
    #
    # Both the currency sign and thousands separators are removed before the
    # integer conversion; otherwise String#to_i stops at the first comma and
    # "$1,250" would be read as 1. Text with no leading digits yields 0.
    #
    # @return [Integer]
    def pet_adoption_fee
      required_text("dd.adoption_fee").delete("$,").to_i
    end

    # @return [String, nil] nil when the page has no adoption process section
    def pet_adoption_process
      optional_text(".adoption_process")
    end

    # @return [String] stripped text of the "fostered by" entry
    def pet_rescue_group
      required_text("dd.fostered_by")
    end

    # @return [String, nil] nil when the page has no contact name entry
    def pet_contact_name
      optional_text("dd.contact_name")
    end

    # Reads the first child of the element that follows the contact number
    # label.
    #
    # @return [String, nil] nil when the label, or that first child, is absent
    def pet_contact_number
      node = @document.at_css("dt.contact_number")
      node &&= node.next_element.children.first
      node && node.text.strip
    end

    # @return [String, nil] src of the featured photo's image; nil when the
    #   page has no featured photo
    def pet_small_photo_url
      photo = photo_node
      photo && photo.at_css("img").attributes["src"].value
    end

    # @return [String, nil] href of the featured photo's link; nil when the
    #   page has no featured photo
    def pet_large_photo_url
      photo = photo_node
      photo && photo.at_css("a").attributes["href"].value
    end

    private

    # Stripped text of the first node matching +selector+; raises
    # NoMethodError when nothing matches.
    def required_text(selector)
      @document.at_css(selector).text.strip
    end

    # Stripped text of the first node matching +selector+, or nil when
    # nothing matches.
    def optional_text(selector)
      node = @document.at_css(selector)
      node && node.text.strip
    end

    def photo_node
      @document.at_css("#featured_photo")
    end

    def species
      required_text(".species")
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'virtus'
2
+
3
module PetRescue
  # Model for a pet on petrescue.com.au
  #
  # Attributes are declared with the Virtus model DSL.
  class Pet
    include Virtus.model

    # Identity and description
    attribute :id,               String
    attribute :name,             String
    attribute :size,             String
    attribute :gender,           String
    attribute :breed,            String
    attribute :age,              String
    attribute :location,         String

    # Health flags
    attribute :vaccinated,       Boolean
    attribute :desexed,          Boolean

    attribute :biography,        String

    # Adoption details
    attribute :adoption_fee,     Integer
    attribute :adoption_process, String

    # Rescue group and contact
    attribute :rescue_group,     String
    attribute :contact_name,     String
    attribute :contact_number,   String

    # Photos
    attribute :small_photo_url,  String
    attribute :large_photo_url,  String
  end
end
@@ -0,0 +1,5 @@
1
module PetRescue
  module Scraper
    # Version string of the pet_rescue-scraper gem.
    VERSION = '1.0.0'
  end
end
@@ -0,0 +1,33 @@
1
+ require 'pet_rescue/scraper/version'
2
+ require 'pet_rescue/search_results_page'
3
+
4
module PetRescue
  module Scraper
    # A collection of dog listings on petrescue.com.au
    #
    # Enumerates the listing pages found on each search results page,
    # following the "next page" link until there is none. Results pages are
    # fetched one at a time as iteration advances.
    class DogListings
      include Enumerable

      # @param per_page [Integer] value sent as the per_page query parameter
      def initialize(per_page: 48)
        @per_page = per_page
      end

      # Yields every listing page, one search results page at a time.
      #
      # Without a block an Enumerator is returned and nothing is fetched
      # until it is consumed. (Previously a blockless call walked every
      # results page and then returned nil.)
      #
      # @yieldparam listing_page [Object] one element of
      #   SearchResultsPage#listing_pages
      # @return [Enumerator] when no block is given
      # @return [self] when a block is given
      def each(&block)
        return enum_for(:each) unless block

        search_page = SearchResultsPage.from_url(first_page_url)

        loop do
          search_page.listing_pages.each(&block)

          break unless search_page.has_next_page?

          search_page = search_page.next_page
        end

        self
      end

      private

      # URL of the first page of dog search results.
      def first_page_url
        "http://www.petrescue.com.au/listings/dogs?per_page=#{@per_page}&page=1"
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'pet_rescue/listing_page'
4
+ require 'pet_rescue/listing_page_parser'
5
+
6
module PetRescue
  # A page of search results on petrescue.com.au
  class SearchResultsPage
    # Downloads +url+ and wraps the response in a SearchResultsPage.
    #
    # The URL is opened through URI#open (supplied by open-uri) instead of
    # Kernel#open, because Kernel#open no longer accepts URLs as of Ruby 3.0.
    # The block form closes the downloaded body after the document is built.
    #
    # @param url [String] absolute URL of a search results page
    # @return [SearchResultsPage]
    def self.from_url(url)
      URI.parse(url).open { |page| new(page) }
    end

    # @param document [String, IO] HTML source handed to Nokogiri::HTML
    def initialize(document)
      @document = Nokogiri::HTML(document)
    end

    # Builds one ListingPage per listing link on this page, each with its
    # own parser instance.
    #
    # @return [Array<ListingPage>]
    def listing_pages
      listing_ids.map do |id|
        parser = PetRescue::ListingPageParser.new
        ListingPage.new(id, parser)
      end
    end

    # @return [Boolean] whether this page has a "next" link carrying an href
    def has_next_page?
      !!next_page_href
    end

    # Fetches the page the "next" link points to.
    #
    # @return [SearchResultsPage, nil] nil when there is no next link
    #   (previously this case fetched the bare site root)
    def next_page
      href = next_page_href
      return unless href

      self.class.from_url("http://www.petrescue.com.au#{href}")
    end

    private

    # href of the "next" link, or nil when the link is absent.
    def next_page_href
      node = @document.at_css(".next a")
      node && node[:href]
    end

    # Numeric ids taken from the listing links on this page.
    #
    # Base 10 is given explicitly so a zero-prefixed id is not read as
    # octal. Integer() still raises ArgumentError when the href holds
    # anything besides "/listings/<digits>".
    def listing_ids
      @document
        .css(".listing .name a")
        .map { |node| Integer(node[:href].to_s.gsub("/listings/", ""), 10) }
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
# Put the gem's lib/ directory on the load path so the version constant can
# be required straight from the source tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'pet_rescue/scraper/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name        = "pet_rescue-scraper"
  gem.version     = PetRescue::Scraper::VERSION
  gem.authors     = ["Alex Smith"]
  gem.email       = ["alex@thatalexguy.com"]
  gem.summary     = "Scraper for petrescue.com.au"
  gem.description = "Allows scraping of pet listings from petrescue.com.au"
  gem.homepage    = ""
  gem.license     = "MIT"

  # Packaged files: everything tracked by git (NUL-separated listing).
  tracked_files     = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies
  gem.add_dependency "nokogiri", "~> 1.6.3.1"
  gem.add_dependency "virtus", "~> 1.0.3"

  # Development dependencies
  gem.add_development_dependency "bundler", "~> 1.7"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.1.0"
  gem.add_development_dependency "vcr", "~> 2.9.3"
  gem.add_development_dependency "webmock", "~> 1.19.0"
  gem.add_development_dependency "simplecov", "~> 0.9.1"
end