pet_rescue-scraper 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +6 -0
- data/lib/pet_rescue/listing_page.rb +18 -0
- data/lib/pet_rescue/listing_page_parser.rb +106 -0
- data/lib/pet_rescue/pet.rb +26 -0
- data/lib/pet_rescue/scraper/version.rb +5 -0
- data/lib/pet_rescue/scraper.rb +33 -0
- data/lib/pet_rescue/search_results_page.rb +46 -0
- data/pet_rescue-scraper.gemspec +30 -0
- data/spec/fixtures/dog_search_first_page.html +1492 -0
- data/spec/fixtures/dog_search_last_page.html +742 -0
- data/spec/fixtures/mau.html +354 -0
- data/spec/fixtures/muttley.html +402 -0
- data/spec/fixtures/vcr_cassettes/dogs.yml +3743 -0
- data/spec/fixtures/wyatt.html +375 -0
- data/spec/pet_rescue/listing_page_parser_spec.rb +140 -0
- data/spec/pet_rescue/listing_page_spec.rb +20 -0
- data/spec/pet_rescue/scraper_spec.rb +15 -0
- data/spec/pet_rescue/search_results_page_spec.rb +32 -0
- data/spec/spec_helper.rb +19 -0
- data/spec/support/vcr.rb +7 -0
- metadata +192 -0
@@ -0,0 +1,375 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<!--[if lt IE 7 ]> <html lang='en' class='ie6 no-js oldie'> <![endif]-->
|
3
|
+
<!--[if IEMobile 7 ]> <html lang='en' class='iemob7 no-js oldie'> <![endif]-->
|
4
|
+
<!--[if IE 7 ]> <html lang='en' class='ie7 no-js oldie'> <![endif]-->
|
5
|
+
<!--[if IE 8 ]> <html lang='en' class='ie8 no-js oldie'> <![endif]-->
|
6
|
+
<!--[if IE 9 ]> <html lang='en' class='ie9 no-js oldie'> <![endif]-->
|
7
|
+
<!--[if (gt IE 9)|!(IE)]><!--> <html lang='en' class='no-js'> <!--<![endif]-->
|
8
|
+
<head><script type="text/javascript">var NREUMQ=NREUMQ||[];NREUMQ.push(["mark","firstbyte",new Date().getTime()]);</script>
|
9
|
+
<meta charset='utf-8'>
|
10
|
+
<title>Wyatt - Medium Male American Staffordshire Terrier Mix in NSW - PetRescue
|
11
|
+
</title>
|
12
|
+
<link href="/favicon.ico" rel="shortcut icon" type="image/vnd.microsoft.icon" />
|
13
|
+
<link href='http://cdn.petrescue.com.au/uploads/pet_photos/2014/9/22/319860_a1ce4_340x340_004c5.jpg' rel='image_src'>
|
14
|
+
<meta content='This little guy is a real cuddle bug. He will be energetic and strong, so obedience is a must, with this he will become a wonderful calm and confid...' name='description'>
|
15
|
+
|
16
|
+
<link href="/assets/application-339bb9cdfdf7da48f8b19302f2e3524d.css" media="all" rel="stylesheet" type="text/css" />
|
17
|
+
<link href="/assets/secondary-b77f59b7435b23a203fa92dd941b6987.css" media="all" rel="stylesheet" type="text/css" />
|
18
|
+
|
19
|
+
<script src="/assets/modernizr-c3fe5867e320146b2ec8807eda3321fe.js" type="text/javascript"></script>
|
20
|
+
<script src="http://w.sharethis.com/button/buttons.js" type="text/javascript"></script>
|
21
|
+
<script>
|
22
|
+
var __st_loadLate=true;
|
23
|
+
stLight.options({publisher: "999ee18a-45b3-4e4f-8c53-62f45e1fa414"});
|
24
|
+
</script>
|
25
|
+
|
26
|
+
<!--[if IE 6]>
|
27
|
+
<script src="/assets/dd_belatedpng-79ea5b16160a7f4ea3e45ee41b7109c1.js" type="text/javascript"></script>
|
28
|
+
<![endif]-->
|
29
|
+
<meta content="authenticity_token" name="csrf-param" />
|
30
|
+
<meta content="xcJM373IDsRQg+72Y0+ZW5jbyXY9eUcRqoRA38E8XL0=" name="csrf-token" />
|
31
|
+
<script>
|
32
|
+
setTimeout(function(){var a=document.createElement("script");
|
33
|
+
var b=document.getElementsByTagName("script")[0];
|
34
|
+
a.src=document.location.protocol+"//dnn506yrbagrg.cloudfront.net/pages/scripts/0013/9890.js?"+Math.floor(new Date().getTime()/3600000);
|
35
|
+
a.async=true;a.type="text/javascript";b.parentNode.insertBefore(a,b)}, 1);
|
36
|
+
</script>
|
37
|
+
<script>
|
38
|
+
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
|
39
|
+
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
|
40
|
+
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
|
41
|
+
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
|
42
|
+
|
43
|
+
ga('create', 'UA-288342-1', 'auto'); // Replace with your property ID.
|
44
|
+
ga('send', 'pageview');
|
45
|
+
</script>
|
46
|
+
|
47
|
+
</head>
|
48
|
+
<body class='public has-sidebar listings listings-show listings-controller show-action'>
|
49
|
+
<div id='fb-root'></div>
|
50
|
+
<script>
|
51
|
+
window.fbAsyncInit = function() {
|
52
|
+
FB.init({
|
53
|
+
appId: "408746099178659",
|
54
|
+
channelUrl: "http://www.petrescue.com.au/channel.html",
|
55
|
+
status: true,
|
56
|
+
cookie: true,
|
57
|
+
xfbml: true
|
58
|
+
});
|
59
|
+
};
|
60
|
+
|
61
|
+
(function(d){
|
62
|
+
var js, id = 'facebook-jssdk', ref = d.getElementsByTagName('script')[0];
|
63
|
+
if (d.getElementById(id)) {return;}
|
64
|
+
js = d.createElement('script'); js.id = id; js.async = true;
|
65
|
+
js.src = "//connect.facebook.net/en_US/all.js";
|
66
|
+
ref.parentNode.insertBefore(js, ref);
|
67
|
+
}(document));
|
68
|
+
</script>
|
69
|
+
|
70
|
+
<script>
|
71
|
+
var googletag = googletag || {};
|
72
|
+
googletag.cmd = googletag.cmd || [];
|
73
|
+
(function() {
|
74
|
+
var gads = document.createElement('script');
|
75
|
+
gads.async = true;
|
76
|
+
gads.type = 'text/javascript';
|
77
|
+
var useSSL = 'https:' == document.location.protocol;
|
78
|
+
gads.src = (useSSL ? 'https:' : 'http:') +
|
79
|
+
'//www.googletagservices.com/tag/js/gpt.js';
|
80
|
+
var node = document.getElementsByTagName('script')[0];
|
81
|
+
node.parentNode.insertBefore(gads, node);
|
82
|
+
})();
|
83
|
+
</script>
|
84
|
+
|
85
|
+
<!--[if lt IE 8]>
|
86
|
+
<div id='browser-upgrade'>
|
87
|
+
<div class='inner'>You are using an outdated web browser which is not fully supported by PetRescue.<br />Please consider upgrading to <a href="http://browsehappy.com/" target="_blank">a faster, more secure browser</a>.</div>
|
88
|
+
</div>
|
89
|
+
<![endif]-->
|
90
|
+
<header id='header' role='banner'>
|
91
|
+
<div class='wrapper'>
|
92
|
+
<h1 id='logo'><a href="/">PetRescue</a></h1>
|
93
|
+
<div class='no_user_signed_in' id='current-user'>
|
94
|
+
<div class='avatar'></div>
|
95
|
+
<div class='name'><a href="/users/sign_in" class="login" data-remote="true">Log In</a></div>
|
96
|
+
<a href="/users/sign_up" class="register">Sign Up</a>
|
97
|
+
</div>
|
98
|
+
|
99
|
+
</div>
|
100
|
+
</header>
|
101
|
+
<nav id='navigation' role='navigation'>
|
102
|
+
<div class='wrapper'>
|
103
|
+
<ul>
|
104
|
+
<li><a href="/">Home</a></li>
|
105
|
+
<li>
|
106
|
+
<span class='dropdown'>Find A Pet</span>
|
107
|
+
<div class='pointer'>
|
108
|
+
<ul>
|
109
|
+
<li class='dogs'><a href="/listings/dogs">Dogs</a></li>
|
110
|
+
<li class='cats'><a href="/listings/cats">Cats</a></li>
|
111
|
+
<li class='other'><a href="/listings/other">Other Pets</a></li>
|
112
|
+
</ul>
|
113
|
+
</div>
|
114
|
+
</li>
|
115
|
+
<li><a href="/about">About Us</a></li>
|
116
|
+
<li><a href="/rescue_directory">Rescue Directory</a></li>
|
117
|
+
<li><a href="/library">Library</a></li>
|
118
|
+
<li><a href="/faq">FAQ</a></li>
|
119
|
+
<li><a href="/get-involved">Get Involved</a></li>
|
120
|
+
<li><a href="/contact">Contact</a></li>
|
121
|
+
</ul>
|
122
|
+
|
123
|
+
</div>
|
124
|
+
</nav>
|
125
|
+
|
126
|
+
<div id='content-wrap'>
|
127
|
+
<div class="ad-970x90 ad-gts" id="div-gpt-ad-1398134396986-0"></div>
|
128
|
+
|
129
|
+
<div class='wrapper'>
|
130
|
+
<div id='notices'>
|
131
|
+
</div>
|
132
|
+
|
133
|
+
<div id='main'>
|
134
|
+
|
135
|
+
<h1>
|
136
|
+
Wyatt
|
137
|
+
</h1>
|
138
|
+
<div id='primary'>
|
139
|
+
<article id='listing_content'>
|
140
|
+
<div class='actions'>
|
141
|
+
<div class='add_remove_favourites'><form accept-charset="UTF-8" action="/users/favourites" method="post"><div style="margin:0;padding:0;display:inline"><input name="utf8" type="hidden" value="✓" /><input name="authenticity_token" type="hidden" value="xcJM373IDsRQg+72Y0+ZW5jbyXY9eUcRqoRA38E8XL0=" /></div><input id="favourites_listing_id319860" name="favourites[listing_id]" type="hidden" value="319860" /><input class="add_to_favourites" name="add_to_favourites" type="submit" value="Favourite" /></form>
|
142
|
+
</div>
|
143
|
+
|
144
|
+
</div>
|
145
|
+
<div class='share_listing'>
|
146
|
+
<h5>Share</h5>
|
147
|
+
<span class='share_button st_facebook_custom'>
|
148
|
+
<span class='icon'></span>
|
149
|
+
</span>
|
150
|
+
<span class='share_button st_twitter_custom'>
|
151
|
+
<span class='icon'></span>
|
152
|
+
</span>
|
153
|
+
<span class='share_button st_googleplus_custom'>
|
154
|
+
<span class='icon'></span>
|
155
|
+
</span>
|
156
|
+
<span class='share_button email_custom' data-link-to='/listings/319860/share'>
|
157
|
+
<span class='icon'></span>
|
158
|
+
Email
|
159
|
+
</span>
|
160
|
+
<script>
|
161
|
+
var el = document.querySelector(".st_facebook_custom");
|
162
|
+
el.setAttribute("displaytext", "Facebook");
|
163
|
+
el.setAttribute("st_via", "PetRescue");
|
164
|
+
el.setAttribute("st_url", "http://www.petrescue.com.au/listings/319860");
|
165
|
+
|
166
|
+
var el = document.querySelector(".st_twitter_custom");
|
167
|
+
el.setAttribute("displaytext", "Twitter");
|
168
|
+
el.setAttribute("st_via", "PetRescue");
|
169
|
+
el.setAttribute("st_url", "http://www.petrescue.com.au/listings/319860");
|
170
|
+
|
171
|
+
var el = document.querySelector(".st_googleplus_custom");
|
172
|
+
el.setAttribute("displaytext", "Google +");
|
173
|
+
el.setAttribute("st_via", "PetRescue");
|
174
|
+
el.setAttribute("st_url", "http://www.petrescue.com.au/listings/319860");
|
175
|
+
|
176
|
+
var el = document.querySelector(".email_custom");
|
177
|
+
el.setAttribute("displaytext", "Email");
|
178
|
+
</script>
|
179
|
+
<span>
|
180
|
+
<a href="/listings/319860/adoption_poster" id="print_adoption_poster" target="_blank">Print Adoption Poster</a>
|
181
|
+
</span>
|
182
|
+
|
183
|
+
</div>
|
184
|
+
<h2 class='species'>Medium Male American Staffordshire Terrier Mix</h2>
|
185
|
+
<h4 class='located_in'>Located in New South Wales</h4>
|
186
|
+
<div class='personality'><p>This little guy is a real cuddle bug. He will be energetic and strong, so obedience is a must, with this he will become a wonderful calm and confident member of the family and will love spending time indoors as well as outdoors. He will be ideal for an active family with or without children. A good size yard is preferable but not essential, a smaller yard is ok as long as you make time for daily walks and play time. He loves water and toys and get along very well with all his puppy playmates. Meal time is one of his favorite time of day and he is happy to share with all his puppy friends. </p></div>
|
187
|
+
|
188
|
+
|
189
|
+
|
190
|
+
<h3>Wyatt’s details</h3>
|
191
|
+
<dl class='pets-details'>
|
192
|
+
<dt class='first age'>Age:</dt>
|
193
|
+
<dd class='first age'>3 months</dd>
|
194
|
+
<dt class='adoption_fee'>Adoption Fee</dt>
|
195
|
+
<dd class='adoption_fee'>$500</dd>
|
196
|
+
<dt class='desexed'>Desexed?</dt>
|
197
|
+
<dd class='desexed'><span class="boolean-image-true boolean-image-yes">Yes</span></dd>
|
198
|
+
<dt class='vaccinated'>Vaccinated?</dt>
|
199
|
+
<dd class='vaccinated'><span class="boolean-image-true boolean-image-yes">Yes</span></dd>
|
200
|
+
<dt class='wormed'>Wormed?</dt>
|
201
|
+
<dd class='wormed'><span class="boolean-image-true boolean-image-yes">Yes</span></dd>
|
202
|
+
<dt class='heart_worm_treated'>Heart Worm Treated?</dt>
|
203
|
+
<dd class='heart_worm_treated'><span class="boolean-image-false boolean-image-no">No</span></dd>
|
204
|
+
<dt class='fostered_by'>Rescue Group:</dt>
|
205
|
+
<dd class='fostered_by'><a href="/groups/10276">J&J Rescue</a></dd>
|
206
|
+
<dt class='contact_name'>Contact:</dt>
|
207
|
+
<dd class='contact_name'>Donna</dd>
|
208
|
+
<dt class='contact_number'>Phone:</dt>
|
209
|
+
<dd>
|
210
|
+
0410510308
|
211
|
+
<strong>- preferred</strong>
|
212
|
+
</dd>
|
213
|
+
<dt class='contact_email'>Email:</dt>
|
214
|
+
<dd class='contact_email'>
|
215
|
+
<a href="/listings/319860/enquire">Send enquiry email</a>
|
216
|
+
</dd>
|
217
|
+
</dl>
|
218
|
+
<div class='poster_with_id'>
|
219
|
+
<div class='additional_details'>
|
220
|
+
<h3 class='animal_id'>
|
221
|
+
PetRescue ID:
|
222
|
+
<span id='listing_id'>319860</span>
|
223
|
+
</h3>
|
224
|
+
</div>
|
225
|
+
</div>
|
226
|
+
|
227
|
+
|
228
|
+
</article>
|
229
|
+
</div>
|
230
|
+
<div id='secondary'>
|
231
|
+
<div class='adopt_this_pet_container'>
|
232
|
+
<a href="/listings/319860/contact_details" id="adopt_this_pet">Find out more about this pet</a>
|
233
|
+
|
234
|
+
</div>
|
235
|
+
<div id='contact_information'>
|
236
|
+
<h3>Contact</h3>
|
237
|
+
<p class='contact_name'>Donna</p>
|
238
|
+
<p class='contact_number'>0410510308</p>
|
239
|
+
<p class='contact_email'>jandjrescue5@gmail.com</p>
|
240
|
+
<p class='contact_preferred_method'>
|
241
|
+
Preferred contact:
|
242
|
+
Phone
|
243
|
+
</p>
|
244
|
+
|
245
|
+
</div>
|
246
|
+
<div id='pet_images'>
|
247
|
+
<div id='featured_photo'>
|
248
|
+
<a href="http://cdn.petrescue.com.au/uploads/pet_photos/2014/9/22/319860_a1ce4_900x900_004c5.jpg" class="fancybox" data-fancybox-group="listing"><img alt="Photo of Wyatt" src="http://cdn.petrescue.com.au/uploads/pet_photos/2014/9/22/319860_a1ce4_340x340_004c5.jpg" /></a>
|
249
|
+
</div>
|
250
|
+
<ul id='thumbnails'>
|
251
|
+
</ul>
|
252
|
+
</div>
|
253
|
+
|
254
|
+
|
255
|
+
<div class="ad-336x280 ad-gts" id="div-gpt-ad-1392619022747-0" margin-bottom="10px"></div>
|
256
|
+
<p class='last_updated_at'>Last updated on <time datetime="2014-10-02T19:45:10+08:00">October 02, 2014 19:45</time></p>
|
257
|
+
<p class='view_count' id='views'>This listing has been viewed 273 times</p>
|
258
|
+
<a href="/listings/319860/report" id="report_listing">Report listing</a>
|
259
|
+
</div>
|
260
|
+
|
261
|
+
|
262
|
+
</div>
|
263
|
+
<div id='sidebar'>
|
264
|
+
<section class='make-a-donation'>
|
265
|
+
<p>PetRescue is a not-for-profit organisation. We rely wholly on the kindness of pet lovers like you to help us save lives. Please donate today.</p>
|
266
|
+
<a href="/donate">Make A Donation</a>
|
267
|
+
</section>
|
268
|
+
<nav class='listing-selection'>
|
269
|
+
<h4>Find a pet</h4>
|
270
|
+
<ul>
|
271
|
+
<li class='dogs'><a href="/listings/dogs">Dogs</a></li>
|
272
|
+
<li class='cats'><a href="/listings/cats">Cats</a></li>
|
273
|
+
<li class='other'><a href="/listings/other">Other Pets</a></li>
|
274
|
+
</ul>
|
275
|
+
</nav>
|
276
|
+
<section id='find-by-petrescue-id'>
|
277
|
+
<form action='#'>
|
278
|
+
<label for='animal_id'>PetRescue ID</label>
|
279
|
+
<input id='animal_id' placeholder='PetRescue ID' type='text'>
|
280
|
+
</form>
|
281
|
+
<a href="/what_is_a_petrescue_id?layout=false" class="fancybox">What is a PetRescue ID?</a>
|
282
|
+
</section>
|
283
|
+
|
284
|
+
|
285
|
+
|
286
|
+
<div class="ad-160x600 ad-gts" id="div-gpt-ad-1371512488971-0"></div>
|
287
|
+
|
288
|
+
</div>
|
289
|
+
</div>
|
290
|
+
|
291
|
+
</div>
|
292
|
+
|
293
|
+
<div id='footer'>
|
294
|
+
<nav class='footer-navigation' role='navigation'>
|
295
|
+
<ul>
|
296
|
+
<li><a href="/">Home</a></li>
|
297
|
+
<li>
|
298
|
+
<span class='dropdown'>Find A Pet</span>
|
299
|
+
<div class='pointer'>
|
300
|
+
<ul>
|
301
|
+
<li class='dogs'><a href="/listings/dogs">Dogs</a></li>
|
302
|
+
<li class='cats'><a href="/listings/cats">Cats</a></li>
|
303
|
+
<li class='other'><a href="/listings/other">Other Pets</a></li>
|
304
|
+
</ul>
|
305
|
+
</div>
|
306
|
+
</li>
|
307
|
+
<li><a href="/about">About Us</a></li>
|
308
|
+
<li><a href="/rescue_directory">Rescue Directory</a></li>
|
309
|
+
<li><a href="/library">Library</a></li>
|
310
|
+
<li><a href="/faq">FAQ</a></li>
|
311
|
+
<li><a href="/get-involved">Get Involved</a></li>
|
312
|
+
<li><a href="/contact">Contact</a></li>
|
313
|
+
</ul>
|
314
|
+
|
315
|
+
</nav>
|
316
|
+
<nav class='footer-sponsors' role='navigation'>
|
317
|
+
<div class='all-sponsors'>
|
318
|
+
<div class='primary'>
|
319
|
+
<h4>Major partners</h4>
|
320
|
+
<ul>
|
321
|
+
<li>
|
322
|
+
<a href="http://www.pedigree.com.au" id="pedigree" target="_blank">Pedigree</a>
|
323
|
+
</li>
|
324
|
+
</ul>
|
325
|
+
</div>
|
326
|
+
<div class='secondary'>
|
327
|
+
<h4>Partners</h4>
|
328
|
+
<ul>
|
329
|
+
<li>
|
330
|
+
<a href="http://www.jetpets.com.au" id="jetpets" target="_blank">Jetpets</a>
|
331
|
+
</li>
|
332
|
+
<li>
|
333
|
+
<a href="http://www.thefrontiergroup.com.au" id="tfg" target="_blank">The Frontier Group</a>
|
334
|
+
</li>
|
335
|
+
<li>
|
336
|
+
<a href="/our_partners" id="our-sponsors" target="_blank">Find out more about our partners</a>
|
337
|
+
</li>
|
338
|
+
</ul>
|
339
|
+
</div>
|
340
|
+
</div>
|
341
|
+
</nav>
|
342
|
+
<div class='footer-copyright'>
|
343
|
+
<div class='copyright-terms-privacy'>
|
344
|
+
<small class='copyright'>© PetRescue Ltd 2004 - 2014</small>
|
345
|
+
<a href="/terms_of_use">Terms of Use</a>
|
346
|
+
<p>&</p>
|
347
|
+
<a href="/privacy">Privacy Policy</a>
|
348
|
+
</div>
|
349
|
+
</div>
|
350
|
+
</div>
|
351
|
+
<script src="/assets/application-820417cd97b2fc12c79b62c348928d41.js" type="text/javascript"></script>
|
352
|
+
|
353
|
+
<script type="text/javascript">
|
354
|
+
googletag.cmd.push(function(){
|
355
|
+
googletag.defineSlot('/4905769/PetRescue_v3_ListingPages_BTF_336x280', [336, 280], 'div-gpt-ad-1392619022747-0').addService(googletag.pubads());googletag.defineSlot('/4905769/PetRescue_v3_BelowNav_ATF_970x90_970x90', [970, 90], 'div-gpt-ad-1398134396986-0').addService(googletag.pubads());googletag.defineSlot('/4905769/PetRescue_v3_Right_ATF_160x600_160x600', [160, 600], 'div-gpt-ad-1371512488971-0').addService(googletag.pubads());
|
356
|
+
googletag.pubads().enableSingleRequest();
|
357
|
+
googletag.pubads().enableAsyncRendering();
|
358
|
+
googletag.enableServices();
|
359
|
+
googletag.display('div-gpt-ad-1392619022747-0');googletag.display('div-gpt-ad-1398134396986-0');googletag.display('div-gpt-ad-1371512488971-0');
|
360
|
+
});
|
361
|
+
</script>
|
362
|
+
|
363
|
+
<script type="text/javascript">if (!NREUMQ.f) { NREUMQ.f=function() {
|
364
|
+
NREUMQ.push(["load",new Date().getTime()]);
|
365
|
+
var e=document.createElement("script");
|
366
|
+
e.type="text/javascript";
|
367
|
+
e.src=(("http:"===document.location.protocol)?"http:":"https:") + "//" +
|
368
|
+
"js-agent.newrelic.com/nr-100.js";
|
369
|
+
document.body.appendChild(e);
|
370
|
+
if(NREUMQ.a)NREUMQ.a();
|
371
|
+
};
|
372
|
+
NREUMQ.a=window.onload;window.onload=NREUMQ.f;
|
373
|
+
};
|
374
|
+
NREUMQ.push(["nrfj","beacon-6.newrelic.com","15830f52ae","792648","JldYFkILClQBQk4IWRZMXwxXF0lLDF8W",0,113,new Date().getTime(),"","","","",""]);</script></body>
|
375
|
+
</html>
|
@@ -0,0 +1,140 @@
|
|
1
|
+
require 'spec_helper'
require 'pet_rescue/listing_page_parser'

# Exercises PetRescue::ListingPageParser#parse against saved listing pages,
# with one example per attribute read from the returned pet.
describe PetRescue::ListingPageParser do
  subject(:parser) { PetRescue::ListingPageParser.new }

  # One parsed pet per fixture. `let` is lazy, so an example only parses the
  # fixture it actually refers to, and each example gets a fresh parse.
  let(:wyatt)   { parser.parse(read_fixture("wyatt.html")) }
  let(:muttley) { parser.parse(read_fixture("muttley.html")) }
  let(:mau)     { parser.parse(read_fixture("mau.html")) }

  describe "#parse" do
    it "extracts the pet's name" do
      expect(wyatt.name).to eq("Wyatt")
    end

    it "extracts the pet's size" do
      expect(wyatt.size).to eq("Medium")
    end

    it "extracts the pet's gender" do
      expect(wyatt.gender).to eq("Male")
    end

    it "extracts the pet's breed" do
      expect(wyatt.breed).to eq("American Staffordshire Terrier Mix")
    end

    it "extracts the pet's age" do
      expect(wyatt.age).to eq("3 months")
    end

    it "extracts the pet's location" do
      expect(wyatt.location).to eq("New South Wales")
    end

    it "extracts whether the pet has been vaccinated" do
      expect(wyatt.vaccinated?).to eq(true)
    end

    it "extracts whether the pet has been desexed" do
      expect(wyatt.desexed?).to eq(true)
    end

    it "extracts a biography of the pet" do
      # The heredoc is wrapped for readability; collapsing whitespace turns it
      # back into the single-line text the expectation compares against.
      expect(wyatt.biography).to eq(<<-BIO.strip.gsub(/\s+/, ' '))
        This little guy is a real cuddle bug. He will be energetic and strong,
        so obedience is a must, with this he will become a wonderful calm and
        confident member of the family and will love spending time indoors as
        well as outdoors. He will be ideal for an active family with or
        without children. A good size yard is preferable but not essential, a
        smaller yard is ok as long as you make time for daily walks and play
        time. He loves water and toys and get along very well with all his
        puppy playmates. Meal time is one of his favorite time of day and he is
        happy to share with all his puppy friends.
      BIO
    end

    it "extracts the name of the pet rescue group" do
      expect(wyatt.rescue_group).to eq("J&J Rescue")
    end

    it "extracts the URL of a small photo of the pet" do
      expect(wyatt.small_photo_url).to eq("http://cdn.petrescue.com.au/uploads/pet_photos/2014/9/22/319860_a1ce4_340x340_004c5.jpg")
    end

    it "extracts the URL of a large photo of the pet" do
      expect(wyatt.large_photo_url).to eq("http://cdn.petrescue.com.au/uploads/pet_photos/2014/9/22/319860_a1ce4_900x900_004c5.jpg")
    end

    it "extracts the pet adoption fee for the pet" do
      expect(wyatt.adoption_fee).to eq(500)
    end

    it "handles the absence of a '$' in the pet adoption fee" do
      expect(muttley.adoption_fee).to eq(300)
    end

    it "extracts the adoption process for the pet when present" do
      expect(mau.adoption_process).to eq(<<-PROCESS.strip.gsub(/\s+/, ' '))
        For more information about Mau please contact Sydney Dogs and Cats Home
      PROCESS
    end

    it "handles the absence of a pet adoption process" do
      expect(wyatt.adoption_process).to be_nil
    end

    it "extracts a contact name for the pet" do
      expect(wyatt.contact_name).to eq("Donna")
    end

    it "handles the absence of a contact name" do
      expect(mau.contact_name).to be_nil
    end

    it "extracts a contact number for the pet" do
      expect(wyatt.contact_number).to eq("0410510308")
    end

    it "returns nil when a contact number is not specified" do
      expect(mau.contact_number).to be_nil
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
require 'pet_rescue/listing_page'

# Checks that ListingPage#pet hands whatever `open` returns to the parser
# it was constructed with.
describe PetRescue::ListingPage do
  describe "#pet" do
    let(:fetched_resource) { spy(:resource) }
    let(:listing_parser)   { spy(:parser) }

    subject(:listing_page) { PetRescue::ListingPage.new(123, listing_parser) }

    # Stub `open` on the page itself so the example never touches the network.
    before do
      allow(listing_page).to receive(:open).and_return(fetched_resource)
    end

    it "passes the listing resource for the given ID to the given parser" do
      listing_page.pet

      expect(listing_parser).to have_received(:parse).with(fetched_resource)
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'spec_helper'
require 'support/vcr'
require 'pet_rescue/scraper'

# Runs the dog-listing scraper inside the 'dogs' VCR cassette.
describe PetRescue::Scraper do
  it "can gather dog listing pages from multiple pages of search results" do
    VCR.use_cassette('dogs') do
      dog_listings = PetRescue::Scraper::DogListings.new(per_page: 48)

      # Ask for one more listing than per_page, so a single page of results
      # cannot satisfy the request.
      collected_ids = dog_listings.take(49).map(&:id)

      expect(collected_ids.size).to eq(49)
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'spec_helper'
require 'pet_rescue/search_results_page'

# Covers SearchResultsPage against saved first and last pages of a dog search.
describe PetRescue::SearchResultsPage do
  let(:first_page) do
    PetRescue::SearchResultsPage.new(read_fixture("dog_search_first_page.html"))
  end

  let(:last_page) do
    PetRescue::SearchResultsPage.new(read_fixture("dog_search_last_page.html"))
  end

  describe "#listing_pages" do
    it "is a collection of listing pages linked to from the page" do
      expect(first_page.listing_pages.size).to eq(48)

      only_listing_pages = first_page.listing_pages.all? do |entry|
        entry.is_a?(PetRescue::ListingPage)
      end
      expect(only_listing_pages).to eq(true)
    end
  end

  describe "#has_next_page?" do
    it "is true when the page is not the last page of search results" do
      expect(first_page.has_next_page?).to eq(true)
    end

    it "is false when the page is the last page of search results" do
      expect(last_page.has_next_page?).to eq(false)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Shared spec setup: load path, opt-in coverage reporting, and fixture helpers.

# Put the project's lib/ directory on the load path. The path is resolved
# against this file's directory: expanding '../lib' against __FILE__ itself
# treats the file name as a path segment, so from spec/spec_helper.rb it
# would yield spec/lib rather than lib/.
$LOAD_PATH << File.expand_path('../lib', File.dirname(__FILE__))

# Coverage is opt-in: SimpleCov is only loaded and started when the COVERAGE
# environment variable is exactly "true".
if ENV["COVERAGE"] == "true"
  require 'simplecov'

  SimpleCov.formatters = [
    SimpleCov::Formatter::HTMLFormatter,
  ]

  SimpleCov.start
end

# Returns the absolute path of +filename+ inside the "fixtures" directory
# that sits next to this helper file.
def fixture_path(filename)
  File.expand_path(File.join("fixtures", filename), File.dirname(__FILE__))
end

# Opens the named fixture and returns the File object.
#
# NOTE(review): the handle is returned open and is never closed here; the
# caller owns it. Returning the contents as a String would avoid leaking a
# descriptor per call, but only if every consumer accepts a String — confirm
# against the parser and SearchResultsPage before changing the return type.
def read_fixture(filename)
  File.open(fixture_path(filename))
end
|