pet_rescue-scraper 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,375 @@
1
+ <!DOCTYPE html>
2
+ <!--[if lt IE 7 ]> <html lang='en' class='ie6 no-js oldie'> <![endif]-->
3
+ <!--[if IEMobile 7 ]> <html lang='en' class='iemob7 no-js oldie'> <![endif]-->
4
+ <!--[if IE 7 ]> <html lang='en' class='ie7 no-js oldie'> <![endif]-->
5
+ <!--[if IE 8 ]> <html lang='en' class='ie8 no-js oldie'> <![endif]-->
6
+ <!--[if IE 9 ]> <html lang='en' class='ie9 no-js oldie'> <![endif]-->
7
+ <!--[if (gt IE 9)|!(IE)]><!--> <html lang='en' class='no-js'> <!--<![endif]-->
8
+ <head><script type="text/javascript">var NREUMQ=NREUMQ||[];NREUMQ.push(["mark","firstbyte",new Date().getTime()]);</script>
9
+ <meta charset='utf-8'>
10
+ <title>Wyatt - Medium Male American Staffordshire Terrier Mix in NSW - PetRescue
11
+ </title>
12
+ <link href="/favicon.ico" rel="shortcut icon" type="image/vnd.microsoft.icon" />
13
+ <link href='http://cdn.petrescue.com.au/uploads/pet_photos/2014/9/22/319860_a1ce4_340x340_004c5.jpg' rel='image_src'>
14
+ <meta content='This little guy is a real cuddle bug. He will be energetic and strong, so obedience is a must, with this he will become a wonderful calm and confid...' name='description'>
15
+
16
+ <link href="/assets/application-339bb9cdfdf7da48f8b19302f2e3524d.css" media="all" rel="stylesheet" type="text/css" />
17
+ <link href="/assets/secondary-b77f59b7435b23a203fa92dd941b6987.css" media="all" rel="stylesheet" type="text/css" />
18
+
19
+ <script src="/assets/modernizr-c3fe5867e320146b2ec8807eda3321fe.js" type="text/javascript"></script>
20
+ <script src="http://w.sharethis.com/button/buttons.js" type="text/javascript"></script>
21
+ <script>
22
+ var __st_loadLate=true;
23
+ stLight.options({publisher: "999ee18a-45b3-4e4f-8c53-62f45e1fa414"});
24
+ </script>
25
+
26
+ <!--[if IE 6]>
27
+ <script src="/assets/dd_belatedpng-79ea5b16160a7f4ea3e45ee41b7109c1.js" type="text/javascript"></script>
28
+ <![endif]-->
29
+ <meta content="authenticity_token" name="csrf-param" />
30
+ <meta content="xcJM373IDsRQg+72Y0+ZW5jbyXY9eUcRqoRA38E8XL0=" name="csrf-token" />
31
+ <script>
32
+ setTimeout(function(){var a=document.createElement("script");
33
+ var b=document.getElementsByTagName("script")[0];
34
+ a.src=document.location.protocol+"//dnn506yrbagrg.cloudfront.net/pages/scripts/0013/9890.js?"+Math.floor(new Date().getTime()/3600000);
35
+ a.async=true;a.type="text/javascript";b.parentNode.insertBefore(a,b)}, 1);
36
+ </script>
37
+ <script>
38
+ (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
39
+ (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
40
+ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
41
+ })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
42
+
43
+ ga('create', 'UA-288342-1', 'auto'); // Replace with your property ID.
44
+ ga('send', 'pageview');
45
+ </script>
46
+
47
+ </head>
48
+ <body class='public has-sidebar listings listings-show listings-controller show-action'>
49
+ <div id='fb-root'></div>
50
+ <script>
51
+ window.fbAsyncInit = function() {
52
+ FB.init({
53
+ appId: "408746099178659",
54
+ channelUrl: "http://www.petrescue.com.au/channel.html",
55
+ status: true,
56
+ cookie: true,
57
+ xfbml: true
58
+ });
59
+ };
60
+
61
+ (function(d){
62
+ var js, id = 'facebook-jssdk', ref = d.getElementsByTagName('script')[0];
63
+ if (d.getElementById(id)) {return;}
64
+ js = d.createElement('script'); js.id = id; js.async = true;
65
+ js.src = "//connect.facebook.net/en_US/all.js";
66
+ ref.parentNode.insertBefore(js, ref);
67
+ }(document));
68
+ </script>
69
+
70
+ <script>
71
+ var googletag = googletag || {};
72
+ googletag.cmd = googletag.cmd || [];
73
+ (function() {
74
+ var gads = document.createElement('script');
75
+ gads.async = true;
76
+ gads.type = 'text/javascript';
77
+ var useSSL = 'https:' == document.location.protocol;
78
+ gads.src = (useSSL ? 'https:' : 'http:') +
79
+ '//www.googletagservices.com/tag/js/gpt.js';
80
+ var node = document.getElementsByTagName('script')[0];
81
+ node.parentNode.insertBefore(gads, node);
82
+ })();
83
+ </script>
84
+
85
+ <!--[if lt IE 8]>
86
+ <div id='browser-upgrade'>
87
+ <div class='inner'>You are using an outdated web browser which is not fully supported by PetRescue.<br />Please consider upgrading to <a href="http://browsehappy.com/" target="_blank">a faster, more secure browser</a>.</div>
88
+ </div>
89
+ <![endif]-->
90
+ <header id='header' role='banner'>
91
+ <div class='wrapper'>
92
+ <h1 id='logo'><a href="/">PetRescue</a></h1>
93
+ <div class='no_user_signed_in' id='current-user'>
94
+ <div class='avatar'></div>
95
+ <div class='name'><a href="/users/sign_in" class="login" data-remote="true">Log In</a></div>
96
+ <a href="/users/sign_up" class="register">Sign Up</a>
97
+ </div>
98
+
99
+ </div>
100
+ </header>
101
+ <nav id='navigation' role='navigation'>
102
+ <div class='wrapper'>
103
+ <ul>
104
+ <li><a href="/">Home</a></li>
105
+ <li>
106
+ <span class='dropdown'>Find A Pet</span>
107
+ <div class='pointer'>
108
+ <ul>
109
+ <li class='dogs'><a href="/listings/dogs">Dogs</a></li>
110
+ <li class='cats'><a href="/listings/cats">Cats</a></li>
111
+ <li class='other'><a href="/listings/other">Other Pets</a></li>
112
+ </ul>
113
+ </div>
114
+ </li>
115
+ <li><a href="/about">About Us</a></li>
116
+ <li><a href="/rescue_directory">Rescue Directory</a></li>
117
+ <li><a href="/library">Library</a></li>
118
+ <li><a href="/faq">FAQ</a></li>
119
+ <li><a href="/get-involved">Get Involved</a></li>
120
+ <li><a href="/contact">Contact</a></li>
121
+ </ul>
122
+
123
+ </div>
124
+ </nav>
125
+
126
+ <div id='content-wrap'>
127
+ <div class="ad-970x90 ad-gts" id="div-gpt-ad-1398134396986-0"></div>
128
+
129
+ <div class='wrapper'>
130
+ <div id='notices'>
131
+ </div>
132
+
133
+ <div id='main'>
134
+
135
+ <h1>
136
+ Wyatt
137
+ </h1>
138
+ <div id='primary'>
139
+ <article id='listing_content'>
140
+ <div class='actions'>
141
+ <div class='add_remove_favourites'><form accept-charset="UTF-8" action="/users/favourites" method="post"><div style="margin:0;padding:0;display:inline"><input name="utf8" type="hidden" value="&#x2713;" /><input name="authenticity_token" type="hidden" value="xcJM373IDsRQg+72Y0+ZW5jbyXY9eUcRqoRA38E8XL0=" /></div><input id="favourites_listing_id319860" name="favourites[listing_id]" type="hidden" value="319860" /><input class="add_to_favourites" name="add_to_favourites" type="submit" value="Favourite" /></form>
142
+ </div>
143
+
144
+ </div>
145
+ <div class='share_listing'>
146
+ <h5>Share</h5>
147
+ <span class='share_button st_facebook_custom'>
148
+ <span class='icon'></span>
149
+ </span>
150
+ <span class='share_button st_twitter_custom'>
151
+ <span class='icon'></span>
152
+ </span>
153
+ <span class='share_button st_googleplus_custom'>
154
+ <span class='icon'></span>
155
+ </span>
156
+ <span class='share_button email_custom' data-link-to='/listings/319860/share'>
157
+ <span class='icon'></span>
158
+ Email
159
+ </span>
160
+ <script>
161
+ var el = document.querySelector(".st_facebook_custom");
162
+ el.setAttribute("displaytext", "Facebook");
163
+ el.setAttribute("st_via", "PetRescue");
164
+ el.setAttribute("st_url", "http://www.petrescue.com.au/listings/319860");
165
+
166
+ var el = document.querySelector(".st_twitter_custom");
167
+ el.setAttribute("displaytext", "Twitter");
168
+ el.setAttribute("st_via", "PetRescue");
169
+ el.setAttribute("st_url", "http://www.petrescue.com.au/listings/319860");
170
+
171
+ var el = document.querySelector(".st_googleplus_custom");
172
+ el.setAttribute("displaytext", "Google +");
173
+ el.setAttribute("st_via", "PetRescue");
174
+ el.setAttribute("st_url", "http://www.petrescue.com.au/listings/319860");
175
+
176
+ var el = document.querySelector(".email_custom");
177
+ el.setAttribute("displaytext", "Email");
178
+ </script>
179
+ <span>
180
+ <a href="/listings/319860/adoption_poster" id="print_adoption_poster" target="_blank">Print Adoption Poster</a>
181
+ </span>
182
+
183
+ </div>
184
+ <h2 class='species'>Medium Male American Staffordshire Terrier Mix</h2>
185
+ <h4 class='located_in'>Located in New South Wales</h4>
186
+ <div class='personality'><p>This little guy is a real cuddle bug. He will be energetic and strong, so obedience is a must, with this he will become a wonderful calm and confident member of the family and will love spending time indoors as well as outdoors. He will be ideal for an active family with or without children. A good size yard is preferable but not essential, a smaller yard is ok as long as you make time for daily walks and play time. He loves water and toys and get along very well with all his puppy playmates. Meal time is one of his favorite time of day and he is happy to share with all his puppy friends. </p></div>
187
+
188
+
189
+
190
+ <h3>Wyatt’s details</h3>
191
+ <dl class='pets-details'>
192
+ <dt class='first age'>Age:</dt>
193
+ <dd class='first age'>3 months</dd>
194
+ <dt class='adoption_fee'>Adoption Fee</dt>
195
+ <dd class='adoption_fee'>$500</dd>
196
+ <dt class='desexed'>Desexed?</dt>
197
+ <dd class='desexed'><span class="boolean-image-true boolean-image-yes">Yes</span></dd>
198
+ <dt class='vaccinated'>Vaccinated?</dt>
199
+ <dd class='vaccinated'><span class="boolean-image-true boolean-image-yes">Yes</span></dd>
200
+ <dt class='wormed'>Wormed?</dt>
201
+ <dd class='wormed'><span class="boolean-image-true boolean-image-yes">Yes</span></dd>
202
+ <dt class='heart_worm_treated'>Heart Worm Treated?</dt>
203
+ <dd class='heart_worm_treated'><span class="boolean-image-false boolean-image-no">No</span></dd>
204
+ <dt class='fostered_by'>Rescue Group:</dt>
205
+ <dd class='fostered_by'><a href="/groups/10276">J&amp;J Rescue</a></dd>
206
+ <dt class='contact_name'>Contact:</dt>
207
+ <dd class='contact_name'>Donna</dd>
208
+ <dt class='contact_number'>Phone:</dt>
209
+ <dd>
210
+ 0410510308
211
+ <strong>- preferred</strong>
212
+ </dd>
213
+ <dt class='contact_email'>Email:</dt>
214
+ <dd class='contact_email'>
215
+ <a href="/listings/319860/enquire">Send enquiry email</a>
216
+ </dd>
217
+ </dl>
218
+ <div class='poster_with_id'>
219
+ <div class='additional_details'>
220
+ <h3 class='animal_id'>
221
+ PetRescue ID:
222
+ <span id='listing_id'>319860</span>
223
+ </h3>
224
+ </div>
225
+ </div>
226
+
227
+
228
+ </article>
229
+ </div>
230
+ <div id='secondary'>
231
+ <div class='adopt_this_pet_container'>
232
+ <a href="/listings/319860/contact_details" id="adopt_this_pet">Find out more about this pet</a>
233
+
234
+ </div>
235
+ <div id='contact_information'>
236
+ <h3>Contact</h3>
237
+ <p class='contact_name'>Donna</p>
238
+ <p class='contact_number'>0410510308</p>
239
+ <p class='contact_email'>jandjrescue5@gmail.com</p>
240
+ <p class='contact_preferred_method'>
241
+ Preferred contact:
242
+ Phone
243
+ </p>
244
+
245
+ </div>
246
+ <div id='pet_images'>
247
+ <div id='featured_photo'>
248
+ <a href="http://cdn.petrescue.com.au/uploads/pet_photos/2014/9/22/319860_a1ce4_900x900_004c5.jpg" class="fancybox" data-fancybox-group="listing"><img alt="Photo of Wyatt" src="http://cdn.petrescue.com.au/uploads/pet_photos/2014/9/22/319860_a1ce4_340x340_004c5.jpg" /></a>
249
+ </div>
250
+ <ul id='thumbnails'>
251
+ </ul>
252
+ </div>
253
+
254
+
255
+ <div class="ad-336x280 ad-gts" id="div-gpt-ad-1392619022747-0" margin-bottom="10px"></div>
256
+ <p class='last_updated_at'>Last updated on <time datetime="2014-10-02T19:45:10+08:00">October 02, 2014 19:45</time></p>
257
+ <p class='view_count' id='views'>This listing has been viewed 273 times</p>
258
+ <a href="/listings/319860/report" id="report_listing">Report listing</a>
259
+ </div>
260
+
261
+
262
+ </div>
263
+ <div id='sidebar'>
264
+ <section class='make-a-donation'>
265
+ <p>PetRescue is a not-for-profit organisation. We rely wholly on the kindness of pet lovers like you to help us save lives. Please donate today.</p>
266
+ <a href="/donate">Make A Donation</a>
267
+ </section>
268
+ <nav class='listing-selection'>
269
+ <h4>Find a pet</h4>
270
+ <ul>
271
+ <li class='dogs'><a href="/listings/dogs">Dogs</a></li>
272
+ <li class='cats'><a href="/listings/cats">Cats</a></li>
273
+ <li class='other'><a href="/listings/other">Other Pets</a></li>
274
+ </ul>
275
+ </nav>
276
+ <section id='find-by-petrescue-id'>
277
+ <form action='#'>
278
+ <label for='animal_id'>PetRescue ID</label>
279
+ <input id='animal_id' placeholder='PetRescue ID' type='text'>
280
+ </form>
281
+ <a href="/what_is_a_petrescue_id?layout=false" class="fancybox">What is a PetRescue ID?</a>
282
+ </section>
283
+
284
+
285
+
286
+ <div class="ad-160x600 ad-gts" id="div-gpt-ad-1371512488971-0"></div>
287
+
288
+ </div>
289
+ </div>
290
+
291
+ </div>
292
+
293
+ <div id='footer'>
294
+ <nav class='footer-navigation' role='navigation'>
295
+ <ul>
296
+ <li><a href="/">Home</a></li>
297
+ <li>
298
+ <span class='dropdown'>Find A Pet</span>
299
+ <div class='pointer'>
300
+ <ul>
301
+ <li class='dogs'><a href="/listings/dogs">Dogs</a></li>
302
+ <li class='cats'><a href="/listings/cats">Cats</a></li>
303
+ <li class='other'><a href="/listings/other">Other Pets</a></li>
304
+ </ul>
305
+ </div>
306
+ </li>
307
+ <li><a href="/about">About Us</a></li>
308
+ <li><a href="/rescue_directory">Rescue Directory</a></li>
309
+ <li><a href="/library">Library</a></li>
310
+ <li><a href="/faq">FAQ</a></li>
311
+ <li><a href="/get-involved">Get Involved</a></li>
312
+ <li><a href="/contact">Contact</a></li>
313
+ </ul>
314
+
315
+ </nav>
316
+ <nav class='footer-sponsors' role='navigation'>
317
+ <div class='all-sponsors'>
318
+ <div class='primary'>
319
+ <h4>Major partners</h4>
320
+ <ul>
321
+ <li>
322
+ <a href="http://www.pedigree.com.au" id="pedigree" target="_blank">Pedigree</a>
323
+ </li>
324
+ </ul>
325
+ </div>
326
+ <div class='secondary'>
327
+ <h4>Partners</h4>
328
+ <ul>
329
+ <li>
330
+ <a href="http://www.jetpets.com.au" id="jetpets" target="_blank">Jetpets</a>
331
+ </li>
332
+ <li>
333
+ <a href="http://www.thefrontiergroup.com.au" id="tfg" target="_blank">The Frontier Group</a>
334
+ </li>
335
+ <li>
336
+ <a href="/our_partners" id="our-sponsors" target="_blank">Find out more about our partners</a>
337
+ </li>
338
+ </ul>
339
+ </div>
340
+ </div>
341
+ </nav>
342
+ <div class='footer-copyright'>
343
+ <div class='copyright-terms-privacy'>
344
+ <small class='copyright'>© PetRescue Ltd 2004 - 2014</small>
345
+ <a href="/terms_of_use">Terms of Use</a>
346
+ <p>&amp;</p>
347
+ <a href="/privacy">Privacy Policy</a>
348
+ </div>
349
+ </div>
350
+ </div>
351
+ <script src="/assets/application-820417cd97b2fc12c79b62c348928d41.js" type="text/javascript"></script>
352
+
353
+ <script type="text/javascript">
354
+ googletag.cmd.push(function(){
355
+ googletag.defineSlot('/4905769/PetRescue_v3_ListingPages_BTF_336x280', [336, 280], 'div-gpt-ad-1392619022747-0').addService(googletag.pubads());googletag.defineSlot('/4905769/PetRescue_v3_BelowNav_ATF_970x90_970x90', [970, 90], 'div-gpt-ad-1398134396986-0').addService(googletag.pubads());googletag.defineSlot('/4905769/PetRescue_v3_Right_ATF_160x600_160x600', [160, 600], 'div-gpt-ad-1371512488971-0').addService(googletag.pubads());
356
+ googletag.pubads().enableSingleRequest();
357
+ googletag.pubads().enableAsyncRendering();
358
+ googletag.enableServices();
359
+ googletag.display('div-gpt-ad-1392619022747-0');googletag.display('div-gpt-ad-1398134396986-0');googletag.display('div-gpt-ad-1371512488971-0');
360
+ });
361
+ </script>
362
+
363
+ <script type="text/javascript">if (!NREUMQ.f) { NREUMQ.f=function() {
364
+ NREUMQ.push(["load",new Date().getTime()]);
365
+ var e=document.createElement("script");
366
+ e.type="text/javascript";
367
+ e.src=(("http:"===document.location.protocol)?"http:":"https:") + "//" +
368
+ "js-agent.newrelic.com/nr-100.js";
369
+ document.body.appendChild(e);
370
+ if(NREUMQ.a)NREUMQ.a();
371
+ };
372
+ NREUMQ.a=window.onload;window.onload=NREUMQ.f;
373
+ };
374
+ NREUMQ.push(["nrfj","beacon-6.newrelic.com","15830f52ae","792648","JldYFkILClQBQk4IWRZMXwxXF0lLDF8W",0,113,new Date().getTime(),"","","","",""]);</script></body>
375
+ </html>
@@ -0,0 +1,140 @@
1
+ require 'spec_helper'
2
+ require 'pet_rescue/listing_page_parser'
3
+
4
# Specs for PetRescue::ListingPageParser#parse, driven by saved listing pages
# loaded through the read_fixture spec helper.
describe PetRescue::ListingPageParser do
  subject(:parser) { described_class.new }

  # Parsed fixtures. `let` is lazy, so a fixture is only opened and parsed by
  # the examples that actually reference it, once per example.
  let(:wyatt)   { parser.parse(read_fixture("wyatt.html")) }
  let(:muttley) { parser.parse(read_fixture("muttley.html")) }
  let(:mau)     { parser.parse(read_fixture("mau.html")) }

  describe "#parse" do
    it "extracts the pet's name" do
      expect(wyatt.name).to eq("Wyatt")
    end

    it "extracts the pet's size" do
      expect(wyatt.size).to eq("Medium")
    end

    it "extracts the pet's gender" do
      expect(wyatt.gender).to eq("Male")
    end

    it "extracts the pet's breed" do
      expect(wyatt.breed).to eq("American Staffordshire Terrier Mix")
    end

    it "extracts the pet's age" do
      expect(wyatt.age).to eq("3 months")
    end

    it "extracts the pet's location" do
      expect(wyatt.location).to eq("New South Wales")
    end

    it "extracts whether the pet has been vaccinated" do
      expect(wyatt.vaccinated?).to eq(true)
    end

    it "extracts whether the pet has been desexed" do
      expect(wyatt.desexed?).to eq(true)
    end

    it "extracts a biography of the pet" do
      # The heredoc is wrapped for readability; splitting on whitespace and
      # re-joining collapses it into the single-spaced, one-line form expected.
      expected_biography = <<-BIO.split.join(' ')
        This little guy is a real cuddle bug. He will be energetic and strong,
        so obedience is a must, with this he will become a wonderful calm and
        confident member of the family and will love spending time indoors as
        well as outdoors. He will be ideal for an active family with or
        without children. A good size yard is preferable but not essential, a
        smaller yard is ok as long as you make time for daily walks and play
        time. He loves water and toys and get along very well with all his
        puppy playmates. Meal time is one of his favorite time of day and he is
        happy to share with all his puppy friends.
      BIO

      expect(wyatt.biography).to eq(expected_biography)
    end

    it "extracts the name of the pet rescue group" do
      expect(wyatt.rescue_group).to eq("J&J Rescue")
    end

    it "extracts the URL of a small photo of the pet" do
      expect(wyatt.small_photo_url).to eq("http://cdn.petrescue.com.au/uploads/pet_photos/2014/9/22/319860_a1ce4_340x340_004c5.jpg")
    end

    it "extracts the URL of a large photo of the pet" do
      expect(wyatt.large_photo_url).to eq("http://cdn.petrescue.com.au/uploads/pet_photos/2014/9/22/319860_a1ce4_900x900_004c5.jpg")
    end

    it "extracts the pet adoption fee for the pet" do
      expect(wyatt.adoption_fee).to eq(500)
    end

    it "handles the absence of a '$' in the pet adoption fee" do
      expect(muttley.adoption_fee).to eq(300)
    end

    it "extracts the adoption process for the pet when present" do
      expect(mau.adoption_process).to eq(
        "For more information about Mau please contact Sydney Dogs and Cats Home"
      )
    end

    it "handles the absence of a pet adoption process " do
      expect(wyatt.adoption_process).to eq(nil)
    end

    it "extracts a contact name for the pet" do
      expect(wyatt.contact_name).to eq("Donna")
    end

    it "handles the absence of a contact name" do
      expect(mau.contact_name).to eq(nil)
    end

    it "extracts a contact number for the pet" do
      expect(wyatt.contact_number).to eq("0410510308")
    end

    it "returns nil when a contact number is not specified" do
      expect(mau.contact_number).to eq(nil)
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'spec_helper'
2
+ require 'pet_rescue/listing_page'
3
+
4
# Spec for PetRescue::ListingPage#pet: whatever the page's `open` call returns
# must be handed to the parser's `parse` method.
describe PetRescue::ListingPage do
  describe "#pet" do
    it "passes the listing resource for the given ID to the given parser" do
      fetched_resource = spy(:resource)
      parser_spy = spy(:parser)
      listing_page = PetRescue::ListingPage.new(123, parser_spy)

      # Stub `open` on the page itself so the example supplies the resource
      # instead of the real call.
      allow(listing_page).to receive(:open).and_return(fetched_resource)

      listing_page.pet

      expect(parser_spy).to have_received(:parse).with(fetched_resource)
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+ require 'support/vcr'
3
+ require 'pet_rescue/scraper'
4
+
5
# Spec for the dog-listings scraper, replayed from the 'dogs' VCR cassette.
describe PetRescue::Scraper do
  it "can gather dog listing pages from multiple pages of search results" do
    VCR.use_cassette('dogs') do
      dog_listings = PetRescue::Scraper::DogListings.new(per_page: 48)

      # Taking 49 listings with per_page set to 48 asks for one more listing
      # than a single page holds.
      collected_ids = dog_listings.take(49).map(&:id)

      expect(collected_ids.size).to eq(49)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+ require 'pet_rescue/search_results_page'
3
+
4
# Specs for PetRescue::SearchResultsPage, built from saved search-result
# documents loaded through the read_fixture spec helper.
describe PetRescue::SearchResultsPage do
  # Builds a results page from the named fixture document.
  def results_page_from(fixture_name)
    PetRescue::SearchResultsPage.new(read_fixture(fixture_name))
  end

  describe "#listing_pages" do
    it "is a collection of listing pages linked to from the page" do
      page = results_page_from("dog_search_first_page.html")

      expect(page.listing_pages.size).to eq(48)

      only_listing_pages = page.listing_pages.all? do |candidate|
        candidate.is_a?(PetRescue::ListingPage)
      end
      expect(only_listing_pages).to eq(true)
    end
  end

  describe "#has_next_page?" do
    it "is true when the page is not the last page of search results" do
      page = results_page_from("dog_search_first_page.html")

      expect(page.has_next_page?).to eq(true)
    end

    it "is false when the page is the last page of search results" do
      page = results_page_from("dog_search_last_page.html")

      expect(page.has_next_page?).to eq(false)
    end
  end
end
@@ -0,0 +1,19 @@
1
# Make the project's lib/ directory requirable from the specs.
#
# File.expand_path(relative, __FILE__) uses this file's own path as the
# starting point, so the first '..' only climbs to the directory containing
# this helper; the second '..' reaches that directory's parent.
# NOTE(review): assumes this helper sits directly inside spec/ with lib/ as a
# sibling of spec/ (the fixture and VCR cassette paths suggest so) - confirm.
$LOAD_PATH << File.expand_path('../../lib', __FILE__)

# Coverage reporting is opt-in: SimpleCov is only loaded and started when the
# COVERAGE environment variable is exactly "true".
if ENV["COVERAGE"] == "true"
  require 'simplecov'

  SimpleCov.formatters = [
    SimpleCov::Formatter::HTMLFormatter,
  ]

  SimpleCov.start
end
12
+
13
# Absolute path of a fixture stored in the "fixtures" directory that sits
# beside this helper file.
#
# filename - fixture name relative to that directory (e.g. "wyatt.html").
#
# Returns the expanded path as a String. The file is not checked for existence.
def fixture_path(filename)
  File.expand_path(File.join("fixtures", filename), File.dirname(__FILE__))
end

# Opens a fixture for reading.
#
# filename - fixture name, resolved through fixture_path.
# block    - optional; when given it is forwarded to File.open, which yields
#            the open File and closes it when the block finishes.
#
# Returns the open File when called without a block (the caller is then
# responsible for closing it), or the block's value when a block is given.
# Raises Errno::ENOENT if the fixture does not exist.
def read_fixture(filename, &block)
  File.open(fixture_path(filename), &block)
end
@@ -0,0 +1,7 @@
1
+ require 'vcr'
2
+ require 'webmock'
3
+
4
# VCR setup for specs that replay recorded HTTP traffic.
VCR.configure do |config|
  # Cassette directory; the path is relative, not anchored to this file.
  config.cassette_library_dir = 'spec/fixtures/vcr_cassettes'
  # Use WebMock as the HTTP hook.
  config.hook_into :webmock
end
+ end