libcraigscrape 1.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +12 -1
- data/Gemfile +12 -0
- data/Rakefile +1 -54
- data/bin/craig_report_schema.yml +4 -1
- data/bin/craigwatch +148 -146
- data/bin/report_mailer/report.html.erb +20 -0
- data/bin/report_mailer/{craigslist_report.plain.erb → report.text.erb} +7 -6
- data/lib/geo_listings.rb +1 -1
- data/lib/libcraigscrape.rb +52 -59
- data/lib/listings.rb +75 -39
- data/lib/posting.rb +120 -63
- data/lib/scraper.rb +43 -63
- data/spec/assets/geolisting_iso_us_120412.html +441 -0
- data/spec/assets/listing_cta_ftl_112612.html +1470 -0
- data/spec/assets/listing_rea_miami_123012.html +1397 -0
- data/spec/assets/listing_search_ppa_nyc_121212.html +1584 -0
- data/spec/assets/posting_daytona_art_120512-2.html +160 -0
- data/spec/assets/posting_daytona_art_120512.html +153 -0
- data/spec/assets/posting_mdc_cto_ftl_112612.html +170 -0
- data/spec/assets/posting_mdc_reb_120612.html +183 -0
- data/spec/assets/posting_sfbay_1226.html +157 -0
- data/spec/assets/posting_sya_121012-2.html +122 -0
- data/spec/assets/posting_sya_121012.html +165 -0
- data/spec/assets/this_post_has_expired_old.html +48 -0
- data/spec/geolisting_spec.rb +9 -0
- data/spec/listings_spec.rb +77 -0
- data/spec/postings_spec.rb +157 -0
- data/spec/spec_helper.rb +8 -0
- data/test/test_craigslist_geolisting.rb +5 -5
- data/test/test_craigslist_listing.rb +30 -30
- data/test/test_craigslist_posting.rb +25 -145
- metadata +200 -114
- data/bin/report_mailer/craigslist_report.html.erb +0 -17
@@ -0,0 +1,48 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title></title>
|
5
|
+
<meta name="robots" content="NOARCHIVE,NOFOLLOW">
|
6
|
+
<link type="text/css" rel="stylesheet" media="all" href="http://www.craigslist.org/styles/craigslist.css?v=8">
|
7
|
+
</head>
|
8
|
+
|
9
|
+
<body class="posting">
|
10
|
+
|
11
|
+
|
12
|
+
<div class="bchead">
|
13
|
+
|
14
|
+
<a href="http://charleston.craigslist.org/">charleston craigslist</a> >
|
15
|
+
|
16
|
+
<a href="http://charleston.craigslist.org/sss/">for sale / wanted</a> >
|
17
|
+
<a href="http://charleston.craigslist.org/cto/">cars & trucks - by owner</a>
|
18
|
+
</div>
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
<hr>
|
25
|
+
<br>
|
26
|
+
<br>
|
27
|
+
<h2>This posting has expired.</h2>
|
28
|
+
<h5>(The title on the listings page will be removed in just a few minutes.)</h5>
|
29
|
+
|
30
|
+
<br><br>
|
31
|
+
|
32
|
+
<hr>
|
33
|
+
<ul class="clfooter">
|
34
|
+
<li>Copyright © 2011 craigslist, inc.</li>
|
35
|
+
<li><a href="http://www.craigslist.org/about/terms.of.use.html">terms of use</a></li>
|
36
|
+
<li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
|
37
|
+
<li><a href="/forums/?forumID=8">feedback forum</a></li>
|
38
|
+
</ul>
|
39
|
+
|
40
|
+
<script type="text/javascript" src="http://www.craigslist.org/js/jquery-1.4.2.js"></script>
|
41
|
+
<script type="text/javascript" src="http://www.craigslist.org/js/postings.js"></script>
|
42
|
+
<script type="text/javascript"><!--
|
43
|
+
pID = 1968731193;
|
44
|
+
-->
|
45
|
+
</script>
|
46
|
+
</body>
|
47
|
+
</html>
|
48
|
+
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe CraigScrape::Listings do
|
5
|
+
context "listing_cta_ftl_112612.html" do
|
6
|
+
subject { described_class.new( uri_for('listing_cta_ftl_112612.html') ) }
|
7
|
+
specify{ subject.posts.should have(100).items }
|
8
|
+
specify{ subject.posts.collect(&:post_date).uniq.should eq([Date.strptime('11/26/2012', '%m/%d/%Y')]) }
|
9
|
+
specify{ subject.next_page_href.should eq('index100.html') }
|
10
|
+
specify{ subject.posts[0].attributes.should eq({
|
11
|
+
:label => '#2009 Lexus GS 450h 4dr Car Hybrid (only 20,733 miles)',
|
12
|
+
:href => 'http://miami.craigslist.org/pbc/ctd/3437084110.html',
|
13
|
+
:url => 'http://miami.craigslist.org/pbc/ctd/3437084110.html',
|
14
|
+
:location => 'Lake Worth',
|
15
|
+
:section => 'dealer',
|
16
|
+
:img_types => [:img],
|
17
|
+
:post_date => Date.parse('2012/11/26') }) }
|
18
|
+
specify{ subject.posts[1].attributes.should eq({
|
19
|
+
:label => 'we buy junk-bus-truck- car for cash!!$500-$5000-5612062848',
|
20
|
+
:price => Money.new(500000, 'USD'),
|
21
|
+
:href => 'http://miami.craigslist.org/brw/ctd/3437083983.html',
|
22
|
+
:url => 'http://miami.craigslist.org/brw/ctd/3437083983.html',
|
23
|
+
:location => 'all over',
|
24
|
+
:section => 'dealer',
|
25
|
+
:img_types => [],
|
26
|
+
:post_date => Date.parse('2012/11/26') }) }
|
27
|
+
end
|
28
|
+
|
29
|
+
context 'listing_search_ppa_nyc_121212.html' do
|
30
|
+
subject { described_class.new( uri_for('listing_search_ppa_nyc_121212.html') ) }
|
31
|
+
|
32
|
+
specify{ subject.posts.should have(100).items }
|
33
|
+
specify{ subject.posts.collect(&:post_date).uniq.should eq(['12/12/2012',
|
34
|
+
'12/11/2012', '12/10/2012'].collect{|t| Date.strptime(t, "%m/%d/%Y") } ) }
|
35
|
+
specify{ subject.next_page_href.should eq('http://newyork.craigslist.org/search/ppa?query=kenmore&srchType=A&s=100') }
|
36
|
+
specify{ subject.posts[0].attributes.should eq({
|
37
|
+
:label => 'Staten island appliance repair',
|
38
|
+
:href => 'http://newyork.craigslist.org/stn/app/3440211032.html',
|
39
|
+
:url => 'http://newyork.craigslist.org/stn/app/3440211032.html',
|
40
|
+
:location => '7184487435',
|
41
|
+
:section => 'appliances - by owner',
|
42
|
+
:img_types => [],
|
43
|
+
:post_date => Date.parse('2012/12/12') }) }
|
44
|
+
specify{ subject.posts[1].attributes.should eq({
|
45
|
+
:label => 'Kenmore 5200 BTU Air Conditioner',
|
46
|
+
:href => 'http://newyork.craigslist.org/mnh/app/3474408782.html',
|
47
|
+
:url => 'http://newyork.craigslist.org/mnh/app/3474408782.html',
|
48
|
+
:location => 'Upper West Side',
|
49
|
+
:section => 'appliances - by owner',
|
50
|
+
:img_types => [:pic],
|
51
|
+
:price => Money.new(3000, 'USD'),
|
52
|
+
:post_date => Date.parse('2012/12/12') }) }
|
53
|
+
end
|
54
|
+
|
55
|
+
context "listing_rea_miami_123012.html" do
|
56
|
+
subject { described_class.new( uri_for('listing_rea_miami_123012.html') ) }
|
57
|
+
specify{ subject.posts.should have(100).items }
|
58
|
+
specify{ subject.posts[0].attributes.should eq({
|
59
|
+
:label => '3bd 2ba Home for Sale in Miami - Reduced',
|
60
|
+
:href => 'http://miami.craigslist.org/mdc/reb/3478403162.html',
|
61
|
+
:url => 'http://miami.craigslist.org/mdc/reb/3478403162.html',
|
62
|
+
:location => 'Miami',
|
63
|
+
:section => 'broker',
|
64
|
+
:img_types => [:img],
|
65
|
+
:price => Money.new(24900000, 'USD'),
|
66
|
+
:post_date => Date.parse('2012/12/30') }) }
|
67
|
+
specify{ subject.posts[12].attributes.should eq({
|
68
|
+
:label => 'Miami, FL Home for Sale - 4bd 3ba/1hba',
|
69
|
+
:href => 'http://miami.craigslist.org/mdc/reb/3478359527.html',
|
70
|
+
:url => 'http://miami.craigslist.org/mdc/reb/3478359527.html',
|
71
|
+
:location => 'Other',
|
72
|
+
:section => 'broker',
|
73
|
+
:img_types => [:img],
|
74
|
+
:price => Money.new(45800000, 'USD'),
|
75
|
+
:post_date => Date.parse('2012/12/30') }) }
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe CraigScrape::Posting do
|
5
|
+
context "this_post_has_expired_old.html" do
|
6
|
+
subject{ described_class.new uri_for('this_post_has_expired_old.html') }
|
7
|
+
|
8
|
+
its(:posting_has_expired?){ should be_true }
|
9
|
+
end
|
10
|
+
|
11
|
+
context "posting_sya_121012.html" do
|
12
|
+
# This example was picked since it has pics
|
13
|
+
subject{ described_class.new uri_for('posting_sya_121012.html') }
|
14
|
+
|
15
|
+
its(:full_section) {should eq(["south florida craigslist", "miami / dade", "for sale / wanted", "computers - by dealer"])}
|
16
|
+
its(:header) {should eq("Sony Vaio - $480 (orlando,florida)")}
|
17
|
+
its(:label) {should eq("Sony Vaio - $480")}
|
18
|
+
its(:title) {should eq("Sony Vaio")}
|
19
|
+
its(:location) {should eq('orlando,florida')}
|
20
|
+
its(:posting_id) {should eq(3469913065)}
|
21
|
+
its(:reply_to) {should eq('9cxgv-3469913065@sale.craigslist.org')}
|
22
|
+
its(:post_time) {should eq(DateTime.parse('2012-12-10 20:51:00 -0500'))}
|
23
|
+
its(:price) {should eq(480)}
|
24
|
+
its(:images) {should eq([])}
|
25
|
+
its(:pics) do
|
26
|
+
pics_list = ['3Eb3Fc3M25Ne5Ed5J6cca593d0c806b3614c1',
|
27
|
+
'3K73L43J45G25H55J1cca2d0db7d75fe11448', '3E53Gc3F85I95K25M1ccaf6c5790cbd541b57',
|
28
|
+
'3G83Kc3F15L45E75J9cca7e4e7fbdfe981fd9', '3Kc3ma3N65Le5Kf5U3ccae67bd8aa8129140c'
|
29
|
+
].collect{|src| ['http://images.craigslist.org/', src, '.jpg'].join }
|
30
|
+
should eq( pics_list )
|
31
|
+
end
|
32
|
+
its(:img_types) {should eq([:pic])}
|
33
|
+
its(:contents_as_plain) {should eq("Sony Vaio for sale! Its in great condition but I no longer hard need for it. No low ball offers!!")}
|
34
|
+
its(:contents) {should eq("<br>Sony Vaio for sale! Its in great condition but I no longer hard need for it. No low ball offers!!<br><br><br><br>")}
|
35
|
+
end
|
36
|
+
|
37
|
+
context "posting_sya_121012-2.html" do
|
38
|
+
# This example was picked since it has images and no text
|
39
|
+
subject{ described_class.new uri_for('posting_sya_121012-2.html') }
|
40
|
+
|
41
|
+
its(:full_section) {should eq(["south florida craigslist", "broward county", "for sale / wanted", "computers - by dealer"])}
|
42
|
+
its(:header) {should eq("METRO PCS ★ANDROID★SMARTPHONE★ Samsung SCH Admire Red Clean ES - $80 (BROWARD)")}
|
43
|
+
its(:label) {should eq("METRO PCS ★ANDROID★SMARTPHONE★ Samsung SCH Admire Red Clean ES - $80")}
|
44
|
+
its(:title) {should eq("METRO PCS ★ANDROID★SMARTPHONE★ Samsung SCH Admire Red Clean ES")}
|
45
|
+
its(:location) {should eq('BROWARD')}
|
46
|
+
its(:posting_id) {should eq(3469905497)}
|
47
|
+
its(:reply_to) {should eq('z7jmh-3469905497@sale.craigslist.org')}
|
48
|
+
its(:post_time) {should eq(DateTime.parse('2012-12-10 20:47:00 -0500'))}
|
49
|
+
its(:price) {should eq(80)}
|
50
|
+
its(:images) do
|
51
|
+
images_list = ["http://i1157.photobucket.com/albums/p590/emy123000/T2eC16NE9s2fp7dBQuCykypg60_12.jpg", "http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg", "http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg", "http://i1157.photobucket.com/albums/p590/emy123000/ScreenShot2012-06-25at60811AM.png", "http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZowFCp4FZqoWBQvsVUbFdw60_12.jpg"]
|
52
|
+
|
53
|
+
should eq(images_list)
|
54
|
+
end
|
55
|
+
its(:pics) {should eq([])}
|
56
|
+
its(:img_types) {should eq([:img])}
|
57
|
+
its(:contents_as_plain) {should eq("")}
|
58
|
+
its(:contents) {should eq("<a href=\"http://s1157.photobucket.com/albums/p590/emy123000/?action=view¤t=T2eC16NE9s2fp7dBQuCykypg60_12.jpg\" target=\"_blank\" rel=\"nofollow\"><img src=\"http://i1157.photobucket.com/albums/p590/emy123000/T2eC16NE9s2fp7dBQuCykypg60_12.jpg\" border=\"0\" alt=\"Photobucket\"></a><br><a href=\"http://s1157.photobucket.com/albums/p590/emy123000/?action=view¤t=KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg\" target=\"_blank\" rel=\"nofollow\"><img src=\"http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg\" border=\"0\" alt=\"Photobucket\"></a><a href=\"http://s1157.photobucket.com/albums/p590/emy123000/?action=view¤t=KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg\" target=\"_blank\" rel=\"nofollow\"><img src=\"http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg\" border=\"0\" alt=\"Photobucket\"></a><a href=\"http://s1157.photobucket.com/albums/p590/emy123000/?action=view¤t=ScreenShot2012-06-25at60811AM.png\" target=\"_blank\" rel=\"nofollow\"><img src=\"http://i1157.photobucket.com/albums/p590/emy123000/ScreenShot2012-06-25at60811AM.png\" border=\"0\" alt=\"Photobucket\"></a><a href=\"http://s1157.photobucket.com/albums/p590/emy123000/?action=view¤t=KGrHqZowFCp4FZqoWBQvsVUbFdw60_12.jpg\" target=\"_blank\" rel=\"nofollow\"><img src=\"http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZowFCp4FZqoWBQvsVUbFdw60_12.jpg\" border=\"0\" alt=\"Photobucket\"></a>")}
|
59
|
+
end
|
60
|
+
|
61
|
+
context "posting_mdc_cto_ftl_112612.html" do
|
62
|
+
subject{ described_class.new uri_for('posting_mdc_cto_ftl_112612.html') }
|
63
|
+
|
64
|
+
its(:title) {should eq("1999 Mustang GT w/ '08 3 Valve Engine Swap")}
|
65
|
+
its(:contents) {should eq("I am selling my 1999 Mustang GT with a 2008 GT 3 Valve engine swap. The car is a 5 speed with 3.73 rear gears. It has a Diablo Sport chip with 2 tunes. The rear tires have plentyt of tread and are Michelin Pilot sports ($700 less than a year ago). The rims are staggered and are less than a year old. The car has a 2003 Mustang Cobra hood. It also has a 2003 Cobra front bumper. The paint on the car is less than a year old. The bad: The car will need 2 front tires soon. They are currently Nitto 555's. The A/C Compressor will need changing soon as it is making a little noise. It works but I dont use it just in case. The drivers side seat needs to be re-upholstered also. Minor problems considering the work that went into this car. The instrument cluster is from a Mach 1 and shows 120,000 miles, but the engine has around 70,000 miles. <br><br>\nA lot of time and money was spent on this car to do the swap right. It is my daily driver and has never given me a single problem. It's always had Mobil 1 Synthetic oil and it's plenty fast and a lot of fun to drive. If you are looking for something unique and you are a Mustang fan, you might want to consider this car.<br><br>\nThe seats are from a newer model Mustang GT. The car runs really good, and turns heads everywhere it goes, especially when I pop the hood and those in the know see the engine that doesnt belong there. : )<br><br>\nI am will consider all offers so please don't be shy, the worse that I can do is say no. I am interested in some specific cars as well that I willl consider on trade. BMW 530 or 540, Infinity G35, Lexus IS 300 or GS300 or 400. I respond better to texts or emails. Call with any quesrtions. 305-310-5993 or email me at Torresa76@aol.com<br><br>\nThanks for looking. : )")}
|
66
|
+
its(:contents_as_plain) {should eq("I am selling my 1999 Mustang GT with a 2008 GT 3 Valve engine swap. The car is a 5 speed with 3.73 rear gears. It has a Diablo Sport chip with 2 tunes. The rear tires have plentyt of tread and are Michelin Pilot sports ($700 less than a year ago). The rims are staggered and are less than a year old. The car has a 2003 Mustang Cobra hood. It also has a 2003 Cobra front bumper. The paint on the car is less than a year old. The bad: The car will need 2 front tires soon. They are currently Nitto 555's. The A/C Compressor will need changing soon as it is making a little noise. It works but I dont use it just in case. The drivers side seat needs to be re-upholstered also. Minor problems considering the work that went into this car. The instrument cluster is from a Mach 1 and shows 120,000 miles, but the engine has around 70,000 miles. \nA lot of time and money was spent on this car to do the swap right. It is my daily driver and has never given me a single problem. It's always had Mobil 1 Synthetic oil and it's plenty fast and a lot of fun to drive. If you are looking for something unique and you are a Mustang fan, you might want to consider this car.\nThe seats are from a newer model Mustang GT. The car runs really good, and turns heads everywhere it goes, especially when I pop the hood and those in the know see the engine that doesnt belong there. : )\nI am will consider all offers so please don't be shy, the worse that I can do is say no. I am interested in some specific cars as well that I willl consider on trade. BMW 530 or 540, Infinity G35, Lexus IS 300 or GS300 or 400. I respond better to texts or emails. Call with any quesrtions. 305-310-5993 or email me at Torresa76@aol.com\nThanks for looking. : )")}
|
67
|
+
its(:full_section) {should eq(["south florida craigslist", "miami / dade", "for sale / wanted", "cars & trucks - by owner"])}
|
68
|
+
its(:header) {should eq("1999 Mustang GT w/ '08 3 Valve Engine Swap - $8500 (Homestead)")}
|
69
|
+
its(:label) {should eq("1999 Mustang GT w/ '08 3 Valve Engine Swap - $8500")}
|
70
|
+
its(:location) {should eq('Homestead')}
|
71
|
+
its(:posting_id) {should eq(3437079882)}
|
72
|
+
its(:reply_to) {should eq(nil)}
|
73
|
+
its(:post_time) {should eq(DateTime.parse('2012-11-26 21:34:00 -0500'))}
|
74
|
+
its(:price) {should eq(8500)}
|
75
|
+
its(:images) {should eq([])}
|
76
|
+
its(:pics) do
|
77
|
+
pics_list = ['3M53of3H65N15E15M2cbqdd2e7af939c215a3',
|
78
|
+
'3G13F23Hd5I15Nb5T1cbqfb3e2605ddf31b8b', '3n13F23N25Lf5Y65Facbq05143722c4801267',
|
79
|
+
'3Ee3Ne3H85N85K15Hecbq79f17c0a2e03136e', '3me3pb3Nb5Le5Hd5Mdcbqe446ce3ce2ef1f80'
|
80
|
+
].collect{|src| ['http://images.craigslist.org/', src, '.jpg'].join }
|
81
|
+
should eq( pics_list )
|
82
|
+
end
|
83
|
+
its(:img_types) {should eq([:pic])}
|
84
|
+
end
|
85
|
+
|
86
|
+
context "posting_daytona_art_120512.html" do
|
87
|
+
subject{ described_class.new uri_for('posting_daytona_art_120512.html') }
|
88
|
+
|
89
|
+
its(:full_section) {should eq(["daytona beach craigslist", "for sale / wanted", "arts & crafts - by owner"])}
|
90
|
+
its(:header) {should eq("METAL SCULPTURES GREAT Christmas gifts (ormond)")}
|
91
|
+
its(:label) {should eq("METAL SCULPTURES GREAT Christmas gifts")}
|
92
|
+
its(:title) {should eq("METAL SCULPTURES GREAT Christmas gifts")}
|
93
|
+
its(:location) {should eq('ormond')}
|
94
|
+
its(:posting_id) {should eq(3431080802)}
|
95
|
+
its(:reply_to) {should eq('rbwts-3431080802@sale.craigslist.org')}
|
96
|
+
its(:post_time) {should eq(DateTime.parse('2012-12-05 21:25:00 -0500'))}
|
97
|
+
its(:price) {should eq(nil)}
|
98
|
+
its(:images) {should eq([])}
|
99
|
+
its(:pics) {should eq(["http://images.craigslist.org/3Kb3M83I85Gc5Ea5H2cbo8eb0fb5e4af71968.jpg", "http://images.craigslist.org/3Lb3M33l35E85F35P0cbod80bd9115e311350.jpg", "http://images.craigslist.org/3Ef3Ib3H55L35K55J6cbof57b4d73878111d0.jpg"])}
|
100
|
+
its(:img_types) {should eq([:pic])}
|
101
|
+
its(:contents_as_plain) {should eq("Assorted Metal sculptures from local artist, call 386 235-4390")}
|
102
|
+
its(:contents) {should eq("Assorted Metal sculptures from local artist, call 386 235-4390")}
|
103
|
+
end
|
104
|
+
|
105
|
+
context "posting_daytona_art_120512-2.html" do
|
106
|
+
subject{ described_class.new uri_for('posting_daytona_art_120512-2.html') }
|
107
|
+
|
108
|
+
its(:full_section) {should eq(["daytona beach craigslist", "for sale / wanted", "arts & crafts - by owner"])}
|
109
|
+
its(:header) {should eq("Premier Bouquet Wrap - $2 (PALM COAST)")}
|
110
|
+
its(:label) {should eq("Premier Bouquet Wrap - $2")}
|
111
|
+
its(:title) {should eq("Premier Bouquet Wrap")}
|
112
|
+
its(:location) {should eq('PALM COAST')}
|
113
|
+
its(:posting_id) {should eq(3448282416)}
|
114
|
+
its(:reply_to) {should eq('nqmhm-3448282416@sale.craigslist.org')}
|
115
|
+
its(:post_time) {should eq(DateTime.parse('2012-12-01 15:02:00 -0500'))}
|
116
|
+
its(:price) {should eq(2)}
|
117
|
+
its(:images) {should eq([])}
|
118
|
+
its(:pics) {should eq(["http://images.craigslist.org/3I93pe3Hf5G75J55M2cc13e19b59314771029.jpg"])}
|
119
|
+
its(:img_types) {should eq([:pic])}
|
120
|
+
its(:contents_as_plain) {should eq("THESE ARE USED IN FLOWER / CRAFT SHOPS . ALL ARE NEW, BOXED AND AND VERY WELL MADE. I HAVE A CASE OF THESE I WILL SELL FOR ONE PRICE, OR WILL SELL BY THE PIECE. CASE PRICE IS FOR ABOUT 144 PIECES $75.00. \nPremier Bouquet Wrap\nFlower Bridal Bouquet Wrap White/Satin \nNew White Satin \nThe wraps are approximately 6 1/2\" Long \nThe bridal bouquet wrap is a creative alternative to tying a ribbon around your flowers. Just slide the wrap around flower stems\nThis wrap is perfect for covering/decorating the stems on \"Wedding\", Quinceañera\" or \"Prom\" bouquets. They can also be used with \"Wedding bouquet\" holder handles. These wraps are made with quality Satin material, easy to install and feels soft and smooth on the Bride's or Bridesmaid's hands. These wraps put the finishing touches on any Floral Wedding Bouquet.\nPLEASE CALL . . .\nAJ-518-858-2002")}
|
121
|
+
its(:contents) {should eq("THESE ARE USED IN FLOWER / CRAFT SHOPS . ALL ARE NEW, BOXED AND AND VERY WELL MADE. I HAVE A CASE OF THESE I WILL SELL FOR ONE PRICE, OR WILL SELL BY THE PIECE. CASE PRICE IS FOR ABOUT 144 PIECES $75.00. <br><br>\nPremier Bouquet Wrap<br><br>\nFlower Bridal Bouquet Wrap White/Satin <br><br>\nNew White Satin <br><br>\nThe wraps are approximately 6 1/2\" Long <br><br>\nThe bridal bouquet wrap is a creative alternative to tying a ribbon around your flowers. Just slide the wrap around flower stems<br><br>\nThis wrap is perfect for covering/decorating the stems on \"Wedding\", Quinceañera\" or \"Prom\" bouquets. They can also be used with \"Wedding bouquet\" holder handles. These wraps are made with quality Satin material, easy to install and feels soft and smooth on the Bride's or Bridesmaid's hands. These wraps put the finishing touches on any Floral Wedding Bouquet.<br><br>\nPLEASE CALL . . .<br><br>\nAJ-518-858-2002<br><br><br>")}
|
122
|
+
end
|
123
|
+
|
124
|
+
context "posting_mdc_reb_120612.html" do
|
125
|
+
subject{ described_class.new uri_for('posting_mdc_reb_120612.html') }
|
126
|
+
its(:system_post?){ should be_false }
|
127
|
+
|
128
|
+
its(:full_section) {should eq(["south florida craigslist", "miami / dade", "housing", "real estate - by broker"])}
|
129
|
+
its(:header) {should eq("$1149000 / 3br - 2000ft² - ✱✱✱BEAUTIFUL HOUSE FOR SALE IN FLORIDA KEYS (Florida Key Islamorada)")}
|
130
|
+
its(:label) {should eq("$1149000 / 3br - 2000ft² - ✱✱✱BEAUTIFUL HOUSE FOR SALE IN FLORIDA KEYS")}
|
131
|
+
its(:title) {should eq("✱✱✱BEAUTIFUL HOUSE FOR SALE IN FLORIDA KEYS ")}
|
132
|
+
its(:location) {should eq('Florida Key Islamorada')}
|
133
|
+
its(:posting_id) {should eq(3438004368)}
|
134
|
+
its(:reply_to) {should eq('p7h8m-3438004368@hous.craigslist.org')}
|
135
|
+
its(:post_time) {should eq(DateTime.parse('2012-12-05 12:46:00 -0500'))}
|
136
|
+
its(:price) {should eq(1149000)}
|
137
|
+
its(:images) {should eq([])}
|
138
|
+
its(:pics) {should eq(["http://images.craigslist.org/3M43Jb3ld5L55Z35M5cbr12a6ec99f72d18e2.jpg", "http://images.craigslist.org/3L73H63l45I55L35G4cbr8902484988f3112f.jpg", "http://images.craigslist.org/3Le3Ic3Hf5I75La5M1cbrdd1617f48d4c1f02.jpg"])}
|
139
|
+
its(:img_types) {should eq([:pic])}
|
140
|
+
its(:contents_as_plain) {should eq("\u0095 $1,149,000.00 \n\u0095 2000ft²\n\u0095 3-bedroom\n\u0095 3-full bath\nJUST REDUCED FOR A QUICK SALE!!!!\nThis great Three Story 3-bedroom 3 Full bath home in Islamorada, Florida Keys is the perfect get-away to relax, enjoy the fresh breezes, the sandy beach and watch spectacular sunsets. A spacious master suite bedroom upstairs has a private bath with whirlpool Jacuzzi tub and private patio. Downstairs you have access to full kitchen custom cabinets, granite countertops, stainless steel appliances, open living-dining room, Travertine marble throughout the whole house, two bedrooms and two full baths. Enjoy a concrete dock and davits with boat access to the Gulf and ocean in less than five minutes via the deep-water canal right outside your door. Enjoy access to a private community beach with picnic tables and tiki huts for fun barbecues; jet skiing; boat ramp and recreational boating and water skiing. Don't miss out on these .Located in a quiet neighborhood at Mile Marker 74 in Islamorada, this location is tastefully furnished, has a gourmet kitchen and is ideal for boaters.\n(hablamos español) \nCall for appointment 305.467.6348 / 786.484.0917\nMarisol Acosta\nLicensed, Realtor Associate\nAkoya Realty LLC\nwww.akoyarealty.com") }
|
141
|
+
its(:contents) {should eq("\u0095 $1,149,000.00 <br>\n\u0095 2000ft²<br>\n\u0095 3-bedroom<br>\n\u0095 3-full bath<br><br>\nJUST REDUCED FOR A QUICK SALE!!!!<br><br>\nThis great Three Story 3-bedroom 3 Full bath home in Islamorada, Florida Keys is the perfect get-away to relax, enjoy the fresh breezes, the sandy beach and watch spectacular sunsets. A spacious master suite bedroom upstairs has a private bath with whirlpool Jacuzzi tub and private patio. Downstairs you have access to full kitchen custom cabinets, granite countertops, stainless steel appliances, open living-dining room, Travertine marble throughout the whole house, two bedrooms and two full baths. Enjoy a concrete dock and davits with boat access to the Gulf and ocean in less than five minutes via the deep-water canal right outside your door. Enjoy access to a private community beach with picnic tables and tiki huts for fun barbecues; jet skiing; boat ramp and recreational boating and water skiing. Don't miss out on these .Located in a quiet neighborhood at Mile Marker 74 in Islamorada, this location is tastefully furnished, has a gourmet kitchen and is ideal for boaters.<br><br>\n(hablamos español) <br><br>\nCall for appointment 305.467.6348 / 786.484.0917<br>\nMarisol Acosta<br>\nLicensed, Realtor Associate<br>\nAkoya Realty LLC<br>\nwww.akoyarealty.com") }
|
142
|
+
end
|
143
|
+
|
144
|
+
context "posting_sfbay_1226.html" do
|
145
|
+
# This example was picked since it has pics
|
146
|
+
subject{ described_class.new uri_for('posting_sfbay_1226.html') }
|
147
|
+
|
148
|
+
its(:full_section) {should eq(["SF bay area craigslist", "east bay", "community", "lost & found"])}
|
149
|
+
its(:header) {should eq("LOST CAT - black SH neutered M - STAR (lafayette / orinda / moraga)")}
|
150
|
+
its(:label) {should eq("LOST CAT - black SH neutered M - STAR")}
|
151
|
+
its(:title) {should eq("LOST CAT - black SH neutered M - STAR")}
|
152
|
+
its(:location) {should eq('lafayette / orinda / moraga')}
|
153
|
+
its(:posting_id) {should eq(3456070558)}
|
154
|
+
its(:reply_to) {should eq('qnggz-3456070558@comm.craigslist.org')}
|
155
|
+
its(:post_time) {should eq(DateTime.parse('2012-12-26 17:41:00 PST'))}
|
156
|
+
end
|
157
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -476,8 +476,8 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
|
|
476
476
|
%w(
|
477
477
|
jacksonville panamacity orlando fortmyers keys tallahassee ocala gainesville tampa
|
478
478
|
pensacola daytona treasure sarasota staugustine spacecoast lakeland newyork
|
479
|
-
).collect{|p| "#{p}.craigslist.org"},
|
480
|
-
CraigScrape::GeoListings.find_sites( ["us/fl","-us/fl/miami", "+ newyork.craigslist.org"], hier_dir)
|
479
|
+
).collect{|p| "#{p}.craigslist.org"}.sort,
|
480
|
+
CraigScrape::GeoListings.find_sites( ["us/fl","-us/fl/miami", "+ newyork.craigslist.org"], hier_dir).sort
|
481
481
|
)
|
482
482
|
|
483
483
|
assert_equal(
|
@@ -510,12 +510,12 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
|
|
510
510
|
porthuron greensboro orangecounty fargo ogden charlotte allentown joplin chautauqua lakecharles omaha
|
511
511
|
springfieldil roswell montana killeen milwaukee nd williamsport columbia racine southcoast ames huntington
|
512
512
|
cincinnati auburn miami
|
513
|
-
).collect{|p| "#{p}.craigslist.org"},
|
513
|
+
).collect{|p| "#{p}.craigslist.org"}.sort,
|
514
514
|
CraigScrape::GeoListings.find_sites(
|
515
515
|
["us","- us/fl", "+ us/fl/miami", ' -jacksonville.craigslist.org'], hier_dir
|
516
|
-
)
|
516
|
+
).sort
|
517
517
|
)
|
518
518
|
|
519
519
|
end
|
520
520
|
|
521
|
-
end
|
521
|
+
end
|
@@ -113,8 +113,8 @@ class CraigslistListingTest < Test::Unit::TestCase
|
|
113
113
|
assert_equal "Husqvarna Viking Rose Embroidery-Sewing Machine - $400", fortmyers_art_index600_060909.posts[0].label
|
114
114
|
assert_equal "Punta Gorda, Charlotte County", fortmyers_art_index600_060909.posts[0].location
|
115
115
|
assert_equal [], fortmyers_art_index600_060909.posts[0].pics
|
116
|
-
assert_equal
|
117
|
-
assert_equal
|
116
|
+
assert_equal Date.parse('2008/10/28'), fortmyers_art_index600_060909.posts[0].post_date
|
117
|
+
assert_equal DateTime.parse('2008-10-28T21:51:00-04:00'), fortmyers_art_index600_060909.posts[0].post_time
|
118
118
|
assert_equal 897549505, fortmyers_art_index600_060909.posts[0].posting_id
|
119
119
|
assert_equal 400.0, fortmyers_art_index600_060909.posts[0].price
|
120
120
|
assert_equal nil, fortmyers_art_index600_060909.posts[0].reply_to
|
@@ -139,8 +139,8 @@ class CraigslistListingTest < Test::Unit::TestCase
|
|
139
139
|
assert_equal "ARTISTS' MOVING SALE-BAYSHORE", fortmyers_art_index600_060909.posts[1].label
|
140
140
|
assert_equal "Naples", fortmyers_art_index600_060909.posts[1].location
|
141
141
|
assert_equal [], fortmyers_art_index600_060909.posts[1].pics
|
142
|
-
assert_equal
|
143
|
-
assert_equal
|
142
|
+
assert_equal Date.parse('2008/10/24'), fortmyers_art_index600_060909.posts[1].post_date
|
143
|
+
assert_equal DateTime.parse('2008-10-24T09:31:00-04:00'), fortmyers_art_index600_060909.posts[1].post_time
|
144
144
|
assert_equal 891513957, fortmyers_art_index600_060909.posts[1].posting_id
|
145
145
|
assert_equal nil, fortmyers_art_index600_060909.posts[1].price
|
146
146
|
assert_equal "sale-891513957@craigslist.org", fortmyers_art_index600_060909.posts[1].reply_to
|
@@ -165,8 +165,8 @@ class CraigslistListingTest < Test::Unit::TestCase
|
|
165
165
|
assert_equal "tapestry sewing machine and embroidery arm luggage - $250", fortmyers_art_index600_060909.posts[2].label
|
166
166
|
assert_equal "Punta Gorda", fortmyers_art_index600_060909.posts[2].location
|
167
167
|
assert_equal [], fortmyers_art_index600_060909.posts[2].pics
|
168
|
-
assert_equal
|
169
|
-
assert_equal
|
168
|
+
assert_equal Date.parse('2008/09/03'), fortmyers_art_index600_060909.posts[2].post_date
|
169
|
+
assert_equal DateTime.parse('2008-09-03T15:31:00-04:00'), fortmyers_art_index600_060909.posts[2].post_time
|
170
170
|
assert_equal 825684735, fortmyers_art_index600_060909.posts[2].posting_id
|
171
171
|
assert_equal 250.0, fortmyers_art_index600_060909.posts[2].price
|
172
172
|
assert_equal "sale-825684735@craigslist.org", fortmyers_art_index600_060909.posts[2].reply_to
|
@@ -191,8 +191,8 @@ class CraigslistListingTest < Test::Unit::TestCase
|
|
191
191
|
assert_equal "Cast Glass Sculpture - Aurora - $2400", fortmyers_art_index600_060909.posts[3].label
|
192
192
|
assert_equal "Naples", fortmyers_art_index600_060909.posts[3].location
|
193
193
|
assert_equal [], fortmyers_art_index600_060909.posts[3].pics
|
194
|
-
assert_equal
|
195
|
-
assert_equal
|
194
|
+
assert_equal Date.parse('2008/09/02'), fortmyers_art_index600_060909.posts[3].post_date
|
195
|
+
assert_equal DateTime.parse('2008-09-02T10:35:00-04:00'), fortmyers_art_index600_060909.posts[3].post_time
|
196
196
|
assert_equal 823516079, fortmyers_art_index600_060909.posts[3].posting_id
|
197
197
|
assert_equal 2400.0, fortmyers_art_index600_060909.posts[3].price
|
198
198
|
assert_equal "sale-823516079@craigslist.org", fortmyers_art_index600_060909.posts[3].reply_to
|
@@ -210,47 +210,47 @@ class CraigslistListingTest < Test::Unit::TestCase
|
|
210
210
|
|
211
211
|
mia_search_kitten031510 = CraigScrape::Listings.new relative_uri_for('listing_samples/mia_search_kitten.3.15.10.html')
|
212
212
|
assert_equal "Adopt a 7 month on kitten- $75", mia_search_kitten031510.posts[0].label
|
213
|
-
assert_equal
|
213
|
+
assert_equal Date.parse('03/15'), mia_search_kitten031510.posts[0].post_date
|
214
214
|
assert_equal "Adorable Kitten! Free!!!", mia_search_kitten031510.posts[1].label
|
215
|
-
assert_equal
|
215
|
+
assert_equal Date.parse('03/15'), mia_search_kitten031510.posts[1].post_date
|
216
216
|
assert_equal "KITTENS,5 months, 1 Russian blue, 1 grey & white,vac spy/neu,$35fee ea", mia_search_kitten031510.posts[2].label
|
217
|
-
assert_equal
|
217
|
+
assert_equal Date.parse('3/13'), mia_search_kitten031510.posts[2].post_date
|
218
218
|
assert_equal "Kitties need a good home", mia_search_kitten031510.posts[3].label
|
219
|
-
assert_equal
|
219
|
+
assert_equal Date.parse('3/13'), mia_search_kitten031510.posts[3].post_date
|
220
220
|
assert_equal "7 week old kittens for adoption", mia_search_kitten031510.posts[4].label
|
221
|
-
assert_equal
|
221
|
+
assert_equal Date.parse('3/13'), mia_search_kitten031510.posts[4].post_date
|
222
222
|
assert_equal "Adorable Orange Kitten Free to Good Home", mia_search_kitten031510.posts[5].label
|
223
|
-
assert_equal
|
223
|
+
assert_equal Date.parse('3/12'), mia_search_kitten031510.posts[5].post_date
|
224
224
|
assert_equal "7 month old kitten free to good home", mia_search_kitten031510.posts[6].label
|
225
|
-
assert_equal
|
225
|
+
assert_equal Date.parse('3/12'), mia_search_kitten031510.posts[6].post_date
|
226
226
|
assert_equal "FEMALE KITTEN FOR GOOD HOME", mia_search_kitten031510.posts[7].label
|
227
|
-
assert_equal
|
227
|
+
assert_equal Date.parse('3/9'), mia_search_kitten031510.posts[7].post_date
|
228
228
|
assert_equal "Kitten", mia_search_kitten031510.posts[8].label
|
229
|
-
assert_equal
|
229
|
+
assert_equal Date.parse('3/4'), mia_search_kitten031510.posts[8].post_date
|
230
230
|
assert_equal "Kitties need a good home", mia_search_kitten031510.posts[9].label
|
231
|
-
assert_equal
|
231
|
+
assert_equal Date.parse('3/4'), mia_search_kitten031510.posts[9].post_date
|
232
232
|
assert_equal "Persain Cat And Tabby Cat", mia_search_kitten031510.posts[10].label
|
233
|
-
assert_equal
|
233
|
+
assert_equal Date.parse('3/1'), mia_search_kitten031510.posts[10].post_date
|
234
234
|
assert_equal "Tabby female kitten in a parking lot needs your help", mia_search_kitten031510.posts[11].label
|
235
|
-
assert_equal
|
235
|
+
assert_equal Date.parse('2/23'), mia_search_kitten031510.posts[11].post_date
|
236
236
|
assert_equal "Spring is almost officially here, grow your family, adopt a kitty!", mia_search_kitten031510.posts[12].label
|
237
|
-
assert_equal
|
237
|
+
assert_equal Date.parse('2/22'), mia_search_kitten031510.posts[12].post_date
|
238
238
|
assert_equal "Many adorable kittens for adoption!", mia_search_kitten031510.posts[13].label
|
239
|
-
assert_equal
|
239
|
+
assert_equal Date.parse('2/22'), mia_search_kitten031510.posts[13].post_date
|
240
240
|
assert_equal "2 free cats/kitten to good home", mia_search_kitten031510.posts[14].label
|
241
|
-
assert_equal
|
241
|
+
assert_equal Date.parse('2/19'), mia_search_kitten031510.posts[14].post_date
|
242
242
|
assert_equal "BEAUTIFUL KITTENS", mia_search_kitten031510.posts[15].label
|
243
|
-
assert_equal
|
243
|
+
assert_equal Date.parse('2/19'), mia_search_kitten031510.posts[15].post_date
|
244
244
|
assert_equal "MANY new adorable kittens for good homes!!!", mia_search_kitten031510.posts[16].label
|
245
|
-
assert_equal
|
245
|
+
assert_equal Date.parse('2/18'), mia_search_kitten031510.posts[16].post_date
|
246
246
|
assert_equal "Kitten living in a parking lot needs your help", mia_search_kitten031510.posts[17].label
|
247
|
-
assert_equal
|
247
|
+
assert_equal Date.parse('2/16'), mia_search_kitten031510.posts[17].post_date
|
248
248
|
assert_equal "BEAUTIFUL 8 WEEK KITTENS", mia_search_kitten031510.posts[18].label
|
249
|
-
assert_equal
|
249
|
+
assert_equal Date.parse('2/16'), mia_search_kitten031510.posts[18].post_date
|
250
250
|
assert_equal "ORANGE TABBY KITTEN", mia_search_kitten031510.posts[19].label
|
251
|
-
assert_equal
|
251
|
+
assert_equal Date.parse('2/13'), mia_search_kitten031510.posts[19].post_date
|
252
252
|
assert_equal "Lots of kittens to choose from! Pics!!", mia_search_kitten031510.posts[20].label
|
253
|
-
assert_equal
|
253
|
+
assert_equal Date.parse('2/13'), mia_search_kitten031510.posts[20].post_date
|
254
254
|
|
255
255
|
end
|
256
256
|
|
@@ -359,4 +359,4 @@ class CraigslistListingTest < Test::Unit::TestCase
|
|
359
359
|
assert_equal "Denim Fabric Blocks for Crafts", new_listing_span051710.posts[99].label
|
360
360
|
end
|
361
361
|
|
362
|
-
end
|
362
|
+
end
|