libcraigscrape 0.6.5 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +17 -0
- data/Rakefile +1 -1
- data/bin/craigwatch +10 -10
- data/bin/report_mailer/craigslist_report.html.erb +2 -2
- data/bin/report_mailer/craigslist_report.plain.erb +2 -2
- data/lib/libcraigscrape.rb +585 -342
- data/test/geolisting_samples/geo_listing_ca070209.html +76 -0
- data/test/geolisting_samples/geo_listing_ca_sk070209.html +31 -0
- data/test/geolisting_samples/geo_listing_cn070209.html +35 -0
- data/test/geolisting_samples/geo_listing_us070209.html +355 -0
- data/test/libcraigscrape_test_helpers.rb +31 -0
- data/test/listing_samples/fortmyers_art_index.060909/1046596324.html +93 -0
- data/test/listing_samples/fortmyers_art_index.060909/1053085283.html +92 -0
- data/test/listing_samples/fortmyers_art_index.060909/1112522674.html +89 -0
- data/test/listing_samples/fortmyers_art_index.060909/823516079.html +92 -0
- data/test/listing_samples/fortmyers_art_index.060909/825684735.html +89 -0
- data/test/listing_samples/fortmyers_art_index.060909/891513957.html +94 -0
- data/test/listing_samples/fortmyers_art_index.060909/897549505.html +99 -0
- data/test/listing_samples/fortmyers_art_index.060909/960826026.html +89 -0
- data/test/listing_samples/fortmyers_art_index.060909/993256300.html +89 -0
- data/test/listing_samples/fortmyers_art_index.060909/fortmyers_art_index500.060909.html +237 -0
- data/test/listing_samples/fortmyers_art_index.060909/fortmyers_art_index600.060909.html +132 -0
- data/test/listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack1000.6.18.09.html +144 -0
- data/test/listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack900.6.18.09.html +146 -0
- data/test/post_samples/brw_reb_1224008903.html +101 -0
- data/test/post_samples/sfbay_art_1223614914.html +94 -0
- data/test/test_craigslist_geolisting.rb +425 -0
- data/test/test_craigslist_listing.rb +179 -260
- data/test/test_craigslist_posting.rb +306 -0
- metadata +29 -2
@@ -1,300 +1,219 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
|
3
|
-
require File.dirname(__FILE__)+'/../lib/libcraigscrape'
|
4
3
|
require 'test/unit'
|
4
|
+
require File.dirname(__FILE__)+'/../lib/libcraigscrape'
|
5
|
+
require File.dirname(__FILE__)+'/libcraigscrape_test_helpers'
|
5
6
|
|
6
7
|
class CraigslistListingTest < Test::Unit::TestCase
|
7
|
-
|
8
|
+
include LibcraigscrapeTestHelpers
|
9
|
+
|
8
10
|
def test_pukes
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
assert_raise(CraigScrape::ParseError){ CraigScrape::Listings.new google }
|
13
|
-
assert_raise(CraigScrape::ParseError){ CraigScrape::PostFull.new google }
|
14
|
-
end
|
15
|
-
|
16
|
-
def test_listing_parse
|
17
|
-
search_html_one = <<EOD
|
18
|
-
<p> Apr 18 - <a href="/brw/reb/1128608404.html">Losing your house? You'll need this New Loan Mod Video -</a><font size="-1"> (W. Woodland)</font> <span class="p"> img</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
19
|
-
EOD
|
20
|
-
search_html_two = <<EOD
|
21
|
-
<p> Jan 4 - <a href="/mdc/reb/1128609783.html">$348000 / 1br - Large 1/1 plus office on 49th Floor. 5-Star NEW Condo. Great Views -</a><font size="-1"> (Miami)</font> <span class="p"> pic img</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
22
|
-
EOD
|
23
|
-
search_html_three = <<EOD
|
24
|
-
<p> Dec 31 - <a href="/mdc/reb/1128520894.html">$22,000 HOME -ADULT COMMUNITY BOYNTON BEACH -</a> <span class="p"> pic</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
25
|
-
EOD
|
26
|
-
search_html_four = <<EOD
|
27
|
-
<p> Jul 22 - <a href="/mdc/reb/1128474725.html">$325000 / 3br - GOOD DEAL GREAT HOUSE AND LOCATION -</a><font size="-1"> (CORAL GABLES)</font> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
28
|
-
EOD
|
29
|
-
search_html_five = <<EOD
|
30
|
-
<p> Apr 9 - <a href="/pbc/boa/1115308178.html">40' SILVERTON CONVERTIBLE DIESEL - $105000 -</a><font size="-1"> (HOBE SOUND)</font> <span class="p"> pic</span></p>
|
31
|
-
EOD
|
32
|
-
category_listing_one = <<EOD
|
33
|
-
<p><a href="/pbc/reb/1128661387.html">$2995000 / 5br - Downtown Boca New Home To Be Built -</a><font size="-1"> (Boca Raton)</font> <span class="p"> pic</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
34
|
-
EOD
|
35
|
-
category_listing_two = <<EOD
|
36
|
-
<p><a href="/mdc/jwl/1128691192.html">925 Sterling Silver Dragonfly Charm Bracelet - $25 -</a> <span class="p"> img</span></p>
|
37
|
-
EOD
|
38
|
-
|
39
|
-
one = CraigScrape::PostSummary.new Hpricot.parse(search_html_one).at('p')
|
40
|
-
assert_equal true, one.has_img?
|
41
|
-
assert_equal false, one.has_pic?
|
42
|
-
assert_equal true, one.has_pic_or_img?
|
43
|
-
assert_equal '/brw/reb/1128608404.html', one.href
|
44
|
-
assert_equal "Losing your house? You'll need this New Loan Mod Video", one.label
|
45
|
-
assert_equal "real\302\240estate - by broker", one.section
|
46
|
-
assert_equal "W. Woodland", one.location
|
47
|
-
assert_equal 4, one.date.month
|
48
|
-
assert_equal 18, one.date.day
|
49
|
-
assert_equal nil, one.price
|
50
|
-
|
51
|
-
two = CraigScrape::PostSummary.new Hpricot.parse(search_html_two).at('p')
|
52
|
-
assert_equal true, two.has_img?
|
53
|
-
assert_equal true, two.has_pic?
|
54
|
-
assert_equal true, two.has_pic_or_img?
|
55
|
-
assert_equal '/mdc/reb/1128609783.html', two.href
|
56
|
-
assert_equal "$348000 / 1br - Large 1/1 plus office on 49th Floor. 5-Star NEW Condo. Great Views", two.label
|
57
|
-
assert_equal "real\302\240estate - by broker", two.section
|
58
|
-
assert_equal "Miami", two.location
|
59
|
-
assert_equal 1, two.date.month
|
60
|
-
assert_equal 4, two.date.day
|
61
|
-
assert_equal 348000.0, two.price
|
62
|
-
|
63
|
-
three = CraigScrape::PostSummary.new Hpricot.parse(search_html_three).at('p')
|
64
|
-
assert_equal false, three.has_img?
|
65
|
-
assert_equal true, three.has_pic?
|
66
|
-
assert_equal true, three.has_pic_or_img?
|
67
|
-
assert_equal '/mdc/reb/1128520894.html', three.href
|
68
|
-
assert_equal "$22,000 HOME -ADULT COMMUNITY BOYNTON BEACH", three.label
|
69
|
-
assert_equal "real\302\240estate - by broker", three.section
|
70
|
-
assert_equal nil, three.location
|
71
|
-
assert_equal 12, three.date.month
|
72
|
-
assert_equal 31, three.date.day
|
73
|
-
assert_equal 22.0, three.price
|
74
|
-
|
75
|
-
four = CraigScrape::PostSummary.new Hpricot.parse(search_html_four).at('p')
|
76
|
-
assert_equal false, four.has_img?
|
77
|
-
assert_equal false, four.has_pic?
|
78
|
-
assert_equal false, four.has_pic_or_img?
|
79
|
-
assert_equal '/mdc/reb/1128474725.html', four.href
|
80
|
-
assert_equal "$325000 / 3br - GOOD DEAL GREAT HOUSE AND LOCATION", four.label
|
81
|
-
assert_equal "real\302\240estate - by broker", four.section
|
82
|
-
assert_equal "CORAL GABLES", four.location
|
83
|
-
assert_equal 7, four.date.month
|
84
|
-
assert_equal 22, four.date.day
|
85
|
-
assert_equal 325000.0, four.price
|
86
|
-
|
87
|
-
five = CraigScrape::PostSummary.new Hpricot.parse(search_html_five).at('p')
|
88
|
-
assert_equal false, five.has_img?
|
89
|
-
assert_equal true, five.has_pic?
|
90
|
-
assert_equal true, five.has_pic_or_img?
|
91
|
-
assert_equal '/pbc/boa/1115308178.html', five.href
|
92
|
-
assert_equal "40' SILVERTON CONVERTIBLE DIESEL - $105000", five.label
|
93
|
-
assert_equal nil, five.section
|
94
|
-
assert_equal "HOBE SOUND", five.location
|
95
|
-
assert_equal 4, five.date.month
|
96
|
-
assert_equal 9, five.date.day
|
97
|
-
assert_equal 105000.0, five.price
|
98
|
-
|
99
|
-
five = CraigScrape::PostSummary.new Hpricot.parse(category_listing_one).at('p')
|
100
|
-
assert_equal false, five.has_img?
|
101
|
-
assert_equal true, five.has_pic?
|
102
|
-
assert_equal true, five.has_pic_or_img?
|
103
|
-
assert_equal '/pbc/reb/1128661387.html', five.href
|
104
|
-
assert_equal "$2995000 / 5br - Downtown Boca New Home To Be Built", five.label
|
105
|
-
assert_equal "real\302\240estate - by broker", five.section
|
106
|
-
assert_equal "Boca Raton", five.location
|
107
|
-
assert_equal nil, five.date
|
108
|
-
assert_equal 2995000.0, five.price
|
109
|
-
|
110
|
-
six = CraigScrape::PostSummary.new Hpricot.parse(category_listing_two).at('p')
|
111
|
-
assert_equal true, six.has_img?
|
112
|
-
assert_equal false, six.has_pic?
|
113
|
-
assert_equal true, six.has_pic_or_img?
|
114
|
-
assert_equal '/mdc/jwl/1128691192.html', six.href
|
115
|
-
assert_equal "925 Sterling Silver Dragonfly Charm Bracelet - $25", six.label
|
116
|
-
assert_equal nil, six.section
|
117
|
-
assert_equal nil, six.location
|
118
|
-
assert_equal nil, six.date
|
119
|
-
assert_equal 25.0, six.price
|
11
|
+
assert_raise(CraigScrape::Scraper::ParseError) do
|
12
|
+
CraigScrape::Listings.new( relative_uri_for('google.html') ).posts
|
13
|
+
end
|
120
14
|
end
|
121
15
|
|
122
16
|
def test_listings_parse
|
123
|
-
category = CraigScrape.
|
17
|
+
category = CraigScrape::Listings.new relative_uri_for('listing_samples/category_output.html')
|
124
18
|
assert_equal 'index100.html', category.next_page_href
|
125
19
|
assert_equal 100, category.posts.length
|
20
|
+
|
126
21
|
category.posts[0..80].each do |l|
|
127
|
-
assert_equal 4, l.
|
128
|
-
assert_equal 18, l.
|
22
|
+
assert_equal 4, l.post_date.month
|
23
|
+
assert_equal 18, l.post_date.day
|
129
24
|
end
|
130
25
|
|
131
|
-
category2 = CraigScrape.
|
26
|
+
category2 = CraigScrape::Listings.new relative_uri_for('listing_samples/category_output_2.html')
|
132
27
|
assert_equal 'index900.html', category2.next_page_href
|
133
28
|
assert_equal 100, category2.posts.length
|
134
29
|
|
135
|
-
long_search = CraigScrape.
|
30
|
+
long_search = CraigScrape::Listings.new relative_uri_for('listing_samples/long_search_output.html')
|
136
31
|
assert_equal '/search/rea?query=house&minAsk=min&maxAsk=max&bedrooms=&s=800', long_search.next_page_href
|
137
32
|
assert_equal 100, long_search.posts.length
|
138
33
|
|
139
|
-
short_search = CraigScrape.
|
34
|
+
short_search = CraigScrape::Listings.new relative_uri_for('listing_samples/short_search_output.html')
|
140
35
|
assert_equal nil, short_search.next_page_href
|
141
36
|
assert_equal 93, short_search.posts.length
|
142
37
|
|
143
|
-
mia_fua_index8900_052109 = CraigScrape.
|
38
|
+
mia_fua_index8900_052109 = CraigScrape::Listings.new relative_uri_for('listing_samples/mia_fua_index8900.5.21.09.html')
|
144
39
|
assert_equal 'index9000.html', mia_fua_index8900_052109.next_page_href
|
145
40
|
assert_equal 100, mia_fua_index8900_052109.posts.length
|
41
|
+
# NOTE: This tests a subtle condition where there's a blank h4 tag, and we shouldn't need to eager-load, since a solid inference can be made on the date because it's not the last h4 on the page
|
42
|
+
# This actually happens quite a bit...
|
146
43
|
mia_fua_index8900_052109.posts[0..13].each do |l|
|
147
|
-
assert_equal 5, l.
|
148
|
-
assert_equal 15, l.
|
44
|
+
assert_equal 5, l.post_date.month
|
45
|
+
assert_equal 15, l.post_date.day
|
149
46
|
end
|
150
47
|
mia_fua_index8900_052109.posts[14..99].each do |l|
|
151
|
-
assert_equal 5, l.
|
152
|
-
assert_equal 14, l.
|
48
|
+
assert_equal 5, l.post_date.month
|
49
|
+
assert_equal 14, l.post_date.day
|
153
50
|
end
|
154
51
|
|
155
|
-
empty_listings = CraigScrape.
|
52
|
+
empty_listings = CraigScrape::Listings.new relative_uri_for('listing_samples/empty_listings.html')
|
156
53
|
assert_equal nil, empty_listings.next_page_href
|
157
54
|
assert_equal [], empty_listings.posts
|
158
55
|
end
|
159
|
-
|
160
|
-
def test_posting_parse
|
161
|
-
posting0 = CraigScrape.scrape_full_post relative_uri_for('post_samples/posting0.html')
|
162
|
-
assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color", posting0.contents
|
163
|
-
assert_equal ["south florida craigslist", "miami / dade", "furniture - by owner"], posting0.full_section
|
164
|
-
assert_equal "tv cart on wheels - $35 (NMB)", posting0.header
|
165
|
-
assert_equal "tv cart on wheels", posting0.title
|
166
|
-
assert_equal "NMB", posting0.location
|
167
|
-
assert_equal 1131363612, posting0.posting_id
|
168
|
-
assert_equal "sale-ktf9w-1131363612@craigslist.org", posting0.reply_to
|
169
|
-
assert_equal [0, 21, 13, 20, 4, 2009, 1, 110, true, "EDT"], posting0.post_time.to_a
|
170
|
-
assert_equal [], posting0.images
|
171
|
-
assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color",posting0.contents_as_plain
|
172
|
-
assert_equal 35.0, posting0.price
|
173
|
-
|
174
|
-
posting1 = CraigScrape.scrape_full_post relative_uri_for('post_samples/posting1.html')
|
175
|
-
assert_equal "Residential income property\227Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r<br />\n\r<br />\nJe parle le Fran\347ais\r<br />\n\r<br />\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r<br />\n\r<br />", posting1.contents
|
176
|
-
assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], posting1.full_section
|
177
|
-
assert_equal "$189900 / 4br - Investment Property--Duplex in Fort Lauderdale", posting1.header
|
178
|
-
assert_equal "Investment Property--Duplex in Fort Lauderdale", posting1.title
|
179
|
-
assert_equal '1000 NE 14th Pl', posting1.location
|
180
|
-
assert_equal 1131242195, posting1.posting_id
|
181
|
-
assert_equal "hous-5nzhq-1131242195@craigslist.org", posting1.reply_to
|
182
|
-
assert_equal [0, 33, 13, 20, 4, 2009, 1, 110, true, "EDT"], posting1.post_time.to_a
|
183
|
-
assert_equal %w(http://images.craigslist.org/3n83o33l5ZZZZZZZZZ94k913ac1582d4b1fa4.jpg http://images.craigslist.org/3n93p63obZZZZZZZZZ94k19d5e32eb3b610c2.jpg http://images.craigslist.org/3n93m03l6ZZZZZZZZZ94k6e9785e37a1b1f3f.jpg http://images.craigslist.org/3ma3oc3l4ZZZZZZZZZ94kbfecbcd2fb2e19cc.jpg), posting1.images
|
184
|
-
assert_equal "Residential income property\227Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r\n\r\nJe parle le Fran\347ais\r\n\r\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r\n\r", posting1.contents_as_plain
|
185
|
-
assert_equal 189900.0, posting1.price
|
186
|
-
|
187
|
-
posting2 = CraigScrape.scrape_full_post relative_uri_for('post_samples/posting2.html')
|
188
|
-
assert_equal 15775, posting2.contents.length # This is easy, and probably fine enough
|
189
|
-
assert_equal ["south florida craigslist", "broward county", "cars & trucks - by dealer"], posting2.full_section
|
190
|
-
assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE - $23975 (Fort Lauderdale)", posting2.header
|
191
|
-
assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE", posting2.title
|
192
|
-
assert_equal 'Fort Lauderdale', posting2.location
|
193
|
-
assert_equal 1127037648, posting2.posting_id
|
194
|
-
assert_equal nil, posting2.reply_to
|
195
|
-
assert_equal [0, 16, 14, 17, 4, 2009, 5, 107, true, "EDT"], posting2.post_time.to_a
|
196
|
-
assert_equal [], posting2.images
|
197
|
-
assert_equal "\302\240 Sheehan Buick Pontiac GMC \302\240 Pompano Beach, FL(754) 224-3257 \302\240PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!2002 Chevrolet Corvette Z06 Florida Driven AutoCheck Certified 5.7L V8 6sp2 Door Coupe.\302\240Price: \302\240 $23,975Exterior:Electron Blue MetallicInterior:BlackStock#:P5110AVIN:1G1YY12S625129021FREE AutoCheck Vehicle ReportMileage:63,560Transmission:6 Speed ManualEngine:V8 5.7L OHVWarranty:Limited WarrantyTitle:Clear\302\273\302\240View All 58 Photos\302\273\302\240View Full Vehicle Details\302\273\302\240Ask the Seller a Question\302\273\302\240E-mail this to a Friend\302\240 DescriptionPRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!\r\n\r\nLOADED WITH BLACK LEATHER BUCKET SEATS, POWER DRIVERS SEAT, DUAL ZONE CLIMATE CONTROL, 4 WHEEL ABS BRAKES, POWER STEERING AND BRAKES, REAR LIMITED SLIP DIFFERENTIAL, STABILITY CONTROL, CRUISE CONTROL, TLT STEERING WHEEL, POWER WINDOWS AND LOCKS, AUTOMATIC ON/OFF HEADLAMPS, FOG LIGHTS, DUAL AIR BAG SAFETY, AM/FM STEREO CD PLAYER, INTERMITTENT WINDSHIELD WIPERS AND SO MUCH MORE - THIS CAR IS TOTALLY HOT WITH GREAT LOW MILES!\r\n\r\nPlease call us to make your deal now at 1-888-453-5244. Please visit our Website at www.sheehanautoplex.com ***View 50+ Pictures of this vehicle - a complete description including standard features and all added options & a FREE AUTO CHECK REPORT at www.sheehanautoplex.com. ***Financing for Everyone - Good credit - bad credit - divorce - charge off's - NO PROBLEM. 
To complete a secure credit application, please visit our website at www.sheehanautoplex.com ***The largest Dealer in the State of Florida - We export all over the world - For details please visit www.sheehanautoplex.com ***Sheehan Autoplex takes great pride in our outstanding customer service and has been recognized by the following associations - BBB (Better Business Bureau) - NIADA - and the FIADA. Call us to get your best deal. CALL NOW. 1-888-453-5244\302\240 Contact Sheehan Buick Pontiac GMCPhone:(754) 224-3257Fax:(954) 781-9050Phone:(754) 224-3257E-mail:sales@proauto.comBusiness HoursWeekdays:9:00 AM to 9:00 PMSat:9:00 AM to 6:00 PMSun:",posting2.contents_as_plain
|
198
|
-
assert_equal 23975.0, posting2.price
|
199
|
-
|
200
|
-
posting3 = CraigScrape.scrape_full_post relative_uri_for('post_samples/posting3.html')
|
201
|
-
assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r<br />\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny", posting3.contents
|
202
|
-
assert_equal ["south florida craigslist", "miami / dade", "cars & trucks - by owner"], posting3.full_section
|
203
|
-
assert_equal "300ZX Nissan Twin Turbo 1992 - $5800 (N.Miami/ Hialeah)", posting3.header
|
204
|
-
assert_equal "300ZX Nissan Twin Turbo 1992", posting3.title
|
205
|
-
assert_equal "N.Miami/ Hialeah", posting3.location
|
206
|
-
assert_equal 1130212403, posting3.posting_id
|
207
|
-
assert_equal "sale-c9bpa-1130212403@craigslist.org", posting3.reply_to
|
208
|
-
assert_equal [0, 21, 18, 19, 4, 2009, 0, 109, true, "EDT"], posting3.post_time.to_a
|
209
|
-
assert_equal %w(http://images.craigslist.org/3n23kf3lfZZZZZZZZZ94j1160e7d7b0601934.jpg http://images.craigslist.org/3nc3kf3p2ZZZZZZZZZ94j04fbc71e0a551ace.jpg http://images.craigslist.org/3nc3k33l7ZZZZZZZZZ94k13d8d7b1024e1e0e.jpg http://images.craigslist.org/3n23k63mfZZZZZZZZZ94k7838ae5d48d91eb8.jpg), posting3.images
|
210
|
-
assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny",posting3.contents_as_plain
|
211
|
-
assert_equal 5800.0, posting3.price
|
212
56
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
assert_equal "$225000 / 3br - Palm Aire Golf Corner Unit!", posting4.header
|
218
|
-
assert_equal "Palm Aire Golf Corner Unit!", posting4.title
|
219
|
-
assert_equal nil, posting4.location
|
220
|
-
assert_equal 1139303170, posting4.posting_id
|
221
|
-
assert_equal "hous-sk9f2-1139303170@craigslist.org", posting4.reply_to
|
222
|
-
assert_equal [0, 8, 9, 25, 4, 2009, 6, 115, true, "EDT"], posting4.post_time.to_a
|
223
|
-
assert_equal [], posting4.images
|
224
|
-
assert_equal 6399,posting4.contents_as_plain.length
|
225
|
-
assert_equal 225000.0, posting4.price
|
57
|
+
def test_eager_post_loading
|
58
|
+
# libcraigscrape is supposed to be 'smart' when downloading postings that don't make 'sense' solely by looking at the listings.
|
59
|
+
# I've only seen this on occasion, but it's annoying, and craigslist seems to use a lot of approximations sometimes
|
60
|
+
# The test page supplied is slightly adjusted to compensate for the lack of a web server when reading pages from the filesystem.
|
226
61
|
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
62
|
+
fortmyers_art_index500_060909 = CraigScrape::Listings.new relative_uri_for('listing_samples/fortmyers_art_index.060909/fortmyers_art_index500.060909.html')
|
63
|
+
fortmyers_art_index500_060909.posts[0..12].each do |l|
|
64
|
+
assert_equal 5, l.post_date.month
|
65
|
+
assert_equal 16, l.post_date.day
|
66
|
+
end
|
67
|
+
fortmyers_art_index500_060909.posts[13..36].each do |l|
|
68
|
+
assert_equal 5, l.post_date.month
|
69
|
+
assert_equal 15, l.post_date.day
|
70
|
+
end
|
71
|
+
fortmyers_art_index500_060909.posts[37..41].each do |l|
|
72
|
+
assert_equal 5, l.post_date.month
|
73
|
+
assert_equal 14, l.post_date.day
|
74
|
+
end
|
75
|
+
fortmyers_art_index500_060909.posts[42..55].each do |l|
|
76
|
+
assert_equal 5, l.post_date.month
|
77
|
+
assert_equal 13, l.post_date.day
|
78
|
+
end
|
79
|
+
fortmyers_art_index500_060909.posts[56..65].each do |l|
|
80
|
+
assert_equal 5, l.post_date.month
|
81
|
+
assert_equal 12, l.post_date.day
|
82
|
+
end
|
83
|
+
fortmyers_art_index500_060909.posts[66..87].each do |l|
|
84
|
+
assert_equal 5, l.post_date.month
|
85
|
+
assert_equal 11, l.post_date.day
|
86
|
+
end
|
87
|
+
fortmyers_art_index500_060909.posts[88..94].each do |l|
|
88
|
+
assert_equal 5, l.post_date.month
|
89
|
+
assert_equal 10, l.post_date.day
|
90
|
+
end
|
91
|
+
assert_equal 4, fortmyers_art_index500_060909.posts[95].post_date.month
|
92
|
+
assert_equal 8, fortmyers_art_index500_060909.posts[95].post_date.day
|
93
|
+
assert_equal 2, fortmyers_art_index500_060909.posts[96].post_date.month
|
94
|
+
assert_equal 27, fortmyers_art_index500_060909.posts[96].post_date.day
|
95
|
+
assert_equal 2, fortmyers_art_index500_060909.posts[97].post_date.month
|
96
|
+
assert_equal 23, fortmyers_art_index500_060909.posts[97].post_date.day
|
97
|
+
assert_equal 1, fortmyers_art_index500_060909.posts[98].post_date.month
|
98
|
+
assert_equal 14, fortmyers_art_index500_060909.posts[98].post_date.day
|
99
|
+
assert_equal 12, fortmyers_art_index500_060909.posts[99].post_date.month
|
100
|
+
assert_equal 16, fortmyers_art_index500_060909.posts[99].post_date.day
|
101
|
+
|
102
|
+
# Now we'll do one of these elusive 'trailer' pages which don't seem to really make much sense.
|
103
|
+
# Best I can tell, it only comes after a page like the one tested just above
|
104
|
+
fortmyers_art_index600_060909 = CraigScrape::Listings.new relative_uri_for('listing_samples/fortmyers_art_index.060909/fortmyers_art_index600.060909.html')
|
105
|
+
assert_equal "Husqvarna Viking Rose: Used Embroidery/Sewing Machine. Instruction book, Video, Embroidery Unit, 4\" 4\" hoop, designs, tool box with accessories including 8 feet (A, B, C, D, E, J, P, U and zipper foot). $400.00 Firm. (941) 347-8014 or (352)638-4707.", fortmyers_art_index600_060909.posts[0].contents
|
106
|
+
assert_equal "Husqvarna Viking Rose: Used Embroidery/Sewing Machine. Instruction book, Video, Embroidery Unit, 4\" 4\" hoop, designs, tool box with accessories including 8 feet (A, B, C, D, E, J, P, U and zipper foot). $400.00 Firm. (941) 347-8014 or (352)638-4707.", fortmyers_art_index600_060909.posts[0].contents_as_plain
|
107
|
+
assert_equal false, fortmyers_art_index600_060909.posts[0].deleted_by_author?
|
108
|
+
assert_equal true, fortmyers_art_index600_060909.posts[0].downloaded?
|
109
|
+
assert_equal false, fortmyers_art_index600_060909.posts[0].flagged_for_removal?
|
110
|
+
assert_equal ["fort myers craigslist", "art & crafts"], fortmyers_art_index600_060909.posts[0].full_section
|
111
|
+
assert_equal false, fortmyers_art_index600_060909.posts[0].has_img?
|
112
|
+
assert_equal true, fortmyers_art_index600_060909.posts[0].has_pic?
|
113
|
+
assert_equal true, fortmyers_art_index600_060909.posts[0].has_pic_or_img?
|
114
|
+
assert_equal "Husqvarna Viking Rose Embroidery-Sewing Machine - $400 (Punta Gorda, Charlotte County)", fortmyers_art_index600_060909.posts[0].header
|
115
|
+
assert_equal "Husqvarna Viking Rose Embroidery-Sewing Machine - $400 (Punta Gorda, Charlotte County)", fortmyers_art_index600_060909.posts[0].header_as_plain
|
116
|
+
assert_equal "897549505.html", fortmyers_art_index600_060909.posts[0].href
|
117
|
+
assert_equal [], fortmyers_art_index600_060909.posts[0].images
|
118
|
+
assert_equal [:pic], fortmyers_art_index600_060909.posts[0].img_types
|
119
|
+
assert_equal "Husqvarna Viking Rose Embroidery-Sewing Machine - $400", fortmyers_art_index600_060909.posts[0].label
|
120
|
+
assert_equal "Punta Gorda, Charlotte County", fortmyers_art_index600_060909.posts[0].location
|
121
|
+
assert_equal [], fortmyers_art_index600_060909.posts[0].pics
|
122
|
+
assert_equal [0, 0, 0, 28, 10, 2008, 2, 302, true, "EDT"], fortmyers_art_index600_060909.posts[0].post_date.to_a
|
123
|
+
assert_equal [0, 51, 21, 28, 10, 2008, 2, 302, true, "EDT"], fortmyers_art_index600_060909.posts[0].post_time.to_a
|
124
|
+
assert_equal 897549505, fortmyers_art_index600_060909.posts[0].posting_id
|
125
|
+
assert_equal 400.0, fortmyers_art_index600_060909.posts[0].price
|
126
|
+
assert_equal nil, fortmyers_art_index600_060909.posts[0].reply_to
|
127
|
+
assert_equal "art & crafts", fortmyers_art_index600_060909.posts[0].section
|
128
|
+
assert_equal false, fortmyers_art_index600_060909.posts[0].system_post?
|
129
|
+
assert_equal "Husqvarna Viking Rose Embroidery-Sewing Machine", fortmyers_art_index600_060909.posts[0].title
|
240
130
|
|
241
|
-
|
242
|
-
assert_equal
|
243
|
-
assert_equal
|
244
|
-
assert_equal
|
245
|
-
assert_equal
|
246
|
-
assert_equal
|
247
|
-
assert_equal
|
248
|
-
assert_equal
|
249
|
-
assert_equal
|
250
|
-
assert_equal
|
251
|
-
assert_equal []
|
252
|
-
assert_equal
|
253
|
-
assert_equal
|
254
|
-
|
255
|
-
|
256
|
-
assert_equal "
|
257
|
-
assert_equal
|
258
|
-
assert_equal
|
259
|
-
assert_equal
|
260
|
-
assert_equal
|
261
|
-
assert_equal
|
262
|
-
assert_equal
|
263
|
-
assert_equal
|
264
|
-
assert_equal
|
265
|
-
assert_equal
|
266
|
-
|
267
|
-
assert_equal "
|
268
|
-
assert_equal "
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
)
|
131
|
+
assert_equal "Multiple artists' moving sale. Lots of unusual items including art, art supplies, ceramics and ceramic glazes, furniture, clothes, books, electronics, cd's and much more. Also for sale is alot of restaurant equpment.\r<br />\n\r<br />\nSale to be held at 3570 Bayshore Dr. next to Bayshore Coffee Co.\r<br />\n\r<br />\nSaturday 8:00 a.m. until 2:00 Rain or shine.\r<br />", fortmyers_art_index600_060909.posts[1].contents
|
132
|
+
assert_equal "Multiple artists' moving sale. Lots of unusual items including art, art supplies, ceramics and ceramic glazes, furniture, clothes, books, electronics, cd's and much more. Also for sale is alot of restaurant equpment.\r\n\r\nSale to be held at 3570 Bayshore Dr. next to Bayshore Coffee Co.\r\n\r\nSaturday 8:00 a.m. until 2:00 Rain or shine.\r", fortmyers_art_index600_060909.posts[1].contents_as_plain
|
133
|
+
assert_equal false, fortmyers_art_index600_060909.posts[1].deleted_by_author?
|
134
|
+
assert_equal true, fortmyers_art_index600_060909.posts[1].downloaded?
|
135
|
+
assert_equal false, fortmyers_art_index600_060909.posts[1].flagged_for_removal?
|
136
|
+
assert_equal ["fort myers craigslist", "art & crafts"], fortmyers_art_index600_060909.posts[1].full_section
|
137
|
+
assert_equal false, fortmyers_art_index600_060909.posts[1].has_img?
|
138
|
+
assert_equal false, fortmyers_art_index600_060909.posts[1].has_pic?
|
139
|
+
assert_equal false, fortmyers_art_index600_060909.posts[1].has_pic_or_img?
|
140
|
+
assert_equal "ARTISTS' MOVING SALE-BAYSHORE (Naples)", fortmyers_art_index600_060909.posts[1].header
|
141
|
+
assert_equal "ARTISTS' MOVING SALE-BAYSHORE (Naples)", fortmyers_art_index600_060909.posts[1].header_as_plain
|
142
|
+
assert_equal "891513957.html", fortmyers_art_index600_060909.posts[1].href
|
143
|
+
assert_equal [], fortmyers_art_index600_060909.posts[1].images
|
144
|
+
assert_equal [], fortmyers_art_index600_060909.posts[1].img_types
|
145
|
+
assert_equal "ARTISTS' MOVING SALE-BAYSHORE", fortmyers_art_index600_060909.posts[1].label
|
146
|
+
assert_equal "Naples", fortmyers_art_index600_060909.posts[1].location
|
147
|
+
assert_equal [], fortmyers_art_index600_060909.posts[1].pics
|
148
|
+
assert_equal [0, 0, 0, 24, 10, 2008, 5, 298, true, "EDT"], fortmyers_art_index600_060909.posts[1].post_date.to_a
|
149
|
+
assert_equal [0, 31, 9, 24, 10, 2008, 5, 298, true, "EDT"], fortmyers_art_index600_060909.posts[1].post_time.to_a
|
150
|
+
assert_equal 891513957, fortmyers_art_index600_060909.posts[1].posting_id
|
151
|
+
assert_equal nil, fortmyers_art_index600_060909.posts[1].price
|
152
|
+
assert_equal "sale-891513957@craigslist.org", fortmyers_art_index600_060909.posts[1].reply_to
|
153
|
+
assert_equal "art & crafts", fortmyers_art_index600_060909.posts[1].section
|
154
|
+
assert_equal false, fortmyers_art_index600_060909.posts[1].system_post?
|
155
|
+
assert_equal "ARTISTS' MOVING SALE-BAYSHORE", fortmyers_art_index600_060909.posts[1].title
|
156
|
+
|
157
|
+
assert_equal "Tapestry sewing machine and embroidery arm luggage for Viking designer sewing machine. Two years old in excellent condition.", fortmyers_art_index600_060909.posts[2].contents
|
158
|
+
assert_equal "Tapestry sewing machine and embroidery arm luggage for Viking designer sewing machine. Two years old in excellent condition.", fortmyers_art_index600_060909.posts[2].contents_as_plain
|
159
|
+
assert_equal false, fortmyers_art_index600_060909.posts[2].deleted_by_author?
|
160
|
+
assert_equal true, fortmyers_art_index600_060909.posts[2].downloaded?
|
161
|
+
assert_equal false, fortmyers_art_index600_060909.posts[2].flagged_for_removal?
|
162
|
+
assert_equal ["fort myers craigslist", "art & crafts"], fortmyers_art_index600_060909.posts[2].full_section
|
163
|
+
assert_equal false, fortmyers_art_index600_060909.posts[2].has_img?
|
164
|
+
assert_equal false, fortmyers_art_index600_060909.posts[2].has_pic?
|
165
|
+
assert_equal false, fortmyers_art_index600_060909.posts[2].has_pic_or_img?
|
166
|
+
assert_equal "tapestry sewing machine and embroidery arm luggage - $250 (Punta Gorda)", fortmyers_art_index600_060909.posts[2].header
|
167
|
+
assert_equal "tapestry sewing machine and embroidery arm luggage - $250 (Punta Gorda)", fortmyers_art_index600_060909.posts[2].header_as_plain
|
168
|
+
assert_equal "825684735.html", fortmyers_art_index600_060909.posts[2].href
|
169
|
+
assert_equal [], fortmyers_art_index600_060909.posts[2].images
|
170
|
+
assert_equal [], fortmyers_art_index600_060909.posts[2].img_types
|
171
|
+
assert_equal "tapestry sewing machine and embroidery arm luggage - $250", fortmyers_art_index600_060909.posts[2].label
|
172
|
+
assert_equal "Punta Gorda", fortmyers_art_index600_060909.posts[2].location
|
173
|
+
assert_equal [], fortmyers_art_index600_060909.posts[2].pics
|
174
|
+
assert_equal [0, 0, 0, 3, 9, 2008, 3, 247, true, "EDT"], fortmyers_art_index600_060909.posts[2].post_date.to_a
|
175
|
+
assert_equal [0, 31, 15, 3, 9, 2008, 3, 247, true, "EDT"], fortmyers_art_index600_060909.posts[2].post_time.to_a
|
176
|
+
assert_equal 825684735, fortmyers_art_index600_060909.posts[2].posting_id
|
177
|
+
assert_equal 250.0, fortmyers_art_index600_060909.posts[2].price
|
178
|
+
assert_equal "sale-825684735@craigslist.org", fortmyers_art_index600_060909.posts[2].reply_to
|
179
|
+
assert_equal "art & crafts", fortmyers_art_index600_060909.posts[2].section
|
180
|
+
assert_equal false, fortmyers_art_index600_060909.posts[2].system_post?
|
181
|
+
assert_equal "tapestry sewing machine and embroidery arm luggage", fortmyers_art_index600_060909.posts[2].title
|
182
|
+
|
183
|
+
assert_equal "Gorgeous and one of a kind! Museum-collected artist Jay von Koffler's Aurora Series - cast glass nude sculpture - Aurora. Mounted on marble and enhanced with bronze beak. \r<br />\n\r<br />\nDimensions: 30x16x6\r<br />\nCall for appointment for studio viewing - 239.595.1793", fortmyers_art_index600_060909.posts[3].contents
|
184
|
+
assert_equal "Gorgeous and one of a kind! Museum-collected artist Jay von Koffler's Aurora Series - cast glass nude sculpture - Aurora. Mounted on marble and enhanced with bronze beak. \r\n\r\nDimensions: 30x16x6\r\nCall for appointment for studio viewing - 239.595.1793", fortmyers_art_index600_060909.posts[3].contents_as_plain
|
185
|
+
assert_equal false, fortmyers_art_index600_060909.posts[3].deleted_by_author?
|
186
|
+
assert_equal true, fortmyers_art_index600_060909.posts[3].downloaded?
|
187
|
+
assert_equal false, fortmyers_art_index600_060909.posts[3].flagged_for_removal?
|
188
|
+
assert_equal ["fort myers craigslist", "art & crafts"], fortmyers_art_index600_060909.posts[3].full_section
|
189
|
+
assert_equal false, fortmyers_art_index600_060909.posts[3].has_img?
|
190
|
+
assert_equal true, fortmyers_art_index600_060909.posts[3].has_pic?
|
191
|
+
assert_equal true, fortmyers_art_index600_060909.posts[3].has_pic_or_img?
|
192
|
+
assert_equal "Cast Glass Sculpture - Aurora - $2400 (Naples)", fortmyers_art_index600_060909.posts[3].header
|
193
|
+
assert_equal "Cast Glass Sculpture - Aurora - $2400 (Naples)", fortmyers_art_index600_060909.posts[3].header_as_plain
|
194
|
+
assert_equal "823516079.html", fortmyers_art_index600_060909.posts[3].href
|
195
|
+
assert_equal [], fortmyers_art_index600_060909.posts[3].images
|
196
|
+
assert_equal [:pic], fortmyers_art_index600_060909.posts[3].img_types
|
197
|
+
assert_equal "Cast Glass Sculpture - Aurora - $2400", fortmyers_art_index600_060909.posts[3].label
|
198
|
+
assert_equal "Naples", fortmyers_art_index600_060909.posts[3].location
|
199
|
+
assert_equal [], fortmyers_art_index600_060909.posts[3].pics
|
200
|
+
assert_equal [0, 0, 0, 2, 9, 2008, 2, 246, true, "EDT"], fortmyers_art_index600_060909.posts[3].post_date.to_a
|
201
|
+
assert_equal [0, 35, 10, 2, 9, 2008, 2, 246, true, "EDT"], fortmyers_art_index600_060909.posts[3].post_time.to_a
|
202
|
+
assert_equal 823516079, fortmyers_art_index600_060909.posts[3].posting_id
|
203
|
+
assert_equal 2400.0, fortmyers_art_index600_060909.posts[3].price
|
204
|
+
assert_equal "sale-823516079@craigslist.org", fortmyers_art_index600_060909.posts[3].reply_to
|
205
|
+
assert_equal "art & crafts", fortmyers_art_index600_060909.posts[3].section
|
206
|
+
assert_equal false, fortmyers_art_index600_060909.posts[3].system_post?
|
207
|
+
assert_equal "Cast Glass Sculpture - Aurora", fortmyers_art_index600_060909.posts[3].title
|
277
208
|
end
|
278
209
|
|
279
|
-
def
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
puts
|
287
|
-
probable_accessors.sort.each do |m|
|
288
|
-
val = obj.send(m.to_sym)
|
289
|
-
|
290
|
-
# There's a good number of transformations worth doing here, I'll just start like this for now:
|
291
|
-
if val.kind_of? Time
|
292
|
-
# I've decided this is the easiest way to understand and test a time
|
293
|
-
val = val.to_a
|
294
|
-
m = "#{m}.to_a"
|
295
|
-
end
|
296
|
-
|
297
|
-
puts "assert_equal %s, %s.%s" % [val.inspect,obj_name,m]
|
298
|
-
end
|
210
|
+
# Checks next_page_href on two saved search-result samples for the Miami
# "rack" search: the rack900 sample must report the s=1000 page as its next
# page, and the rack1000 sample must report no next page (nil).
def test_nasty_search_listings
  miami_search_sss_rack900_061809 = CraigScrape::Listings.new(
    relative_uri_for('listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack900.6.18.09.html')
  )
  assert_equal '/search/sss?query=rack&s=1000', miami_search_sss_rack900_061809.next_page_href

  miami_search_sss_rack1000_061809 = CraigScrape::Listings.new(
    relative_uri_for('listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack1000.6.18.09.html')
  )
  # assert_nil states the expectation directly instead of comparing against a nil literal.
  assert_nil miami_search_sss_rack1000_061809.next_page_href
end
|
217
|
+
|
218
|
+
|
300
219
|
end
|