libcraigscrape 1.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +12 -1
- data/Gemfile +12 -0
- data/Rakefile +1 -54
- data/bin/craig_report_schema.yml +4 -1
- data/bin/craigwatch +148 -146
- data/bin/report_mailer/report.html.erb +20 -0
- data/bin/report_mailer/{craigslist_report.plain.erb → report.text.erb} +7 -6
- data/lib/geo_listings.rb +1 -1
- data/lib/libcraigscrape.rb +52 -59
- data/lib/listings.rb +75 -39
- data/lib/posting.rb +120 -63
- data/lib/scraper.rb +43 -63
- data/spec/assets/geolisting_iso_us_120412.html +441 -0
- data/spec/assets/listing_cta_ftl_112612.html +1470 -0
- data/spec/assets/listing_rea_miami_123012.html +1397 -0
- data/spec/assets/listing_search_ppa_nyc_121212.html +1584 -0
- data/spec/assets/posting_daytona_art_120512-2.html +160 -0
- data/spec/assets/posting_daytona_art_120512.html +153 -0
- data/spec/assets/posting_mdc_cto_ftl_112612.html +170 -0
- data/spec/assets/posting_mdc_reb_120612.html +183 -0
- data/spec/assets/posting_sfbay_1226.html +157 -0
- data/spec/assets/posting_sya_121012-2.html +122 -0
- data/spec/assets/posting_sya_121012.html +165 -0
- data/spec/assets/this_post_has_expired_old.html +48 -0
- data/spec/geolisting_spec.rb +9 -0
- data/spec/listings_spec.rb +77 -0
- data/spec/postings_spec.rb +157 -0
- data/spec/spec_helper.rb +8 -0
- data/test/test_craigslist_geolisting.rb +5 -5
- data/test/test_craigslist_listing.rb +30 -30
- data/test/test_craigslist_posting.rb +25 -145
- metadata +200 -114
- data/bin/report_mailer/craigslist_report.html.erb +0 -17
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
+
# encoding: UTF-8
|
2
3
|
|
3
4
|
require 'test/unit'
|
4
5
|
require File.dirname(__FILE__)+'/../lib/libcraigscrape'
|
@@ -14,127 +15,6 @@ class CraigslistPostingTest < Test::Unit::TestCase
|
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
17
|
-
def test_listing_parse
|
18
|
-
search_html_one = <<EOD
|
19
|
-
<p> Apr 18 - <a href="/brw/reb/1128608404.html">Losing your house? You'll need this New Loan Mod Video -</a><font size="-1"> (W. Woodland)</font> <span class="p"> img</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
20
|
-
EOD
|
21
|
-
search_html_two = <<EOD
|
22
|
-
<p> Jan 4 - <a href="/mdc/reb/1128609783.html">$348000 / 1br - Large 1/1 plus office on 49th Floor. 5-Star NEW Condo. Great Views -</a><font size="-1"> (Miami)</font> <span class="p"> pic img</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
23
|
-
EOD
|
24
|
-
search_html_three = <<EOD
|
25
|
-
<p> Dec 31 - <a href="/mdc/reb/1128520894.html">$22,000 HOME -ADULT COMMUNITY BOYNTON BEACH -</a> <span class="p"> pic</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
26
|
-
EOD
|
27
|
-
search_html_four = <<EOD
|
28
|
-
<p> Jul 22 - <a href="/mdc/reb/1128474725.html">$325000 / 3br - GOOD DEAL GREAT HOUSE AND LOCATION -</a><font size="-1"> (CORAL GABLES)</font> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
29
|
-
EOD
|
30
|
-
search_html_five = <<EOD
|
31
|
-
<p> Apr 9 - <a href="/pbc/boa/1115308178.html">40' SILVERTON CONVERTIBLE DIESEL - $105000 -</a><font size="-1"> (HOBE SOUND)</font> <span class="p"> pic</span></p>
|
32
|
-
EOD
|
33
|
-
category_listing_one = <<EOD
|
34
|
-
<p><a href="/pbc/reb/1128661387.html">$2995000 / 5br - Downtown Boca New Home To Be Built -</a><font size="-1"> (Boca Raton)</font> <span class="p"> pic</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
35
|
-
EOD
|
36
|
-
category_listing_two = <<EOD
|
37
|
-
<p><a href="/mdc/jwl/1128691192.html">925 Sterling Silver Dragonfly Charm Bracelet - $25 -</a> <span class="p"> img</span></p>
|
38
|
-
EOD
|
39
|
-
|
40
|
-
one = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(
|
41
|
-
Nokogiri::HTML(search_html_one, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
|
42
|
-
)
|
43
|
-
assert_equal true, one.has_img?
|
44
|
-
assert_equal false, one.has_pic?
|
45
|
-
assert_equal true, one.has_pic_or_img?
|
46
|
-
assert_equal '/brw/reb/1128608404.html', one.href
|
47
|
-
assert_equal "Losing your house? You'll need this New Loan Mod Video", one.label
|
48
|
-
assert_equal "real estate - by broker", one.section
|
49
|
-
assert_equal "W. Woodland", one.location
|
50
|
-
assert_equal 4, one.post_date.month
|
51
|
-
assert_equal 18, one.post_date.day
|
52
|
-
assert_equal nil, one.price
|
53
|
-
|
54
|
-
two = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(
|
55
|
-
Nokogiri::HTML(search_html_two, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
|
56
|
-
)
|
57
|
-
assert_equal true, two.has_img?
|
58
|
-
assert_equal true, two.has_pic?
|
59
|
-
assert_equal true, two.has_pic_or_img?
|
60
|
-
assert_equal '/mdc/reb/1128609783.html', two.href
|
61
|
-
assert_equal "$348000 / 1br - Large 1/1 plus office on 49th Floor. 5-Star NEW Condo. Great Views", two.label
|
62
|
-
assert_equal "real estate - by broker", two.section
|
63
|
-
assert_equal "Miami", two.location
|
64
|
-
assert_equal 1, two.post_date.month
|
65
|
-
assert_equal 4, two.post_date.day
|
66
|
-
assert_equal 348000.0, two.price
|
67
|
-
|
68
|
-
three = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(
|
69
|
-
Nokogiri::HTML(search_html_three, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
|
70
|
-
)
|
71
|
-
assert_equal false, three.has_img?
|
72
|
-
assert_equal true, three.has_pic?
|
73
|
-
assert_equal true, three.has_pic_or_img?
|
74
|
-
assert_equal '/mdc/reb/1128520894.html', three.href
|
75
|
-
assert_equal "$22,000 HOME -ADULT COMMUNITY BOYNTON BEACH", three.label
|
76
|
-
assert_equal "real estate - by broker", three.section
|
77
|
-
assert_equal nil, three.location
|
78
|
-
assert_equal 12, three.post_date.month
|
79
|
-
assert_equal 31, three.post_date.day
|
80
|
-
assert_equal 22.0, three.price
|
81
|
-
|
82
|
-
four = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(
|
83
|
-
Nokogiri::HTML(search_html_four, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
|
84
|
-
)
|
85
|
-
assert_equal false, four.has_img?
|
86
|
-
assert_equal false, four.has_pic?
|
87
|
-
assert_equal false, four.has_pic_or_img?
|
88
|
-
assert_equal '/mdc/reb/1128474725.html', four.href
|
89
|
-
assert_equal "$325000 / 3br - GOOD DEAL GREAT HOUSE AND LOCATION", four.label
|
90
|
-
assert_equal "real estate - by broker", four.section
|
91
|
-
assert_equal "CORAL GABLES", four.location
|
92
|
-
assert_equal 7, four.post_date.month
|
93
|
-
assert_equal 22, four.post_date.day
|
94
|
-
assert_equal 325000.0, four.price
|
95
|
-
|
96
|
-
five = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(
|
97
|
-
Nokogiri::HTML(search_html_five, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
|
98
|
-
)
|
99
|
-
assert_equal false, five.has_img?
|
100
|
-
assert_equal true, five.has_pic?
|
101
|
-
assert_equal true, five.has_pic_or_img?
|
102
|
-
assert_equal '/pbc/boa/1115308178.html', five.href
|
103
|
-
assert_equal "40' SILVERTON CONVERTIBLE DIESEL - $105000", five.label
|
104
|
-
assert_equal nil, five.section
|
105
|
-
assert_equal "HOBE SOUND", five.location
|
106
|
-
assert_equal 4, five.post_date.month
|
107
|
-
assert_equal 9, five.post_date.day
|
108
|
-
assert_equal 105000.0, five.price
|
109
|
-
|
110
|
-
six = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(
|
111
|
-
Nokogiri::HTML(category_listing_one, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
|
112
|
-
)
|
113
|
-
assert_equal false, six.has_img?
|
114
|
-
assert_equal true, six.has_pic?
|
115
|
-
assert_equal true, six.has_pic_or_img?
|
116
|
-
assert_equal '/pbc/reb/1128661387.html', six.href
|
117
|
-
assert_equal "$2995000 / 5br - Downtown Boca New Home To Be Built", six.label
|
118
|
-
assert_equal "real estate - by broker", six.section
|
119
|
-
assert_equal "Boca Raton", six.location
|
120
|
-
assert_equal nil, six.post_date
|
121
|
-
assert_equal 2995000.0, six.price
|
122
|
-
|
123
|
-
seven = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(
|
124
|
-
Nokogiri::HTML(category_listing_two, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
|
125
|
-
)
|
126
|
-
assert_equal true, seven.has_img?
|
127
|
-
assert_equal false, seven.has_pic?
|
128
|
-
assert_equal true, seven.has_pic_or_img?
|
129
|
-
assert_equal '/mdc/jwl/1128691192.html', seven.href
|
130
|
-
assert_equal "925 Sterling Silver Dragonfly Charm Bracelet - $25", seven.label
|
131
|
-
assert_equal nil, seven.section
|
132
|
-
assert_equal nil, seven.location
|
133
|
-
assert_equal nil, seven.post_date
|
134
|
-
assert_equal 25.0, seven.price
|
135
|
-
end
|
136
|
-
|
137
|
-
|
138
18
|
def test_posting_parse
|
139
19
|
posting0 = CraigScrape::Posting.new relative_uri_for('post_samples/posting0.html')
|
140
20
|
assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color", posting0.contents
|
@@ -145,7 +25,7 @@ EOD
|
|
145
25
|
assert_equal "NMB", posting0.location
|
146
26
|
assert_equal 1131363612, posting0.posting_id
|
147
27
|
assert_equal "sale-ktf9w-1131363612@craigslist.org", posting0.reply_to
|
148
|
-
assert_equal
|
28
|
+
assert_equal DateTime.parse('2009-04-20T13:21:00-04:00'), posting0.post_time
|
149
29
|
assert_equal [], posting0.pics
|
150
30
|
assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color",posting0.contents_as_plain
|
151
31
|
assert_equal 35.0, posting0.price
|
@@ -153,7 +33,7 @@ EOD
|
|
153
33
|
assert_equal [], posting0.img_types
|
154
34
|
|
155
35
|
posting1 = CraigScrape::Posting.new relative_uri_for('post_samples/posting1.html')
|
156
|
-
assert_equal "Residential income property\
|
36
|
+
assert_equal "Residential income property\u0097Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r<br>\n\r<br>\nJe parle le Français\r<br>\n\r<br>\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r<br>\n\r<br>", posting1.contents
|
157
37
|
assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], posting1.full_section
|
158
38
|
assert_equal "$189900 / 4br - Investment Property--Duplex in Fort Lauderdale", posting1.header
|
159
39
|
assert_equal "$189900 / 4br - Investment Property--Duplex in Fort Lauderdale", posting1.label
|
@@ -161,16 +41,16 @@ EOD
|
|
161
41
|
assert_equal '1000 NE 14th Pl', posting1.location
|
162
42
|
assert_equal 1131242195, posting1.posting_id
|
163
43
|
assert_equal "hous-5nzhq-1131242195@craigslist.org", posting1.reply_to
|
164
|
-
assert_equal
|
44
|
+
assert_equal DateTime.parse('2009-04-20T13:33:00-04:00'), posting1.post_time
|
165
45
|
assert_equal %w(http://images.craigslist.org/3n83o33l5ZZZZZZZZZ94k913ac1582d4b1fa4.jpg http://images.craigslist.org/3n93p63obZZZZZZZZZ94k19d5e32eb3b610c2.jpg http://images.craigslist.org/3n93m03l6ZZZZZZZZZ94k6e9785e37a1b1f3f.jpg http://images.craigslist.org/3ma3oc3l4ZZZZZZZZZ94kbfecbcd2fb2e19cc.jpg), posting1.pics
|
166
|
-
assert_equal "Residential income property\
|
46
|
+
assert_equal "Residential income property\u0097Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r\n\r\nJe parle le Français\r\n\r\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r\n\r", posting1.contents_as_plain
|
167
47
|
assert_equal 189900.0, posting1.price
|
168
48
|
assert_equal [], posting1.images
|
169
49
|
assert_equal ["http://images.craigslist.org/3n83o33l5ZZZZZZZZZ94k913ac1582d4b1fa4.jpg", "http://images.craigslist.org/3n93p63obZZZZZZZZZ94k19d5e32eb3b610c2.jpg", "http://images.craigslist.org/3n93m03l6ZZZZZZZZZ94k6e9785e37a1b1f3f.jpg", "http://images.craigslist.org/3ma3oc3l4ZZZZZZZZZ94kbfecbcd2fb2e19cc.jpg"], posting1.pics
|
170
50
|
assert_equal [:pic], posting1.img_types
|
171
51
|
|
172
52
|
posting2 = CraigScrape::Posting.new relative_uri_for('post_samples/posting2.html')
|
173
|
-
assert_equal
|
53
|
+
assert_equal 15473, posting2.contents.length # This is easy, and probably fine enough
|
174
54
|
assert_equal ["south florida craigslist", "broward county", "cars & trucks - by dealer"], posting2.full_section
|
175
55
|
assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE - $23975 (Fort Lauderdale)", posting2.header
|
176
56
|
assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE - $23975", posting2.label
|
@@ -178,7 +58,7 @@ EOD
|
|
178
58
|
assert_equal 'Fort Lauderdale', posting2.location
|
179
59
|
assert_equal 1127037648, posting2.posting_id
|
180
60
|
assert_equal nil, posting2.reply_to
|
181
|
-
assert_equal
|
61
|
+
assert_equal DateTime.parse('2009-04-17T14:16:00-04:00'), posting2.post_time
|
182
62
|
assert_equal [], posting2.pics
|
183
63
|
assert_equal "\302\240 Sheehan Buick Pontiac GMC \302\240 Pompano Beach, FL(754) 224-3257 \302\240PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!2002 Chevrolet Corvette Z06 Florida Driven AutoCheck Certified 5.7L V8 6sp2 Door Coupe.\302\240Price: \302\240 $23,975Exterior:Electron Blue MetallicInterior:BlackStock#:P5110AVIN:1G1YY12S625129021FREE AutoCheck Vehicle ReportMileage:63,560Transmission:6 Speed ManualEngine:V8 5.7L OHVWarranty:Limited WarrantyTitle:Clear\302\273\302\240View All 58 Photos\302\273\302\240View Full Vehicle Details\302\273\302\240Ask the Seller a Question\302\273\302\240E-mail this to a Friend\302\240 DescriptionPRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!\r\n\r\nLOADED WITH BLACK LEATHER BUCKET SEATS, POWER DRIVERS SEAT, DUAL ZONE CLIMATE CONTROL, 4 WHEEL ABS BRAKES, POWER STEERING AND BRAKES, REAR LIMITED SLIP DIFFERENTIAL, STABILITY CONTROL, CRUISE CONTROL, TLT STEERING WHEEL, POWER WINDOWS AND LOCKS, AUTOMATIC ON/OFF HEADLAMPS, FOG LIGHTS, DUAL AIR BAG SAFETY, AM/FM STEREO CD PLAYER, INTERMITTENT WINDSHIELD WIPERS AND SO MUCH MORE - THIS CAR IS TOTALLY HOT WITH GREAT LOW MILES!\r\n\r\nPlease call us to make your deal now at 1-888-453-5244. Please visit our Website at www.sheehanautoplex.com ***View 50+ Pictures of this vehicle - a complete description including standard features and all added options & a FREE AUTO CHECK REPORT at www.sheehanautoplex.com. ***Financing for Everyone - Good credit - bad credit - divorce - charge off's - NO PROBLEM. To complete a secure credit application, please visit our website at www.sheehanautoplex.com ***The largest Dealer in the State of Florida - We export all over the world - For details please visit www.sheehanautoplex.com ***Sheehan Autoplex takes great pride in our outstanding customer service and has been recognized by the following associations - BBB (Better Business Bureau) - NIADA - and the FIADA. Call us to get your best deal. CALL NOW. 1-888-453-5244\302\240 Contact Sheehan Buick Pontiac GMCPhone:(754) 224-3257Fax:(954) 781-9050Phone:(754) 224-3257E-mail:sales@proauto.comBusiness HoursWeekdays:9:00 AM to 9:00 PMSat:9:00 AM to 6:00 PMSun:",posting2.contents_as_plain
|
184
64
|
assert_equal 23975.0, posting2.price
|
@@ -194,7 +74,7 @@ EOD
|
|
194
74
|
assert_equal "N.Miami/ Hialeah", posting3.location
|
195
75
|
assert_equal 1130212403, posting3.posting_id
|
196
76
|
assert_equal "sale-c9bpa-1130212403@craigslist.org", posting3.reply_to
|
197
|
-
assert_equal
|
77
|
+
assert_equal DateTime.parse('2009-04-19T18:21:00-04:00'), posting3.post_time
|
198
78
|
assert_equal %w(http://images.craigslist.org/3n23kf3lfZZZZZZZZZ94j1160e7d7b0601934.jpg http://images.craigslist.org/3nc3kf3p2ZZZZZZZZZ94j04fbc71e0a551ace.jpg http://images.craigslist.org/3nc3k33l7ZZZZZZZZZ94k13d8d7b1024e1e0e.jpg http://images.craigslist.org/3n23k63mfZZZZZZZZZ94k7838ae5d48d91eb8.jpg), posting3.pics
|
199
79
|
assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny",posting3.contents_as_plain
|
200
80
|
assert_equal 5800.0, posting3.price
|
@@ -204,7 +84,7 @@ EOD
|
|
204
84
|
|
205
85
|
# This one ended up being quite a curveball since the user uploaded HTML was such junk:
|
206
86
|
posting4 = CraigScrape::Posting.new relative_uri_for('post_samples/posting4.html')
|
207
|
-
assert_equal
|
87
|
+
assert_equal 19337, posting4.contents.length
|
208
88
|
assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], posting4.full_section
|
209
89
|
assert_equal "$225000 / 3br - Palm Aire Golf Corner Unit!", posting4.header
|
210
90
|
assert_equal "Palm Aire Golf Corner Unit!", posting4.title
|
@@ -212,9 +92,9 @@ EOD
|
|
212
92
|
assert_equal nil, posting4.location
|
213
93
|
assert_equal 1139303170, posting4.posting_id
|
214
94
|
assert_equal "hous-sk9f2-1139303170@craigslist.org", posting4.reply_to
|
215
|
-
assert_equal
|
95
|
+
assert_equal DateTime.parse('2009-04-25T09:08:00-04:00'), posting4.post_time
|
216
96
|
assert_equal [], posting4.pics
|
217
|
-
assert_equal
|
97
|
+
assert_equal 6321,posting4.contents_as_plain.length
|
218
98
|
assert_equal 225000.0, posting4.price
|
219
99
|
assert_equal ["http://fortlauderdaleareahomesales.com/myfiles/5.jpg", "http://fortlauderdaleareahomesales.com/myfiles/4.jpg", "http://fortlauderdaleareahomesales.com/myfiles/7.jpg", "http://fortlauderdaleareahomesales.com/myfiles/10.jpg", "http://fortlauderdaleareahomesales.com/myfiles/1.jpg", "http://fortlauderdaleareahomesales.com/myfiles/2.jpg", "http://fortlauderdaleareahomesales.com/myfiles/3.jpg", "http://fortlauderdaleareahomesales.com/myfiles/8.jpg", "http://fortlauderdaleareahomesales.com/myfiles/9.jpg", "http://fortlauderdaleareahomesales.com/myfiles/11.jpg", "http://fortlauderdaleareahomesales.com/myfiles/14.jpg", "http://fortlauderdaleareahomesales.com/myfiles/6.jpg"], posting4.images
|
220
100
|
assert_equal [:img], posting4.img_types
|
@@ -263,7 +143,7 @@ EOD
|
|
263
143
|
assert_equal "$1350 / 3br - 2bth for no deposit req",posting6.label
|
264
144
|
assert_equal ["http://images.craigslist.org/3k43pe3o8ZZZZZZZZZ9655022102a3ea51624.jpg", "http://images.craigslist.org/3n13m53p6ZZZZZZZZZ96596515e51237a179c.jpg", "http://images.craigslist.org/3od3p33leZZZZZZZZZ9656d614da8e3a51dd9.jpg", "http://images.craigslist.org/3pb3oa3leZZZZZZZZZ965eb60e4d2344019fb.jpg"],posting6.pics
|
265
145
|
assert_equal 'Coral Springs',posting6.location
|
266
|
-
assert_equal
|
146
|
+
assert_equal DateTime.parse('2009-06-05T18:56:00-04:00'),posting6.post_time
|
267
147
|
assert_equal 1207457727,posting6.posting_id
|
268
148
|
assert_equal 1350.0,posting6.price
|
269
149
|
assert_equal "hous-ccpap-1207457727@craigslist.org",posting6.reply_to
|
@@ -282,7 +162,7 @@ EOD
|
|
282
162
|
assert_equal "$189999 / 3br - Nice 3 Bedroom/ 2 Bathroom House with Garage in Sunrise (Sunrise) (map)", brw_reb_1224008903.header_as_plain
|
283
163
|
assert_equal ["http://images.craigslist.org/3ma3o93laZZZZZZZZZ96g5ee7cc528f1818a8.jpg", "http://images.craigslist.org/3nf3m03oeZZZZZZZZZ96gb267b7db57d91f60.jpg", "http://images.craigslist.org/3m63oc3p1ZZZZZZZZZ96g521443416aea1cac.jpg", "http://images.craigslist.org/3nc3p53l5ZZZZZZZZZ96g8706fce2c0bb17e9.jpg"], brw_reb_1224008903.pics
|
284
164
|
assert_equal "Sunrise", brw_reb_1224008903.location
|
285
|
-
assert_equal
|
165
|
+
assert_equal DateTime.parse('2009-06-16T18:43:00-04:00'), brw_reb_1224008903.post_time
|
286
166
|
assert_equal 1224008903, brw_reb_1224008903.posting_id
|
287
167
|
assert_equal 189999.0, brw_reb_1224008903.price
|
288
168
|
assert_equal "1971CJS@Bellsouth.net", brw_reb_1224008903.reply_to
|
@@ -294,8 +174,8 @@ EOD
|
|
294
174
|
assert_equal [:pic], brw_reb_1224008903.img_types
|
295
175
|
|
296
176
|
sfbay_art_1223614914 = CraigScrape::Posting.new relative_uri_for('post_samples/sfbay_art_1223614914.html')
|
297
|
-
assert_equal "Bombay Company Beautiful Art Postered Painting \r<br>\n\
|
298
|
-
assert_equal "Bombay Company Beautiful Art Postered Painting \r\n\
|
177
|
+
assert_equal "Bombay Company Beautiful Art Postered Painting \r<br>\n\u0095\tThe most beautiful piece of art you could have\r<br>\n\u0095\tMatches with any type of furnishing and decoration\r<br>\n\u0095\tA must see/Only one year old\r<br>\n\u0095\tRegular Price @ $1500.00\r<br>\n\u0095\tSale Price @ $650.00\r<br>", sfbay_art_1223614914.contents
|
178
|
+
assert_equal "Bombay Company Beautiful Art Postered Painting \r\n\u0095\tThe most beautiful piece of art you could have\r\n\u0095\tMatches with any type of furnishing and decoration\r\n\u0095\tA must see/Only one year old\r\n\u0095\tRegular Price @ $1500.00\r\n\u0095\tSale Price @ $650.00\r", sfbay_art_1223614914.contents_as_plain
|
299
179
|
assert_equal false, sfbay_art_1223614914.deleted_by_author?
|
300
180
|
assert_equal false, sfbay_art_1223614914.flagged_for_removal?
|
301
181
|
assert_equal ["SF bay area craigslist", "south bay", "art & crafts"], sfbay_art_1223614914.full_section
|
@@ -304,8 +184,8 @@ EOD
|
|
304
184
|
assert_equal ["http://images.craigslist.org/3kf3o93laZZZZZZZZZ96fbc594a6ceb1f1025.jpg"], sfbay_art_1223614914.pics
|
305
185
|
assert_equal "Bombay Company Art Painting - $650", sfbay_art_1223614914.label
|
306
186
|
assert_equal 'saratoga', sfbay_art_1223614914.location
|
307
|
-
assert_equal
|
308
|
-
assert_equal
|
187
|
+
assert_equal Date.new(2009, 6, 15), sfbay_art_1223614914.post_date
|
188
|
+
assert_equal DateTime.parse('2009-06-15T19:38:00-07:00'), sfbay_art_1223614914.post_time
|
309
189
|
assert_equal 1223614914, sfbay_art_1223614914.posting_id
|
310
190
|
assert_equal 650.0, sfbay_art_1223614914.price
|
311
191
|
assert_equal "sale-trzm8-1223614914@craigslist.org", sfbay_art_1223614914.reply_to
|
@@ -337,8 +217,8 @@ EOD
|
|
337
217
|
assert_equal "*****SOFTWARE****", posting_061710.label
|
338
218
|
assert_equal "Dade/Broward", posting_061710.location
|
339
219
|
assert_equal [], posting_061710.pics
|
340
|
-
assert_equal
|
341
|
-
assert_equal
|
220
|
+
assert_equal Date.new(2010, 6, 17), posting_061710.post_date
|
221
|
+
assert_equal DateTime.parse('2010-06-17T13:22:00-04:00'), posting_061710.post_time
|
342
222
|
assert_equal 1796890756, posting_061710.posting_id
|
343
223
|
assert_equal nil, posting_061710.price
|
344
224
|
assert_equal nil, posting_061710.reply_to
|
@@ -362,8 +242,8 @@ EOD
|
|
362
242
|
assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More", posting1808219423.label
|
363
243
|
assert_equal "Dade/Broward", posting1808219423.location
|
364
244
|
assert_equal [], posting1808219423.pics
|
365
|
-
assert_equal
|
366
|
-
assert_equal
|
245
|
+
assert_equal Date.new(2010, 6, 24), posting1808219423.post_date
|
246
|
+
assert_equal DateTime.parse('2010-06-24T07:35:00-04:00'), posting1808219423.post_time
|
367
247
|
assert_equal 1808219423, posting1808219423.posting_id
|
368
248
|
assert_equal nil, posting1808219423.price
|
369
249
|
assert_equal nil, posting1808219423.reply_to
|
@@ -375,7 +255,7 @@ EOD
|
|
375
255
|
def test_bug_found090610
|
376
256
|
posting_090610 = CraigScrape::Posting.new relative_uri_for('post_samples/posting1938291834-090610.html')
|
377
257
|
|
378
|
-
assert_equal
|
258
|
+
assert_equal 27628, posting_090610.contents.length
|
379
259
|
assert_equal 2326, posting_090610.contents_as_plain.length
|
380
260
|
assert_equal false, posting_090610.deleted_by_author?
|
381
261
|
assert_equal true, posting_090610.downloaded?
|
@@ -392,8 +272,8 @@ EOD
|
|
392
272
|
assert_equal "2008 GMC Sierra 2500HD - $14800", posting_090610.label
|
393
273
|
assert_equal "boston", posting_090610.location
|
394
274
|
assert_equal [], posting_090610.pics
|
395
|
-
assert_equal
|
396
|
-
assert_equal
|
275
|
+
assert_equal Date.new(2010, 9, 5), posting_090610.post_date
|
276
|
+
assert_equal DateTime.parse('2010-09-05T18:29:00-04:00'), posting_090610.post_time
|
397
277
|
assert_equal 1938291834, posting_090610.posting_id
|
398
278
|
assert_equal 14800.0, posting_090610.price
|
399
279
|
assert_equal nil, posting_090610.reply_to
|
@@ -423,4 +303,4 @@ EOD
|
|
423
303
|
|
424
304
|
end
|
425
305
|
|
426
|
-
end
|
306
|
+
end
|