libcraigscrape 0.6.5 → 0.7.0

This diff shows the differences between publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. data/CHANGELOG +17 -0
  2. data/Rakefile +1 -1
  3. data/bin/craigwatch +10 -10
  4. data/bin/report_mailer/craigslist_report.html.erb +2 -2
  5. data/bin/report_mailer/craigslist_report.plain.erb +2 -2
  6. data/lib/libcraigscrape.rb +585 -342
  7. data/test/geolisting_samples/geo_listing_ca070209.html +76 -0
  8. data/test/geolisting_samples/geo_listing_ca_sk070209.html +31 -0
  9. data/test/geolisting_samples/geo_listing_cn070209.html +35 -0
  10. data/test/geolisting_samples/geo_listing_us070209.html +355 -0
  11. data/test/libcraigscrape_test_helpers.rb +31 -0
  12. data/test/listing_samples/fortmyers_art_index.060909/1046596324.html +93 -0
  13. data/test/listing_samples/fortmyers_art_index.060909/1053085283.html +92 -0
  14. data/test/listing_samples/fortmyers_art_index.060909/1112522674.html +89 -0
  15. data/test/listing_samples/fortmyers_art_index.060909/823516079.html +92 -0
  16. data/test/listing_samples/fortmyers_art_index.060909/825684735.html +89 -0
  17. data/test/listing_samples/fortmyers_art_index.060909/891513957.html +94 -0
  18. data/test/listing_samples/fortmyers_art_index.060909/897549505.html +99 -0
  19. data/test/listing_samples/fortmyers_art_index.060909/960826026.html +89 -0
  20. data/test/listing_samples/fortmyers_art_index.060909/993256300.html +89 -0
  21. data/test/listing_samples/fortmyers_art_index.060909/fortmyers_art_index500.060909.html +237 -0
  22. data/test/listing_samples/fortmyers_art_index.060909/fortmyers_art_index600.060909.html +132 -0
  23. data/test/listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack1000.6.18.09.html +144 -0
  24. data/test/listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack900.6.18.09.html +146 -0
  25. data/test/post_samples/brw_reb_1224008903.html +101 -0
  26. data/test/post_samples/sfbay_art_1223614914.html +94 -0
  27. data/test/test_craigslist_geolisting.rb +425 -0
  28. data/test/test_craigslist_listing.rb +179 -260
  29. data/test/test_craigslist_posting.rb +306 -0
  30. metadata +29 -2
@@ -0,0 +1,306 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'test/unit'
4
+ require File.dirname(__FILE__)+'/../lib/libcraigscrape'
5
+ require File.dirname(__FILE__)+'/libcraigscrape_test_helpers'
6
+
7
+
8
+ class CraigslistPostingTest < Test::Unit::TestCase
9
+ include LibcraigscrapeTestHelpers
10
+
11
+ def test_pukes
12
+ assert_raise(CraigScrape::Scraper::ParseError) do
13
+ CraigScrape::Posting.new( relative_uri_for('google.html') ).contents
14
+ end
15
+ end
16
+
17
+ def test_listing_parse
18
+ search_html_one = <<EOD
19
+ <p> Apr 18 - <a href="/brw/reb/1128608404.html">Losing your house? You'll need this New Loan Mod Video -</a><font size="-1"> (W. Woodland)</font> <span class="p"> img</span> &lt;&lt;<i><a href="/reb/">real&nbsp;estate - by broker</a></i></p>
20
+ EOD
21
+ search_html_two = <<EOD
22
+ <p> Jan 4 - <a href="/mdc/reb/1128609783.html">$348000 / 1br - Large 1/1 plus office on 49th Floor. 5-Star NEW Condo. Great Views -</a><font size="-1"> (Miami)</font> <span class="p"> pic&nbsp;img</span> &lt;&lt;<i><a href="/reb/">real&nbsp;estate - by broker</a></i></p>
23
+ EOD
24
+ search_html_three = <<EOD
25
+ <p> Dec 31 - <a href="/mdc/reb/1128520894.html">$22,000 HOME -ADULT COMMUNITY BOYNTON BEACH -</a> <span class="p"> pic</span> &lt;&lt;<i><a href="/reb/">real&nbsp;estate - by broker</a></i></p>
26
+ EOD
27
+ search_html_four = <<EOD
28
+ <p> Jul 22 - <a href="/mdc/reb/1128474725.html">$325000 / 3br - GOOD DEAL GREAT HOUSE AND LOCATION -</a><font size="-1"> (CORAL GABLES)</font> &lt;&lt;<i><a href="/reb/">real&nbsp;estate - by broker</a></i></p>
29
+ EOD
30
+ search_html_five = <<EOD
31
+ <p> Apr 9 - <a href="/pbc/boa/1115308178.html">40' SILVERTON CONVERTIBLE DIESEL - $105000 -</a><font size="-1"> (HOBE SOUND)</font> <span class="p"> pic</span></p>
32
+ EOD
33
+ category_listing_one = <<EOD
34
+ <p><a href="/pbc/reb/1128661387.html">$2995000 / 5br - Downtown Boca New Home To Be Built -</a><font size="-1"> (Boca Raton)</font> <span class="p"> pic</span> &lt;&lt;<i><a href="/reb/">real&nbsp;estate - by broker</a></i></p>
35
+ EOD
36
+ category_listing_two = <<EOD
37
+ <p><a href="/mdc/jwl/1128691192.html">925 Sterling Silver Dragonfly Charm Bracelet - $25 -</a> <span class="p"> img</span></p>
38
+ EOD
39
+
40
+ one = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(Hpricot.parse(search_html_one).at('p'))
41
+ assert_equal true, one.has_img?
42
+ assert_equal false, one.has_pic?
43
+ assert_equal true, one.has_pic_or_img?
44
+ assert_equal '/brw/reb/1128608404.html', one.href
45
+ assert_equal "Losing your house? You'll need this New Loan Mod Video", one.label
46
+ assert_equal "real estate - by broker", one.section
47
+ assert_equal "W. Woodland", one.location
48
+ assert_equal 4, one.post_date.month
49
+ assert_equal 18, one.post_date.day
50
+ assert_equal nil, one.price
51
+
52
+ two = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(Hpricot.parse(search_html_two).at('p'))
53
+ assert_equal true, two.has_img?
54
+ assert_equal true, two.has_pic?
55
+ assert_equal true, two.has_pic_or_img?
56
+ assert_equal '/mdc/reb/1128609783.html', two.href
57
+ assert_equal "$348000 / 1br - Large 1/1 plus office on 49th Floor. 5-Star NEW Condo. Great Views", two.label
58
+ assert_equal "real estate - by broker", two.section
59
+ assert_equal "Miami", two.location
60
+ assert_equal 1, two.post_date.month
61
+ assert_equal 4, two.post_date.day
62
+ assert_equal 348000.0, two.price
63
+
64
+ three = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(Hpricot.parse(search_html_three).at('p'))
65
+ assert_equal false, three.has_img?
66
+ assert_equal true, three.has_pic?
67
+ assert_equal true, three.has_pic_or_img?
68
+ assert_equal '/mdc/reb/1128520894.html', three.href
69
+ assert_equal "$22,000 HOME -ADULT COMMUNITY BOYNTON BEACH", three.label
70
+ assert_equal "real estate - by broker", three.section
71
+ assert_equal nil, three.location
72
+ assert_equal 12, three.post_date.month
73
+ assert_equal 31, three.post_date.day
74
+ assert_equal 22.0, three.price
75
+
76
+ four = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(Hpricot.parse(search_html_four).at('p'))
77
+ assert_equal false, four.has_img?
78
+ assert_equal false, four.has_pic?
79
+ assert_equal false, four.has_pic_or_img?
80
+ assert_equal '/mdc/reb/1128474725.html', four.href
81
+ assert_equal "$325000 / 3br - GOOD DEAL GREAT HOUSE AND LOCATION", four.label
82
+ assert_equal "real estate - by broker", four.section
83
+ assert_equal "CORAL GABLES", four.location
84
+ assert_equal 7, four.post_date.month
85
+ assert_equal 22, four.post_date.day
86
+ assert_equal 325000.0, four.price
87
+
88
+ five = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(Hpricot.parse(search_html_five).at('p'))
89
+ assert_equal false, five.has_img?
90
+ assert_equal true, five.has_pic?
91
+ assert_equal true, five.has_pic_or_img?
92
+ assert_equal '/pbc/boa/1115308178.html', five.href
93
+ assert_equal "40' SILVERTON CONVERTIBLE DIESEL - $105000", five.label
94
+ assert_equal nil, five.section
95
+ assert_equal "HOBE SOUND", five.location
96
+ assert_equal 4, five.post_date.month
97
+ assert_equal 9, five.post_date.day
98
+ assert_equal 105000.0, five.price
99
+
100
+ five = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(Hpricot.parse(category_listing_one).at('p'))
101
+ assert_equal false, five.has_img?
102
+ assert_equal true, five.has_pic?
103
+ assert_equal true, five.has_pic_or_img?
104
+ assert_equal '/pbc/reb/1128661387.html', five.href
105
+ assert_equal "$2995000 / 5br - Downtown Boca New Home To Be Built", five.label
106
+ assert_equal "real estate - by broker", five.section
107
+ assert_equal "Boca Raton", five.location
108
+ assert_equal nil, five.post_date
109
+ assert_equal 2995000.0, five.price
110
+
111
+ six = CraigScrape::Posting.new CraigScrape::Listings.parse_summary(Hpricot.parse(category_listing_two).at('p'))
112
+ assert_equal true, six.has_img?
113
+ assert_equal false, six.has_pic?
114
+ assert_equal true, six.has_pic_or_img?
115
+ assert_equal '/mdc/jwl/1128691192.html', six.href
116
+ assert_equal "925 Sterling Silver Dragonfly Charm Bracelet - $25", six.label
117
+ assert_equal nil, six.section
118
+ assert_equal nil, six.location
119
+ assert_equal nil, six.post_date
120
+ assert_equal 25.0, six.price
121
+ end
122
+
123
+
124
+ def test_posting_parse
125
+ posting0 = CraigScrape::Posting.new relative_uri_for('post_samples/posting0.html')
126
+ assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color", posting0.contents
127
+ assert_equal ["south florida craigslist", "miami / dade", "furniture - by owner"], posting0.full_section
128
+ assert_equal "tv cart on wheels - $35 (NMB)", posting0.header
129
+ assert_equal "tv cart on wheels - $35", posting0.label
130
+ assert_equal "tv cart on wheels", posting0.title
131
+ assert_equal "NMB", posting0.location
132
+ assert_equal 1131363612, posting0.posting_id
133
+ assert_equal "sale-ktf9w-1131363612@craigslist.org", posting0.reply_to
134
+ assert_equal [0, 21, 13, 20, 4, 2009, 1, 110, true, "EDT"], posting0.post_time.to_a
135
+ assert_equal [], posting0.pics
136
+ assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color",posting0.contents_as_plain
137
+ assert_equal 35.0, posting0.price
138
+ assert_equal [], posting0.images
139
+ assert_equal [], posting0.img_types
140
+
141
+ posting1 = CraigScrape::Posting.new relative_uri_for('post_samples/posting1.html')
142
+ assert_equal "Residential income property\227Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r<br />\n\r<br />\nJe parle le Fran\347ais\r<br />\n\r<br />\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r<br />\n\r<br />", posting1.contents
143
+ assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], posting1.full_section
144
+ assert_equal "$189900 / 4br - Investment Property--Duplex in Fort Lauderdale", posting1.header
145
+ assert_equal "$189900 / 4br - Investment Property--Duplex in Fort Lauderdale", posting1.label
146
+ assert_equal "Investment Property--Duplex in Fort Lauderdale", posting1.title
147
+ assert_equal '1000 NE 14th Pl', posting1.location
148
+ assert_equal 1131242195, posting1.posting_id
149
+ assert_equal "hous-5nzhq-1131242195@craigslist.org", posting1.reply_to
150
+ assert_equal [0, 33, 13, 20, 4, 2009, 1, 110, true, "EDT"], posting1.post_time.to_a
151
+ assert_equal %w(http://images.craigslist.org/3n83o33l5ZZZZZZZZZ94k913ac1582d4b1fa4.jpg http://images.craigslist.org/3n93p63obZZZZZZZZZ94k19d5e32eb3b610c2.jpg http://images.craigslist.org/3n93m03l6ZZZZZZZZZ94k6e9785e37a1b1f3f.jpg http://images.craigslist.org/3ma3oc3l4ZZZZZZZZZ94kbfecbcd2fb2e19cc.jpg), posting1.pics
152
+ assert_equal "Residential income property\227Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r\n\r\nJe parle le Fran\347ais\r\n\r\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r\n\r", posting1.contents_as_plain
153
+ assert_equal 189900.0, posting1.price
154
+ assert_equal [], posting1.images
155
+ assert_equal ["http://images.craigslist.org/3n83o33l5ZZZZZZZZZ94k913ac1582d4b1fa4.jpg", "http://images.craigslist.org/3n93p63obZZZZZZZZZ94k19d5e32eb3b610c2.jpg", "http://images.craigslist.org/3n93m03l6ZZZZZZZZZ94k6e9785e37a1b1f3f.jpg", "http://images.craigslist.org/3ma3oc3l4ZZZZZZZZZ94kbfecbcd2fb2e19cc.jpg"], posting1.pics
156
+ assert_equal [:pic], posting1.img_types
157
+
158
+ posting2 = CraigScrape::Posting.new relative_uri_for('post_samples/posting2.html')
159
+ assert_equal 15775, posting2.contents.length # This is easy, and probably fine enough
160
+ assert_equal ["south florida craigslist", "broward county", "cars & trucks - by dealer"], posting2.full_section
161
+ assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE - $23975 (Fort Lauderdale)", posting2.header
162
+ assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE - $23975", posting2.label
163
+ assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE", posting2.title
164
+ assert_equal 'Fort Lauderdale', posting2.location
165
+ assert_equal 1127037648, posting2.posting_id
166
+ assert_equal nil, posting2.reply_to
167
+ assert_equal [0, 16, 14, 17, 4, 2009, 5, 107, true, "EDT"], posting2.post_time.to_a
168
+ assert_equal [], posting2.pics
169
+ assert_equal "\302\240 Sheehan Buick Pontiac GMC \302\240 Pompano Beach, FL(754) 224-3257 \302\240PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!2002 Chevrolet Corvette Z06 Florida Driven AutoCheck Certified 5.7L V8 6sp2 Door Coupe.\302\240Price: \302\240 $23,975Exterior:Electron Blue MetallicInterior:BlackStock#:P5110AVIN:1G1YY12S625129021FREE AutoCheck Vehicle ReportMileage:63,560Transmission:6 Speed ManualEngine:V8 5.7L OHVWarranty:Limited WarrantyTitle:Clear\302\273\302\240View All 58 Photos\302\273\302\240View Full Vehicle Details\302\273\302\240Ask the Seller a Question\302\273\302\240E-mail this to a Friend\302\240 DescriptionPRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!\r\n\r\nLOADED WITH BLACK LEATHER BUCKET SEATS, POWER DRIVERS SEAT, DUAL ZONE CLIMATE CONTROL, 4 WHEEL ABS BRAKES, POWER STEERING AND BRAKES, REAR LIMITED SLIP DIFFERENTIAL, STABILITY CONTROL, CRUISE CONTROL, TLT STEERING WHEEL, POWER WINDOWS AND LOCKS, AUTOMATIC ON/OFF HEADLAMPS, FOG LIGHTS, DUAL AIR BAG SAFETY, AM/FM STEREO CD PLAYER, INTERMITTENT WINDSHIELD WIPERS AND SO MUCH MORE - THIS CAR IS TOTALLY HOT WITH GREAT LOW MILES!\r\n\r\nPlease call us to make your deal now at 1-888-453-5244. Please visit our Website at www.sheehanautoplex.com ***View 50+ Pictures of this vehicle - a complete description including standard features and all added options & a FREE AUTO CHECK REPORT at www.sheehanautoplex.com. ***Financing for Everyone - Good credit - bad credit - divorce - charge off's - NO PROBLEM. 
To complete a secure credit application, please visit our website at www.sheehanautoplex.com ***The largest Dealer in the State of Florida - We export all over the world - For details please visit www.sheehanautoplex.com ***Sheehan Autoplex takes great pride in our outstanding customer service and has been recognized by the following associations - BBB (Better Business Bureau) - NIADA - and the FIADA. Call us to get your best deal. CALL NOW. 1-888-453-5244\302\240 Contact Sheehan Buick Pontiac GMCPhone:(754) 224-3257Fax:(954) 781-9050Phone:(754) 224-3257E-mail:sales@proauto.comBusiness HoursWeekdays:9:00 AM to 9:00 PMSat:9:00 AM to 6:00 PMSun:",posting2.contents_as_plain
170
+ assert_equal 23975.0, posting2.price
171
+ assert_equal ["http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/19bce8e86c_355.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/ff9b026b06_355.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/6b75d87620_355.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/53b025e472_355.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/0d1befded7_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/95477f92bb_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/2850b2f160_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/a4281c6c91_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/862ee4ce71_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/74cadeff2e_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/63b05a0c76_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/00f84ea5bf_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/fe29734ab5_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/7f714d5159_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/720ddcc0a1_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/fc90fba588_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/d576661767_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/3423fb4814_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/5f0a0e85f8_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/d3ca0e29cc_105.jpg", 
"http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/23888ae8bc_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/93fc7d2373_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/9ac9da47b8_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/b1a84ca79e_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/6d219b534d_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/8bfe03d99b_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/d1086ab561_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/ab7a050466_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/9ea616d5d7_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/4b91de556d_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/1cefd8873a_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/8aec930e90_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/76b603822f_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/2d1b6d8a13_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/4fc82180ab_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/843c9e41ae_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/9d91990245_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/f34b8cfaed_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/765dae1031_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/7463a88d92_105.jpg", 
"http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/afe5801857_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/25abb2bd26_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/bc2fdaa3ea_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/e2a9b0dc69_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/08c2ca66b6_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/5e46230ec6_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/0b45184c58_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/311457aed0_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/43090899dc_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/c33b7f4c2a_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/24f419b851_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/50d3e2126d_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/6c125ffc51_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/93db0546fd_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/00e0d91652_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/2b242fbc58_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/8ee3c932a2_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/64103fe7bd_105.jpg"], posting2.images
172
+ assert_equal [:img], posting2.img_types
173
+
174
+ posting3 = CraigScrape::Posting.new relative_uri_for('post_samples/posting3.html')
175
+ assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r<br />\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny", posting3.contents
176
+ assert_equal ["south florida craigslist", "miami / dade", "cars & trucks - by owner"], posting3.full_section
177
+ assert_equal "300ZX Nissan Twin Turbo 1992 - $5800 (N.Miami/ Hialeah)", posting3.header
178
+ assert_equal "300ZX Nissan Twin Turbo 1992 - $5800", posting3.label
179
+ assert_equal "300ZX Nissan Twin Turbo 1992", posting3.title
180
+ assert_equal "N.Miami/ Hialeah", posting3.location
181
+ assert_equal 1130212403, posting3.posting_id
182
+ assert_equal "sale-c9bpa-1130212403@craigslist.org", posting3.reply_to
183
+ assert_equal [0, 21, 18, 19, 4, 2009, 0, 109, true, "EDT"], posting3.post_time.to_a
184
+ assert_equal %w(http://images.craigslist.org/3n23kf3lfZZZZZZZZZ94j1160e7d7b0601934.jpg http://images.craigslist.org/3nc3kf3p2ZZZZZZZZZ94j04fbc71e0a551ace.jpg http://images.craigslist.org/3nc3k33l7ZZZZZZZZZ94k13d8d7b1024e1e0e.jpg http://images.craigslist.org/3n23k63mfZZZZZZZZZ94k7838ae5d48d91eb8.jpg), posting3.pics
185
+ assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny",posting3.contents_as_plain
186
+ assert_equal 5800.0, posting3.price
187
+ assert_equal [], posting3.images
188
+ assert_equal ["http://images.craigslist.org/3n23kf3lfZZZZZZZZZ94j1160e7d7b0601934.jpg", "http://images.craigslist.org/3nc3kf3p2ZZZZZZZZZ94j04fbc71e0a551ace.jpg", "http://images.craigslist.org/3nc3k33l7ZZZZZZZZZ94k13d8d7b1024e1e0e.jpg", "http://images.craigslist.org/3n23k63mfZZZZZZZZZ94k7838ae5d48d91eb8.jpg"], posting3.pics
189
+ assert_equal [:pic], posting3.img_types
190
+
191
+ # This one ended up being quite a curveball since the user uploaded HTML was such junk:
192
+ posting4 = CraigScrape::Posting.new relative_uri_for('post_samples/posting4.html')
193
+ assert_equal 20640, posting4.contents.length
194
+ assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], posting4.full_section
195
+ assert_equal "$225000 / 3br - Palm Aire Golf Corner Unit!", posting4.header
196
+ assert_equal "Palm Aire Golf Corner Unit!", posting4.title
197
+ assert_equal "$225000 / 3br - Palm Aire Golf Corner Unit!", posting4.label
198
+ assert_equal nil, posting4.location
199
+ assert_equal 1139303170, posting4.posting_id
200
+ assert_equal "hous-sk9f2-1139303170@craigslist.org", posting4.reply_to
201
+ assert_equal [0, 8, 9, 25, 4, 2009, 6, 115, true, "EDT"], posting4.post_time.to_a
202
+ assert_equal [], posting4.pics
203
+ assert_equal 6399,posting4.contents_as_plain.length
204
+ assert_equal 225000.0, posting4.price
205
+ assert_equal ["http://fortlauderdaleareahomesales.com/myfiles/5.jpg", "http://fortlauderdaleareahomesales.com/myfiles/4.jpg", "http://fortlauderdaleareahomesales.com/myfiles/7.jpg", "http://fortlauderdaleareahomesales.com/myfiles/10.jpg", "http://fortlauderdaleareahomesales.com/myfiles/1.jpg", "http://fortlauderdaleareahomesales.com/myfiles/2.jpg", "http://fortlauderdaleareahomesales.com/myfiles/3.jpg", "http://fortlauderdaleareahomesales.com/myfiles/8.jpg", "http://fortlauderdaleareahomesales.com/myfiles/9.jpg", "http://fortlauderdaleareahomesales.com/myfiles/11.jpg", "http://fortlauderdaleareahomesales.com/myfiles/14.jpg", "http://fortlauderdaleareahomesales.com/myfiles/6.jpg"], posting4.images
206
+ assert_equal [:img], posting4.img_types
207
+
208
+ posting5 = CraigScrape::Posting.new relative_uri_for('post_samples/posting5.html')
209
+ assert_equal true, posting5.flagged_for_removal?
210
+ assert_equal nil, posting5.contents
211
+ assert_equal ["south florida craigslist", "palm beach co", "apts/housing for rent"], posting5.full_section
212
+ assert_equal "This posting has been <a href=\"http://www.craigslist.org/about/help/flags_and_community_moderation\">flagged</a> for removal", posting5.header
213
+ assert_equal nil, posting5.title
214
+ assert_equal nil, posting5.label
215
+ assert_equal nil, posting5.location
216
+ assert_equal nil, posting5.posting_id
217
+ assert_equal nil, posting5.reply_to
218
+ assert_equal nil, posting5.post_time
219
+ assert_equal [], posting5.pics
220
+ assert_equal nil, posting5.contents_as_plain
221
+ assert_equal nil, posting5.price
222
+ assert_equal [], posting5.images
223
+ assert_equal [], posting5.img_types
224
+
225
+ posting_deleted = CraigScrape::Posting.new relative_uri_for('post_samples/this_post_has_been_deleted_by_its_author.html')
226
+ assert_equal true, posting_deleted.deleted_by_author?
227
+ assert_equal nil, posting_deleted.contents
228
+ assert_equal ["south florida craigslist", "broward county", "cars & trucks - by owner"], posting_deleted.full_section
229
+ assert_equal "This posting has been deleted by its author.", posting_deleted.header
230
+ assert_equal nil, posting_deleted.label
231
+ assert_equal nil, posting_deleted.title
232
+ assert_equal nil, posting_deleted.location
233
+ assert_equal nil, posting_deleted.posting_id
234
+ assert_equal nil, posting_deleted.reply_to
235
+ assert_equal nil, posting_deleted.post_time
236
+ assert_equal [], posting_deleted.pics
237
+ assert_equal nil, posting_deleted.contents_as_plain
238
+ assert_equal nil, posting_deleted.price
239
+ assert_equal [], posting_deleted.images
240
+ assert_equal [], posting_deleted.img_types
241
+
242
+ posting6 = CraigScrape::Posting.new relative_uri_for('post_samples/1207457727.html')
243
+ assert_equal "<p><br />Call!! asking for a new owner.<br /> no deposit required rent to own properties. <br /> <br /> Defaulting payment records are not a problem, <br /> we will help you protect the previous owners credit history! 202-567-6371 <br /><br /></p>",posting6.contents
244
+ assert_equal "Call!! asking for a new owner. no deposit required rent to own properties. Defaulting payment records are not a problem, we will help you protect the previous owners credit history! 202-567-6371 ",posting6.contents_as_plain
245
+ assert_equal false,posting6.deleted_by_author?
246
+ assert_equal false,posting6.flagged_for_removal?
247
+ assert_equal ["south florida craigslist", "broward county", "apts/housing for rent"],posting6.full_section
248
+ assert_equal "$1350 / 3br - 2bth for no deposit req (Coral Springs)",posting6.header
249
+ assert_equal "$1350 / 3br - 2bth for no deposit req",posting6.label
250
+ assert_equal ["http://images.craigslist.org/3k43pe3o8ZZZZZZZZZ9655022102a3ea51624.jpg", "http://images.craigslist.org/3n13m53p6ZZZZZZZZZ96596515e51237a179c.jpg", "http://images.craigslist.org/3od3p33leZZZZZZZZZ9656d614da8e3a51dd9.jpg", "http://images.craigslist.org/3pb3oa3leZZZZZZZZZ965eb60e4d2344019fb.jpg"],posting6.pics
251
+ assert_equal 'Coral Springs',posting6.location
252
+ assert_equal [0, 56, 18, 5, 6, 2009, 5, 156, true, "EDT"],posting6.post_time.to_a
253
+ assert_equal 1207457727,posting6.posting_id
254
+ assert_equal 1350.0,posting6.price
255
+ assert_equal "hous-ccpap-1207457727@craigslist.org",posting6.reply_to
256
+ assert_equal "2bth for no deposit req",posting6.title
257
+ assert_equal [], posting6.images
258
+ assert_equal ["http://images.craigslist.org/3k43pe3o8ZZZZZZZZZ9655022102a3ea51624.jpg", "http://images.craigslist.org/3n13m53p6ZZZZZZZZZ96596515e51237a179c.jpg", "http://images.craigslist.org/3od3p33leZZZZZZZZZ9656d614da8e3a51dd9.jpg", "http://images.craigslist.org/3pb3oa3leZZZZZZZZZ965eb60e4d2344019fb.jpg"], posting6.pics
259
+ assert_equal [:pic], posting6.img_types
260
+
261
+ brw_reb_1224008903 = CraigScrape::Posting.new relative_uri_for('post_samples/brw_reb_1224008903.html')
262
+ assert_equal "Nice 3 Bedroom/ 2 Bathroom/ Garage Home in Sunrise. 1,134 square feet of living area with a 6,000 square foot lot. Wood laminate flooring throughout the entire house. House has been updated. Stamped concrete driveway which leads to garage. Big back yard. Central AC. Washer/Dryer. Not a short sale or foreclosure. Asking $189,999. Call Charles Schneider (The Best Damn Real Estate Company Period!) at 954-478-4784.\r<br />\n\r<br />\nDirections: Take Pine Island Road north off of Sunrise Boulevard (past Sunset Strip) to N.W. 25th Court. Head west (left) on N.W. 25th Court to N.W. 91st Lane. Head north (right) on N.W. 91st Lane to N.W. 26th Street. Head east (right) on N.W. 26th Street to the property- 9163 N.W. 26th Street, Sunrise, FL 33322", brw_reb_1224008903.contents
263
+ assert_equal "Nice 3 Bedroom/ 2 Bathroom/ Garage Home in Sunrise. 1,134 square feet of living area with a 6,000 square foot lot. Wood laminate flooring throughout the entire house. House has been updated. Stamped concrete driveway which leads to garage. Big back yard. Central AC. Washer/Dryer. Not a short sale or foreclosure. Asking $189,999. Call Charles Schneider (The Best Damn Real Estate Company Period!) at 954-478-4784.\r\n\r\nDirections: Take Pine Island Road north off of Sunrise Boulevard (past Sunset Strip) to N.W. 25th Court. Head west (left) on N.W. 25th Court to N.W. 91st Lane. Head north (right) on N.W. 91st Lane to N.W. 26th Street. Head east (right) on N.W. 26th Street to the property- 9163 N.W. 26th Street, Sunrise, FL 33322", brw_reb_1224008903.contents_as_plain
264
+ assert_equal false, brw_reb_1224008903.deleted_by_author?
265
+ assert_equal false, brw_reb_1224008903.flagged_for_removal?
266
+ assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], brw_reb_1224008903.full_section
267
+ assert_equal "$189999 / 3br - Nice 3 Bedroom/ 2 Bathroom House with Garage in Sunrise (Sunrise) (map)", brw_reb_1224008903.header
268
+ assert_equal "$189999 / 3br - Nice 3 Bedroom/ 2 Bathroom House with Garage in Sunrise (Sunrise) (map)", brw_reb_1224008903.header_as_plain
269
+ assert_equal ["http://images.craigslist.org/3ma3o93laZZZZZZZZZ96g5ee7cc528f1818a8.jpg", "http://images.craigslist.org/3nf3m03oeZZZZZZZZZ96gb267b7db57d91f60.jpg", "http://images.craigslist.org/3m63oc3p1ZZZZZZZZZ96g521443416aea1cac.jpg", "http://images.craigslist.org/3nc3p53l5ZZZZZZZZZ96g8706fce2c0bb17e9.jpg"], brw_reb_1224008903.pics
270
+ assert_equal "Sunrise", brw_reb_1224008903.location
271
+ assert_equal [0, 43, 18, 16, 6, 2009, 2, 167, true, "EDT"], brw_reb_1224008903.post_time.to_a
272
+ assert_equal 1224008903, brw_reb_1224008903.posting_id
273
+ assert_equal 189999.0, brw_reb_1224008903.price
274
+ assert_equal "1971CJS@Bellsouth.net", brw_reb_1224008903.reply_to
275
+ assert_equal false, brw_reb_1224008903.system_post?
276
+ assert_equal "Nice 3 Bedroom/ 2 Bathroom House with Garage in Sunrise", brw_reb_1224008903.title
277
+ assert_equal "$189999 / 3br - Nice 3 Bedroom/ 2 Bathroom House with Garage in Sunrise", brw_reb_1224008903.label
278
+ assert_equal [], brw_reb_1224008903.images
279
+ assert_equal ["http://images.craigslist.org/3ma3o93laZZZZZZZZZ96g5ee7cc528f1818a8.jpg", "http://images.craigslist.org/3nf3m03oeZZZZZZZZZ96gb267b7db57d91f60.jpg", "http://images.craigslist.org/3m63oc3p1ZZZZZZZZZ96g521443416aea1cac.jpg", "http://images.craigslist.org/3nc3p53l5ZZZZZZZZZ96g8706fce2c0bb17e9.jpg"], brw_reb_1224008903.pics
280
+ assert_equal [:pic], brw_reb_1224008903.img_types
281
+
282
+ sfbay_art_1223614914 = CraigScrape::Posting.new relative_uri_for('post_samples/sfbay_art_1223614914.html')
283
+ assert_equal "Bombay Company Beautiful Art Postered Painting \r<br />\n\225\tThe most beautiful piece of art you could have\r<br />\n\225\tMatches with any type of furnishing and decoration\r<br />\n\225\tA must see/Only one year old\r<br />\n\225\tRegular Price @ $1500.00\r<br />\n\225\tSale Price @ $650.00\r<br />", sfbay_art_1223614914.contents
284
+ assert_equal "Bombay Company Beautiful Art Postered Painting \r\n\225\tThe most beautiful piece of art you could have\r\n\225\tMatches with any type of furnishing and decoration\r\n\225\tA must see/Only one year old\r\n\225\tRegular Price @ $1500.00\r\n\225\tSale Price @ $650.00\r", sfbay_art_1223614914.contents_as_plain
285
+ assert_equal false, sfbay_art_1223614914.deleted_by_author?
286
+ assert_equal false, sfbay_art_1223614914.flagged_for_removal?
287
+ assert_equal ["SF bay area craigslist", "south bay", "art & crafts"], sfbay_art_1223614914.full_section
288
+ assert_equal "Bombay Company Art Painting - $650 (saratoga)", sfbay_art_1223614914.header
289
+ assert_equal "Bombay Company Art Painting - $650 (saratoga)", sfbay_art_1223614914.header_as_plain
290
+ assert_equal ["http://images.craigslist.org/3kf3o93laZZZZZZZZZ96fbc594a6ceb1f1025.jpg"], sfbay_art_1223614914.pics
291
+ assert_equal "Bombay Company Art Painting - $650", sfbay_art_1223614914.label
292
+ assert_equal 'saratoga', sfbay_art_1223614914.location
293
+ assert_equal [0, 0, 0, 15, 6, 2009, 1, 166, true, "EDT"], sfbay_art_1223614914.post_date.to_a
294
+ assert_equal [0, 38, 22, 15, 6, 2009, 1, 166, true, "EDT"], sfbay_art_1223614914.post_time.to_a
295
+ assert_equal 1223614914, sfbay_art_1223614914.posting_id
296
+ assert_equal 650.0, sfbay_art_1223614914.price
297
+ assert_equal "sale-trzm8-1223614914@craigslist.org", sfbay_art_1223614914.reply_to
298
+ assert_equal false, sfbay_art_1223614914.system_post?
299
+ assert_equal "Bombay Company Art Painting", sfbay_art_1223614914.title
300
+ assert_equal [], sfbay_art_1223614914.images
301
+ assert_equal ["http://images.craigslist.org/3kf3o93laZZZZZZZZZ96fbc594a6ceb1f1025.jpg"], sfbay_art_1223614914.pics
302
+ assert_equal [:pic], sfbay_art_1223614914.img_types
303
+
304
+ end
305
+
306
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libcraigscrape
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.5
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris DeRose, DeRose Technologies, Inc.
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-08 00:00:00 -04:00
12
+ date: 2009-07-05 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -65,24 +65,49 @@ files:
65
65
  - bin/report_mailer
66
66
  - bin/report_mailer/craigslist_report.html.erb
67
67
  - bin/report_mailer/craigslist_report.plain.erb
68
+ - test/libcraigscrape_test_helpers.rb
69
+ - test/test_craigslist_posting.rb
68
70
  - test/listing_samples
69
71
  - test/listing_samples/category_output.html
70
72
  - test/listing_samples/short_search_output.html
71
73
  - test/listing_samples/empty_listings.html
72
74
  - test/listing_samples/mia_fua_index8900.5.21.09.html
75
+ - test/listing_samples/fortmyers_art_index.060909
76
+ - test/listing_samples/fortmyers_art_index.060909/891513957.html
77
+ - test/listing_samples/fortmyers_art_index.060909/1053085283.html
78
+ - test/listing_samples/fortmyers_art_index.060909/897549505.html
79
+ - test/listing_samples/fortmyers_art_index.060909/825684735.html
80
+ - test/listing_samples/fortmyers_art_index.060909/fortmyers_art_index500.060909.html
81
+ - test/listing_samples/fortmyers_art_index.060909/fortmyers_art_index600.060909.html
82
+ - test/listing_samples/fortmyers_art_index.060909/960826026.html
83
+ - test/listing_samples/fortmyers_art_index.060909/1046596324.html
84
+ - test/listing_samples/fortmyers_art_index.060909/993256300.html
85
+ - test/listing_samples/fortmyers_art_index.060909/1112522674.html
86
+ - test/listing_samples/fortmyers_art_index.060909/823516079.html
73
87
  - test/listing_samples/category_output_2.html
74
88
  - test/listing_samples/long_search_output.html
89
+ - test/listing_samples/miami_search_sss_rack.6.18.09
90
+ - test/listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack900.6.18.09.html
91
+ - test/listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack1000.6.18.09.html
75
92
  - test/test_craigslist_listing.rb
76
93
  - test/post_samples
77
94
  - test/post_samples/posting4.html
95
+ - test/post_samples/brw_reb_1224008903.html
78
96
  - test/post_samples/posting1.html
79
97
  - test/post_samples/posting0.html
80
98
  - test/post_samples/posting5.html
81
99
  - test/post_samples/posting3.html
100
+ - test/post_samples/sfbay_art_1223614914.html
82
101
  - test/post_samples/this_post_has_been_deleted_by_its_author.html
83
102
  - test/post_samples/1207457727.html
84
103
  - test/post_samples/posting2.html
85
104
  - test/google.html
105
+ - test/test_craigslist_geolisting.rb
106
+ - test/geolisting_samples
107
+ - test/geolisting_samples/geo_listing_ca_sk070209.html
108
+ - test/geolisting_samples/geo_listing_ca070209.html
109
+ - test/geolisting_samples/geo_listing_cn070209.html
110
+ - test/geolisting_samples/geo_listing_us070209.html
86
111
  - lib/libcraigscrape.rb
87
112
  has_rdoc: true
88
113
  homepage: http://www.derosetechnologies.com/community/libcraigscrape
@@ -116,4 +141,6 @@ signing_key:
116
141
  specification_version: 2
117
142
  summary: quick, easy, craigslist parsing library that takes the monotony out of working with craigslist posts and listings
118
143
  test_files:
144
+ - test/test_craigslist_posting.rb
119
145
  - test/test_craigslist_listing.rb
146
+ - test/test_craigslist_geolisting.rb