libcraigscrape 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +4 -0
- data/COPYING +674 -0
- data/COPYING.LESSER +165 -0
- data/README +69 -0
- data/Rakefile +72 -0
- data/bin/craig_report_schema.yml +57 -0
- data/bin/craigwatch +374 -0
- data/bin/report_mailer/craigslist_report.html.erb +14 -0
- data/bin/report_mailer/craigslist_report.plain.erb +15 -0
- data/lib/libcraigscrape.rb +352 -0
- data/test/google.html +8 -0
- data/test/listing_samples/category_output.html +231 -0
- data/test/listing_samples/category_output_2.html +217 -0
- data/test/listing_samples/long_search_output.html +137 -0
- data/test/listing_samples/short_search_output.html +133 -0
- data/test/post_samples/posting0.html +91 -0
- data/test/post_samples/posting1.html +106 -0
- data/test/post_samples/posting2.html +107 -0
- data/test/post_samples/posting3.html +92 -0
- data/test/post_samples/posting4.html +993 -0
- data/test/post_samples/posting5.html +38 -0
- data/test/test_craigslist_listing.rb +234 -0
- metadata +115 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title></title>
|
5
|
+
<meta name="robots" content="NOARCHIVE,NOFOLLOW">
|
6
|
+
<link rel="stylesheet" title="craigslist" href="http://www.craigslist.org/styles/craigslist.css" type="text/css" media="all">
|
7
|
+
</head>
|
8
|
+
|
9
|
+
<body onload="initFlag(1139838814)" class="posting">
|
10
|
+
|
11
|
+
<div class="bchead">
|
12
|
+
|
13
|
+
<a href="http://miami.craigslist.org">south florida craigslist</a>
|
14
|
+
> <a href="/pbc/">palm beach co</a> > <a href="/pbc/apa/">apts/housing for rent</a>
|
15
|
+
</div>
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
<hr>
|
20
|
+
<br>
|
21
|
+
<br>
|
22
|
+
<h2>This posting has been <a href="http://www.craigslist.org/about/help/flags_and_community_moderation">flagged</a> for removal</h2>
|
23
|
+
<h5>(The title on the listings page will be removed in just a few minutes.)</h5>
|
24
|
+
|
25
|
+
<br><br>
|
26
|
+
|
27
|
+
<hr>
|
28
|
+
<ul class="clfooter">
|
29
|
+
<li>Copyright © 2009 craigslist, inc.</li>
|
30
|
+
<li><a href="http://www.craigslist.org/about/terms.of.use.html">terms of use</a></li>
|
31
|
+
<li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
|
32
|
+
<li><a href="/forums/?forumID=8">feedback forum</a></li>
|
33
|
+
</ul>
|
34
|
+
<script type="text/javascript" src="http://www.craigslist.org/js/jquery.js"></script>
|
35
|
+
<script type="text/javascript" src="http://www.craigslist.org/js/postings.js"></script>
|
36
|
+
</body>
|
37
|
+
</html>
|
38
|
+
|
@@ -0,0 +1,234 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require File.dirname(__FILE__)+'/../lib/libcraigscrape'
|
4
|
+
require 'test/unit'
|
5
|
+
|
6
|
+
class CraigslistListingTest < Test::Unit::TestCase
|
7
|
+
|
8
|
+
# Smoke test: each parser class is constructed from the parsed google.html
# fixture (presumably a page that is not craigslist markup -- confirm against
# the fixture) and none of the constructors may raise.
def test_no_puke
  unrelated_page = read_as_hpricot('google.html')

  assert_nothing_raised { CraigScrape::PostSummary.new(unrelated_page) }
  assert_nothing_raised { CraigScrape::Listings.new(unrelated_page) }
  assert_nothing_raised { CraigScrape::PostFull.new(unrelated_page) }
end
|
17
|
+
|
18
|
+
def test_listing_parse
|
19
|
+
search_html_one = <<EOD
|
20
|
+
<p> Apr 18 - <a href="/brw/reb/1128608404.html">Losing your house? You'll need this New Loan Mod Video -</a><font size="-1"> (W. Woodland)</font> <span class="p"> img</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
21
|
+
EOD
|
22
|
+
search_html_two = <<EOD
|
23
|
+
<p> Jan 4 - <a href="/mdc/reb/1128609783.html">$348000 / 1br - Large 1/1 plus office on 49th Floor. 5-Star NEW Condo. Great Views -</a><font size="-1"> (Miami)</font> <span class="p"> pic img</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
24
|
+
EOD
|
25
|
+
search_html_three = <<EOD
|
26
|
+
<p> Dec 31 - <a href="/mdc/reb/1128520894.html">$22,000 HOME -ADULT COMMUNITY BOYNTON BEACH -</a> <span class="p"> pic</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
27
|
+
EOD
|
28
|
+
search_html_four = <<EOD
|
29
|
+
<p> Jul 22 - <a href="/mdc/reb/1128474725.html">$325000 / 3br - GOOD DEAL GREAT HOUSE AND LOCATION -</a><font size="-1"> (CORAL GABLES)</font> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
30
|
+
EOD
|
31
|
+
search_html_five = <<EOD
|
32
|
+
<p> Apr 9 - <a href="/pbc/boa/1115308178.html">40' SILVERTON CONVERTIBLE DIESEL - $105000 -</a><font size="-1"> (HOBE SOUND)</font> <span class="p"> pic</span></p>
|
33
|
+
EOD
|
34
|
+
category_listing_one = <<EOD
|
35
|
+
<p><a href="/pbc/reb/1128661387.html">$2995000 / 5br - Downtown Boca New Home To Be Built -</a><font size="-1"> (Boca Raton)</font> <span class="p"> pic</span> <<<i><a href="/reb/">real estate - by broker</a></i></p>
|
36
|
+
EOD
|
37
|
+
category_listing_two = <<EOD
|
38
|
+
<p><a href="/mdc/jwl/1128691192.html">925 Sterling Silver Dragonfly Charm Bracelet - $25 -</a> <span class="p"> img</span></p>
|
39
|
+
EOD
|
40
|
+
|
41
|
+
one = CraigScrape::PostSummary.new Hpricot.parse(search_html_one).at('p')
|
42
|
+
assert_equal true, one.has_img?
|
43
|
+
assert_equal false, one.has_pic?
|
44
|
+
assert_equal true, one.has_pic_or_img?
|
45
|
+
assert_equal '/brw/reb/1128608404.html', one.href
|
46
|
+
assert_equal "Losing your house? You'll need this New Loan Mod Video", one.label
|
47
|
+
assert_equal "real\302\240estate - by broker", one.section
|
48
|
+
assert_equal "W. Woodland", one.location
|
49
|
+
assert_equal 4, one.date.month
|
50
|
+
assert_equal 18, one.date.day
|
51
|
+
assert_equal nil, one.price
|
52
|
+
|
53
|
+
two = CraigScrape::PostSummary.new Hpricot.parse(search_html_two).at('p')
|
54
|
+
assert_equal true, two.has_img?
|
55
|
+
assert_equal true, two.has_pic?
|
56
|
+
assert_equal true, two.has_pic_or_img?
|
57
|
+
assert_equal '/mdc/reb/1128609783.html', two.href
|
58
|
+
assert_equal "$348000 / 1br - Large 1/1 plus office on 49th Floor. 5-Star NEW Condo. Great Views", two.label
|
59
|
+
assert_equal "real\302\240estate - by broker", two.section
|
60
|
+
assert_equal "Miami", two.location
|
61
|
+
assert_equal 1, two.date.month
|
62
|
+
assert_equal 4, two.date.day
|
63
|
+
assert_equal 348000.0, two.price
|
64
|
+
|
65
|
+
three = CraigScrape::PostSummary.new Hpricot.parse(search_html_three).at('p')
|
66
|
+
assert_equal false, three.has_img?
|
67
|
+
assert_equal true, three.has_pic?
|
68
|
+
assert_equal true, three.has_pic_or_img?
|
69
|
+
assert_equal '/mdc/reb/1128520894.html', three.href
|
70
|
+
assert_equal "$22,000 HOME -ADULT COMMUNITY BOYNTON BEACH", three.label
|
71
|
+
assert_equal "real\302\240estate - by broker", three.section
|
72
|
+
assert_equal nil, three.location
|
73
|
+
assert_equal 12, three.date.month
|
74
|
+
assert_equal 31, three.date.day
|
75
|
+
assert_equal 22.0, three.price
|
76
|
+
|
77
|
+
four = CraigScrape::PostSummary.new Hpricot.parse(search_html_four).at('p')
|
78
|
+
assert_equal false, four.has_img?
|
79
|
+
assert_equal false, four.has_pic?
|
80
|
+
assert_equal false, four.has_pic_or_img?
|
81
|
+
assert_equal '/mdc/reb/1128474725.html', four.href
|
82
|
+
assert_equal "$325000 / 3br - GOOD DEAL GREAT HOUSE AND LOCATION", four.label
|
83
|
+
assert_equal "real\302\240estate - by broker", four.section
|
84
|
+
assert_equal "CORAL GABLES", four.location
|
85
|
+
assert_equal 7, four.date.month
|
86
|
+
assert_equal 22, four.date.day
|
87
|
+
assert_equal 325000.0, four.price
|
88
|
+
|
89
|
+
five = CraigScrape::PostSummary.new Hpricot.parse(search_html_five).at('p')
|
90
|
+
assert_equal false, five.has_img?
|
91
|
+
assert_equal true, five.has_pic?
|
92
|
+
assert_equal true, five.has_pic_or_img?
|
93
|
+
assert_equal '/pbc/boa/1115308178.html', five.href
|
94
|
+
assert_equal "40' SILVERTON CONVERTIBLE DIESEL - $105000", five.label
|
95
|
+
assert_equal nil, five.section
|
96
|
+
assert_equal "HOBE SOUND", five.location
|
97
|
+
assert_equal 4, five.date.month
|
98
|
+
assert_equal 9, five.date.day
|
99
|
+
assert_equal 105000.0, five.price
|
100
|
+
|
101
|
+
five = CraigScrape::PostSummary.new Hpricot.parse(category_listing_one).at('p')
|
102
|
+
assert_equal false, five.has_img?
|
103
|
+
assert_equal true, five.has_pic?
|
104
|
+
assert_equal true, five.has_pic_or_img?
|
105
|
+
assert_equal '/pbc/reb/1128661387.html', five.href
|
106
|
+
assert_equal "$2995000 / 5br - Downtown Boca New Home To Be Built", five.label
|
107
|
+
assert_equal "real\302\240estate - by broker", five.section
|
108
|
+
assert_equal "Boca Raton", five.location
|
109
|
+
assert_equal nil, five.date
|
110
|
+
assert_equal 2995000.0, five.price
|
111
|
+
|
112
|
+
six = CraigScrape::PostSummary.new Hpricot.parse(category_listing_two).at('p')
|
113
|
+
assert_equal true, six.has_img?
|
114
|
+
assert_equal false, six.has_pic?
|
115
|
+
assert_equal true, six.has_pic_or_img?
|
116
|
+
assert_equal '/mdc/jwl/1128691192.html', six.href
|
117
|
+
assert_equal "925 Sterling Silver Dragonfly Charm Bracelet - $25", six.label
|
118
|
+
assert_equal nil, six.section
|
119
|
+
assert_equal nil, six.location
|
120
|
+
assert_equal nil, six.date
|
121
|
+
assert_equal 25.0, six.price
|
122
|
+
end
|
123
|
+
|
124
|
+
# Parses the four sample listing pages and checks, for each one, the link to
# the following page and the number of post summaries found.
def test_listings_parse
  first_category = CraigScrape::Listings.new(read_as_hpricot('listing_samples/category_output.html'))
  assert_equal 'index100.html', first_category.next_page_href
  assert_equal 100, first_category.posts.length
  # Only the summaries at indexes 0 through 80 are checked for the 4/18 date.
  first_category.posts[0..80].each do |summary|
    assert_equal 4, summary.date.month
    assert_equal 18, summary.date.day
  end

  second_category = CraigScrape::Listings.new(read_as_hpricot('listing_samples/category_output_2.html'))
  assert_equal 'index900.html', second_category.next_page_href
  assert_equal 100, second_category.posts.length

  multi_page_search = CraigScrape::Listings.new(read_as_hpricot('listing_samples/long_search_output.html'))
  assert_equal '/search/rea?query=house&minAsk=min&maxAsk=max&bedrooms=&s=800', multi_page_search.next_page_href
  assert_equal 100, multi_page_search.posts.length

  # The short search sample is expected to yield no next-page link.
  single_page_search = CraigScrape::Listings.new(read_as_hpricot('listing_samples/short_search_output.html'))
  assert_equal nil, single_page_search.next_page_href
  assert_equal 93, single_page_search.posts.length
end
|
145
|
+
|
146
|
+
def test_posting_parse
|
147
|
+
posting0 = CraigScrape::PostFull.new read_as_hpricot('post_samples/posting0.html')
|
148
|
+
assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color", posting0.contents
|
149
|
+
assert_equal ["south florida craigslist", "miami / dade", "furniture - by owner"], posting0.full_section
|
150
|
+
assert_equal "tv cart on wheels - $35 (NMB)", posting0.header
|
151
|
+
assert_equal "tv cart on wheels", posting0.title
|
152
|
+
assert_equal "NMB", posting0.location
|
153
|
+
assert_equal 1131363612, posting0.posting_id
|
154
|
+
assert_equal "sale-ktf9w-1131363612@craigslist.org", posting0.reply_to
|
155
|
+
assert_equal [0, 21, 13, 20, 4, 2009, 1, 110, true, "EDT"], posting0.post_time.to_a
|
156
|
+
assert_equal [], posting0.images
|
157
|
+
assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color",posting0.contents_as_plain
|
158
|
+
assert_equal 35.0, posting0.price
|
159
|
+
|
160
|
+
posting1 = CraigScrape::PostFull.new read_as_hpricot('post_samples/posting1.html')
|
161
|
+
assert_equal "Residential income property\227Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r<br />\n\r<br />\nJe parle le Fran\347ais\r<br />\n\r<br />\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r<br />\n\r<br />", posting1.contents
|
162
|
+
assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], posting1.full_section
|
163
|
+
assert_equal "$189900 / 4br - Investment Property--Duplex in Fort Lauderdale", posting1.header
|
164
|
+
assert_equal "Investment Property--Duplex in Fort Lauderdale", posting1.title
|
165
|
+
assert_equal '1000 NE 14th Pl', posting1.location
|
166
|
+
assert_equal 1131242195, posting1.posting_id
|
167
|
+
assert_equal "hous-5nzhq-1131242195@craigslist.org", posting1.reply_to
|
168
|
+
assert_equal [0, 33, 13, 20, 4, 2009, 1, 110, true, "EDT"], posting1.post_time.to_a
|
169
|
+
assert_equal %w(http://images.craigslist.org/3n83o33l5ZZZZZZZZZ94k913ac1582d4b1fa4.jpg http://images.craigslist.org/3n93p63obZZZZZZZZZ94k19d5e32eb3b610c2.jpg http://images.craigslist.org/3n93m03l6ZZZZZZZZZ94k6e9785e37a1b1f3f.jpg http://images.craigslist.org/3ma3oc3l4ZZZZZZZZZ94kbfecbcd2fb2e19cc.jpg), posting1.images
|
170
|
+
assert_equal "Residential income property\227Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r\n\r\nJe parle le Fran\347ais\r\n\r\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r\n\r", posting1.contents_as_plain
|
171
|
+
assert_equal 189900.0, posting1.price
|
172
|
+
|
173
|
+
posting2 = CraigScrape::PostFull.new read_as_hpricot('post_samples/posting2.html')
|
174
|
+
assert_equal 15775, posting2.contents.length # This is easy, and probably fine enough
|
175
|
+
assert_equal ["south florida craigslist", "broward county", "cars & trucks - by dealer"], posting2.full_section
|
176
|
+
assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE - $23975 (Fort Lauderdale)", posting2.header
|
177
|
+
assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE", posting2.title
|
178
|
+
assert_equal 'Fort Lauderdale', posting2.location
|
179
|
+
assert_equal 1127037648, posting2.posting_id
|
180
|
+
assert_equal nil, posting2.reply_to
|
181
|
+
assert_equal [0, 16, 14, 17, 4, 2009, 5, 107, true, "EDT"], posting2.post_time.to_a
|
182
|
+
assert_equal [], posting2.images
|
183
|
+
assert_equal "\302\240 Sheehan Buick Pontiac GMC \302\240 Pompano Beach, FL(754) 224-3257 \302\240PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!2002 Chevrolet Corvette Z06 Florida Driven AutoCheck Certified 5.7L V8 6sp2 Door Coupe.\302\240Price: \302\240 $23,975Exterior:Electron Blue MetallicInterior:BlackStock#:P5110AVIN:1G1YY12S625129021FREE AutoCheck Vehicle ReportMileage:63,560Transmission:6 Speed ManualEngine:V8 5.7L OHVWarranty:Limited WarrantyTitle:Clear\302\273\302\240View All 58 Photos\302\273\302\240View Full Vehicle Details\302\273\302\240Ask the Seller a Question\302\273\302\240E-mail this to a Friend\302\240 DescriptionPRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!\r\n\r\nLOADED WITH BLACK LEATHER BUCKET SEATS, POWER DRIVERS SEAT, DUAL ZONE CLIMATE CONTROL, 4 WHEEL ABS BRAKES, POWER STEERING AND BRAKES, REAR LIMITED SLIP DIFFERENTIAL, STABILITY CONTROL, CRUISE CONTROL, TLT STEERING WHEEL, POWER WINDOWS AND LOCKS, AUTOMATIC ON/OFF HEADLAMPS, FOG LIGHTS, DUAL AIR BAG SAFETY, AM/FM STEREO CD PLAYER, INTERMITTENT WINDSHIELD WIPERS AND SO MUCH MORE - THIS CAR IS TOTALLY HOT WITH GREAT LOW MILES!\r\n\r\nPlease call us to make your deal now at 1-888-453-5244. Please visit our Website at www.sheehanautoplex.com ***View 50+ Pictures of this vehicle - a complete description including standard features and all added options & a FREE AUTO CHECK REPORT at www.sheehanautoplex.com. ***Financing for Everyone - Good credit - bad credit - divorce - charge off's - NO PROBLEM. 
To complete a secure credit application, please visit our website at www.sheehanautoplex.com ***The largest Dealer in the State of Florida - We export all over the world - For details please visit www.sheehanautoplex.com ***Sheehan Autoplex takes great pride in our outstanding customer service and has been recognized by the following associations - BBB (Better Business Bureau) - NIADA - and the FIADA. Call us to get your best deal. CALL NOW. 1-888-453-5244\302\240 Contact Sheehan Buick Pontiac GMCPhone:(754) 224-3257Fax:(954) 781-9050Phone:(754) 224-3257E-mail:sales@proauto.comBusiness HoursWeekdays:9:00 AM to 9:00 PMSat:9:00 AM to 6:00 PMSun:",posting2.contents_as_plain
|
184
|
+
assert_equal 23975.0, posting2.price
|
185
|
+
|
186
|
+
posting3 = CraigScrape::PostFull.new read_as_hpricot('post_samples/posting3.html')
|
187
|
+
assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r<br />\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny", posting3.contents
|
188
|
+
assert_equal ["south florida craigslist", "miami / dade", "cars & trucks - by owner"], posting3.full_section
|
189
|
+
assert_equal "300ZX Nissan Twin Turbo 1992 - $5800 (N.Miami/ Hialeah)", posting3.header
|
190
|
+
assert_equal "300ZX Nissan Twin Turbo 1992", posting3.title
|
191
|
+
assert_equal "N.Miami/ Hialeah", posting3.location
|
192
|
+
assert_equal 1130212403, posting3.posting_id
|
193
|
+
assert_equal "sale-c9bpa-1130212403@craigslist.org", posting3.reply_to
|
194
|
+
assert_equal [0, 21, 18, 19, 4, 2009, 0, 109, true, "EDT"], posting3.post_time.to_a
|
195
|
+
assert_equal %w(http://images.craigslist.org/3n23kf3lfZZZZZZZZZ94j1160e7d7b0601934.jpg http://images.craigslist.org/3nc3kf3p2ZZZZZZZZZ94j04fbc71e0a551ace.jpg http://images.craigslist.org/3nc3k33l7ZZZZZZZZZ94k13d8d7b1024e1e0e.jpg http://images.craigslist.org/3n23k63mfZZZZZZZZZ94k7838ae5d48d91eb8.jpg), posting3.images
|
196
|
+
assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny",posting3.contents_as_plain
|
197
|
+
assert_equal 5800.0, posting3.price
|
198
|
+
|
199
|
+
# This one ended up being quite a curveball since the user uploaded HTML was such junk:
|
200
|
+
posting4 = CraigScrape::PostFull.new read_as_hpricot('post_samples/posting4.html')
|
201
|
+
assert_equal 20640, posting4.contents.length
|
202
|
+
assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], posting4.full_section
|
203
|
+
assert_equal "$225000 / 3br - Palm Aire Golf Corner Unit!", posting4.header
|
204
|
+
assert_equal "Palm Aire Golf Corner Unit!", posting4.title
|
205
|
+
assert_equal nil, posting4.location
|
206
|
+
assert_equal 1139303170, posting4.posting_id
|
207
|
+
assert_equal "hous-sk9f2-1139303170@craigslist.org", posting4.reply_to
|
208
|
+
assert_equal [0, 8, 9, 25, 4, 2009, 6, 115, true, "EDT"], posting4.post_time.to_a
|
209
|
+
assert_equal [], posting4.images
|
210
|
+
assert_equal 6399,posting4.contents_as_plain.length
|
211
|
+
assert_equal 225000.0, posting4.price
|
212
|
+
|
213
|
+
posting5 = CraigScrape::PostFull.new read_as_hpricot('post_samples/posting5.html')
|
214
|
+
assert_equal nil, posting5.contents
|
215
|
+
assert_equal ["south florida craigslist", "palm beach co", "apts/housing for rent"], posting5.full_section
|
216
|
+
assert_equal "This posting has been <a href=\"http://www.craigslist.org/about/help/flags_and_community_moderation\">flagged</a> for removal", posting5.header
|
217
|
+
assert_equal nil, posting5.title
|
218
|
+
assert_equal nil, posting5.location
|
219
|
+
assert_equal nil, posting5.posting_id
|
220
|
+
assert_equal nil, posting5.reply_to
|
221
|
+
assert_equal nil, posting5.post_time
|
222
|
+
assert_equal [], posting5.images
|
223
|
+
assert_equal nil, posting5.contents_as_plain
|
224
|
+
assert_equal nil, posting5.price
|
225
|
+
end
|
226
|
+
|
227
|
+
private
|
228
|
+
|
229
|
+
# Reads a fixture stored relative to this test file's directory and hands its
# contents to Hpricot.parse.
#
# test_file - fixture path relative to the directory containing this file
#             (e.g. 'post_samples/posting0.html').
#
# Returns whatever Hpricot.parse returns for the file's contents.
# Raises Errno::ENOENT (from File.read) when the fixture does not exist.
def read_as_hpricot(test_file)
  # File.read opens, reads and closes the file in one call. The previous
  # File.open(...).read never closed the handle, leaving the descriptor open
  # until the garbage collector finalized the File object.
  fixture_path = File.join(File.dirname(__FILE__), test_file)
  Hpricot.parse(File.read(fixture_path))
end
|
234
|
+
end
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: libcraigscrape
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.5"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Chris DeRose, DeRose Technologies, Inc.
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-05-05 00:00:00 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: htmlentities
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: activesupport
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
description: quick, easy, craigslist parsing library that takes the monotony out of working with craigslist posts and listings
|
46
|
+
email: cderose@derosetechnologies.com
|
47
|
+
executables:
|
48
|
+
- craigwatch
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files:
|
52
|
+
- README
|
53
|
+
- CHANGELOG
|
54
|
+
- COPYING
|
55
|
+
- COPYING.LESSER
|
56
|
+
- bin/craigwatch
|
57
|
+
files:
|
58
|
+
- CHANGELOG
|
59
|
+
- COPYING
|
60
|
+
- COPYING.LESSER
|
61
|
+
- Rakefile
|
62
|
+
- bin/craig_report_schema.yml
|
63
|
+
- bin/report_mailer
|
64
|
+
- bin/report_mailer/craigslist_report.html.erb
|
65
|
+
- bin/report_mailer/craigslist_report.plain.erb
|
66
|
+
- bin/craigwatch
|
67
|
+
- test/listing_samples
|
68
|
+
- test/listing_samples/category_output.html
|
69
|
+
- test/listing_samples/short_search_output.html
|
70
|
+
- test/listing_samples/category_output_2.html
|
71
|
+
- test/listing_samples/long_search_output.html
|
72
|
+
- test/test_craigslist_listing.rb
|
73
|
+
- test/post_samples
|
74
|
+
- test/post_samples/posting4.html
|
75
|
+
- test/post_samples/posting1.html
|
76
|
+
- test/post_samples/posting0.html
|
77
|
+
- test/post_samples/posting5.html
|
78
|
+
- test/post_samples/posting3.html
|
79
|
+
- test/post_samples/posting2.html
|
80
|
+
- test/google.html
|
81
|
+
- lib/libcraigscrape.rb
|
82
|
+
- README
|
83
|
+
has_rdoc: true
|
84
|
+
homepage: http://www.derosetechnologies.com/community/libcraigscrape
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options:
|
87
|
+
- --quiet
|
88
|
+
- --title
|
89
|
+
- The libcraigscrape Reference
|
90
|
+
- --main
|
91
|
+
- README
|
92
|
+
- --inline-source
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: "0"
|
100
|
+
version:
|
101
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: "0"
|
106
|
+
version:
|
107
|
+
requirements: []
|
108
|
+
|
109
|
+
rubyforge_project: libcraigwatch
|
110
|
+
rubygems_version: 1.3.1
|
111
|
+
signing_key:
|
112
|
+
specification_version: 2
|
113
|
+
summary: quick, easy, craigslist parsing library that takes the monotony out of working with craigslist posts and listings
|
114
|
+
test_files:
|
115
|
+
- test/test_craigslist_listing.rb
|