olek-libcraigscrape 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. data/CHANGELOG +94 -0
  2. data/COPYING +674 -0
  3. data/COPYING.LESSER +165 -0
  4. data/README +89 -0
  5. data/Rakefile +125 -0
  6. data/bin/craig_report_schema.yml +68 -0
  7. data/bin/craigwatch +581 -0
  8. data/bin/report_mailer/craigslist_report.html.erb +17 -0
  9. data/bin/report_mailer/craigslist_report.plain.erb +18 -0
  10. data/lib/geo_listings.rb +144 -0
  11. data/lib/libcraigscrape.rb +217 -0
  12. data/lib/listings.rb +160 -0
  13. data/lib/posting.rb +324 -0
  14. data/lib/scraper.rb +212 -0
  15. data/test/geolisting_samples/geo_listing_ca070209.html +76 -0
  16. data/test/geolisting_samples/geo_listing_ca_sk070209.html +31 -0
  17. data/test/geolisting_samples/geo_listing_cn070209.html +35 -0
  18. data/test/geolisting_samples/geo_listing_us070209.html +355 -0
  19. data/test/geolisting_samples/hierarchy_test071009/index.html +31 -0
  20. data/test/geolisting_samples/hierarchy_test071009/us/fl/ft%20myers%20%5C/%20SW%20florida/index.html +46 -0
  21. data/test/geolisting_samples/hierarchy_test071009/us/fl/ft%20myers%20%5C/index.html +46 -0
  22. data/test/geolisting_samples/hierarchy_test071009/us/fl/index.html +46 -0
  23. data/test/geolisting_samples/hierarchy_test071009/us/fl/miami/index.html +46 -0
  24. data/test/geolisting_samples/hierarchy_test071009/us/fl/miami/nonsense/index.html +46 -0
  25. data/test/geolisting_samples/hierarchy_test071009/us/fl/miami/nonsense/more-nonsense/index.html +46 -0
  26. data/test/geolisting_samples/hierarchy_test071009/us/fl/nonexist/index.html +46 -0
  27. data/test/geolisting_samples/hierarchy_test071009/us/fl/nonsense/index.html +46 -0
  28. data/test/geolisting_samples/hierarchy_test071009/us/fl/south%20florida/index.html +46 -0
  29. data/test/geolisting_samples/hierarchy_test071009/us/index.html +355 -0
  30. data/test/google.html +8 -0
  31. data/test/libcraigscrape_test_helpers.rb +37 -0
  32. data/test/listing_samples/category_output.html +231 -0
  33. data/test/listing_samples/category_output_2.html +217 -0
  34. data/test/listing_samples/empty_listings.html +128 -0
  35. data/test/listing_samples/fortmyers_art_index.060909/1046596324.html +93 -0
  36. data/test/listing_samples/fortmyers_art_index.060909/1053085283.html +92 -0
  37. data/test/listing_samples/fortmyers_art_index.060909/1112522674.html +89 -0
  38. data/test/listing_samples/fortmyers_art_index.060909/823516079.html +92 -0
  39. data/test/listing_samples/fortmyers_art_index.060909/825684735.html +89 -0
  40. data/test/listing_samples/fortmyers_art_index.060909/891513957.html +94 -0
  41. data/test/listing_samples/fortmyers_art_index.060909/897549505.html +99 -0
  42. data/test/listing_samples/fortmyers_art_index.060909/960826026.html +89 -0
  43. data/test/listing_samples/fortmyers_art_index.060909/993256300.html +89 -0
  44. data/test/listing_samples/fortmyers_art_index.060909/fortmyers_art_index500.060909.html +237 -0
  45. data/test/listing_samples/fortmyers_art_index.060909/fortmyers_art_index600.060909.html +132 -0
  46. data/test/listing_samples/long_search_output.html +137 -0
  47. data/test/listing_samples/mia_fua_index8900.5.21.09.html +226 -0
  48. data/test/listing_samples/mia_search_kitten.3.15.10.html +149 -0
  49. data/test/listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack1000.6.18.09.html +144 -0
  50. data/test/listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack900.6.18.09.html +146 -0
  51. data/test/listing_samples/new_listing_span.4.17.10.html +769 -0
  52. data/test/listing_samples/short_search_output.html +133 -0
  53. data/test/post_samples/1207457727.html +92 -0
  54. data/test/post_samples/brw_reb_1224008903.html +101 -0
  55. data/test/post_samples/posting0.html +91 -0
  56. data/test/post_samples/posting1.html +106 -0
  57. data/test/post_samples/posting1796890756-061710.html +2318 -0
  58. data/test/post_samples/posting1808219423.html +2473 -0
  59. data/test/post_samples/posting1938291834-090610.html +188 -0
  60. data/test/post_samples/posting2.html +107 -0
  61. data/test/post_samples/posting3.html +92 -0
  62. data/test/post_samples/posting4.html +993 -0
  63. data/test/post_samples/posting5.html +38 -0
  64. data/test/post_samples/sfbay_art_1223614914.html +94 -0
  65. data/test/post_samples/this_post_has_been_deleted_by_its_author.html +37 -0
  66. data/test/post_samples/this_post_has_expired.html +48 -0
  67. data/test/test_craigslist_geolisting.rb +521 -0
  68. data/test/test_craigslist_listing.rb +362 -0
  69. data/test/test_craigslist_posting.rb +426 -0
  70. metadata +273 -0
@@ -0,0 +1,362 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'test/unit'
4
+ require File.dirname(__FILE__)+'/../lib/libcraigscrape'
5
+ require File.dirname(__FILE__)+'/libcraigscrape_test_helpers'
6
+
7
+ class CraigslistListingTest < Test::Unit::TestCase
8
+ include LibcraigscrapeTestHelpers
9
+
10
+ def test_listings_parse
11
+ category = CraigScrape::Listings.new relative_uri_for('listing_samples/category_output.html')
12
+ assert_equal 'index100.html', category.next_page_href
13
+ assert_equal 100, category.posts.length
14
+
15
+ category.posts[0..80].each do |l|
16
+ assert_equal 4, l.post_date.month
17
+ assert_equal 18, l.post_date.day
18
+ end
19
+
20
+ category2 = CraigScrape::Listings.new relative_uri_for('listing_samples/category_output_2.html')
21
+ assert_equal 'index900.html', category2.next_page_href
22
+ assert_equal 100, category2.posts.length
23
+
24
+ long_search = CraigScrape::Listings.new relative_uri_for('listing_samples/long_search_output.html')
25
+ assert_equal '/search/rea?query=house&minAsk=min&maxAsk=max&bedrooms=&s=800', long_search.next_page_href
26
+ assert_equal 100, long_search.posts.length
27
+
28
+ short_search = CraigScrape::Listings.new relative_uri_for('listing_samples/short_search_output.html')
29
+ assert_equal nil, short_search.next_page_href
30
+ assert_equal 93, short_search.posts.length
31
+
32
+ mia_fua_index8900_052109 = CraigScrape::Listings.new relative_uri_for('listing_samples/mia_fua_index8900.5.21.09.html')
33
+ assert_equal 'index9000.html', mia_fua_index8900_052109.next_page_href
34
+ assert_equal 100, mia_fua_index8900_052109.posts.length
35
+ # NOTE: This tests a subtle condition where there's a blank h4 tag, and we shouldn't need to eager-load,. since a solid inference can be made on the date, since its not the last h4 on the page
36
+ # This actually happens quite a bit...
37
+ mia_fua_index8900_052109.posts[0..13].each do |l|
38
+ assert_equal 5, l.post_date.month
39
+ assert_equal 15, l.post_date.day
40
+ end
41
+ mia_fua_index8900_052109.posts[14..99].each do |l|
42
+ assert_equal 5, l.post_date.month
43
+ assert_equal 14, l.post_date.day
44
+ end
45
+
46
+ empty_listings = CraigScrape::Listings.new relative_uri_for('listing_samples/empty_listings.html')
47
+ assert_equal nil, empty_listings.next_page_href
48
+ assert_equal [], empty_listings.posts
49
+ end
50
+
51
+ def test_eager_post_loading
52
+ # libcraigscrape is supposed to 'smart' when downloading postings that don't make 'sense' solely by looking at the listings.
53
+ # I'm only seen this on occasion, but its annoying and craigslist seems to use a lot of approximations sometimes
54
+ # The test page supplied is slightly adjusted to compensate for the lack of a web server when readng pages form the filesystem.
55
+
56
+ fortmyers_art_index500_060909 = CraigScrape::Listings.new relative_uri_for('listing_samples/fortmyers_art_index.060909/fortmyers_art_index500.060909.html')
57
+ fortmyers_art_index500_060909.posts[0..12].each do |l|
58
+ assert_equal 5, l.post_date.month
59
+ assert_equal 16, l.post_date.day
60
+ end
61
+ fortmyers_art_index500_060909.posts[13..36].each do |l|
62
+ assert_equal 5, l.post_date.month
63
+ assert_equal 15, l.post_date.day
64
+ end
65
+ fortmyers_art_index500_060909.posts[37..41].each do |l|
66
+ assert_equal 5, l.post_date.month
67
+ assert_equal 14, l.post_date.day
68
+ end
69
+ fortmyers_art_index500_060909.posts[42..55].each do |l|
70
+ assert_equal 5, l.post_date.month
71
+ assert_equal 13, l.post_date.day
72
+ end
73
+ fortmyers_art_index500_060909.posts[56..65].each do |l|
74
+ assert_equal 5, l.post_date.month
75
+ assert_equal 12, l.post_date.day
76
+ end
77
+ fortmyers_art_index500_060909.posts[66..87].each do |l|
78
+ assert_equal 5, l.post_date.month
79
+ assert_equal 11, l.post_date.day
80
+ end
81
+ fortmyers_art_index500_060909.posts[88..94].each do |l|
82
+ assert_equal 5, l.post_date.month
83
+ assert_equal 10, l.post_date.day
84
+ end
85
+ assert_equal 4, fortmyers_art_index500_060909.posts[95].post_date.month
86
+ assert_equal 8, fortmyers_art_index500_060909.posts[95].post_date.day
87
+ assert_equal 2, fortmyers_art_index500_060909.posts[96].post_date.month
88
+ assert_equal 27, fortmyers_art_index500_060909.posts[96].post_date.day
89
+ assert_equal 2, fortmyers_art_index500_060909.posts[97].post_date.month
90
+ assert_equal 23, fortmyers_art_index500_060909.posts[97].post_date.day
91
+ assert_equal 1, fortmyers_art_index500_060909.posts[98].post_date.month
92
+ assert_equal 14, fortmyers_art_index500_060909.posts[98].post_date.day
93
+ assert_equal 12, fortmyers_art_index500_060909.posts[99].post_date.month
94
+ assert_equal 16, fortmyers_art_index500_060909.posts[99].post_date.day
95
+
96
+ # Now we'll do one of these elusive 'trailer' pages which don't seem to really make much sense.
97
+ # Best I can tell, it only comes after a page like the one tested just above
98
+ fortmyers_art_index600_060909 = CraigScrape::Listings.new relative_uri_for('listing_samples/fortmyers_art_index.060909/fortmyers_art_index600.060909.html')
99
+ assert_equal "Husqvarna Viking Rose: Used Embroidery/Sewing Machine. Instruction book, Video, Embroidery Unit, 4\" 4\" hoop, designs, tool box with accessories including 8 feet (A, B, C, D, E, J, P, U and zipper foot). $400.00 Firm. (941) 347-8014 or (352)638-4707.", fortmyers_art_index600_060909.posts[0].contents
100
+ assert_equal "Husqvarna Viking Rose: Used Embroidery/Sewing Machine. Instruction book, Video, Embroidery Unit, 4\" 4\" hoop, designs, tool box with accessories including 8 feet (A, B, C, D, E, J, P, U and zipper foot). $400.00 Firm. (941) 347-8014 or (352)638-4707.", fortmyers_art_index600_060909.posts[0].contents_as_plain
101
+ assert_equal false, fortmyers_art_index600_060909.posts[0].deleted_by_author?
102
+ assert_equal true, fortmyers_art_index600_060909.posts[0].downloaded?
103
+ assert_equal false, fortmyers_art_index600_060909.posts[0].flagged_for_removal?
104
+ assert_equal ["fort myers craigslist", "art & crafts"], fortmyers_art_index600_060909.posts[0].full_section
105
+ assert_equal false, fortmyers_art_index600_060909.posts[0].has_img?
106
+ assert_equal true, fortmyers_art_index600_060909.posts[0].has_pic?
107
+ assert_equal true, fortmyers_art_index600_060909.posts[0].has_pic_or_img?
108
+ assert_equal "Husqvarna Viking Rose Embroidery-Sewing Machine - $400 (Punta Gorda, Charlotte County)", fortmyers_art_index600_060909.posts[0].header
109
+ assert_equal "Husqvarna Viking Rose Embroidery-Sewing Machine - $400 (Punta Gorda, Charlotte County)", fortmyers_art_index600_060909.posts[0].header_as_plain
110
+ assert_equal "897549505.html", fortmyers_art_index600_060909.posts[0].href
111
+ assert_equal [], fortmyers_art_index600_060909.posts[0].images
112
+ assert_equal [:pic], fortmyers_art_index600_060909.posts[0].img_types
113
+ assert_equal "Husqvarna Viking Rose Embroidery-Sewing Machine - $400", fortmyers_art_index600_060909.posts[0].label
114
+ assert_equal "Punta Gorda, Charlotte County", fortmyers_art_index600_060909.posts[0].location
115
+ assert_equal [], fortmyers_art_index600_060909.posts[0].pics
116
+ assert_equal [0, 0, 0, 28, 10, 2008, 2, 302, true, "EDT"], fortmyers_art_index600_060909.posts[0].post_date.to_a
117
+ assert_equal [0, 51, 21, 28, 10, 2008, 2, 302, true, "EDT"], fortmyers_art_index600_060909.posts[0].post_time.to_a
118
+ assert_equal 897549505, fortmyers_art_index600_060909.posts[0].posting_id
119
+ assert_equal 400.0, fortmyers_art_index600_060909.posts[0].price
120
+ assert_equal nil, fortmyers_art_index600_060909.posts[0].reply_to
121
+ assert_equal "art & crafts", fortmyers_art_index600_060909.posts[0].section
122
+ assert_equal false, fortmyers_art_index600_060909.posts[0].system_post?
123
+ assert_equal "Husqvarna Viking Rose Embroidery-Sewing Machine", fortmyers_art_index600_060909.posts[0].title
124
+
125
+ assert_equal "Multiple artists' moving sale. Lots of unusual items including art, art supplies, ceramics and ceramic glazes, furniture, clothes, books, electronics, cd's and much more. Also for sale is alot of restaurant equpment.\r<br>\n\r<br>\nSale to be held at 3570 Bayshore Dr. next to Bayshore Coffee Co.\r<br>\n\r<br>\nSaturday 8:00 a.m. until 2:00 Rain or shine.\r<br>", fortmyers_art_index600_060909.posts[1].contents
126
+ assert_equal "Multiple artists' moving sale. Lots of unusual items including art, art supplies, ceramics and ceramic glazes, furniture, clothes, books, electronics, cd's and much more. Also for sale is alot of restaurant equpment.\r\n\r\nSale to be held at 3570 Bayshore Dr. next to Bayshore Coffee Co.\r\n\r\nSaturday 8:00 a.m. until 2:00 Rain or shine.\r", fortmyers_art_index600_060909.posts[1].contents_as_plain
127
+ assert_equal false, fortmyers_art_index600_060909.posts[1].deleted_by_author?
128
+ assert_equal true, fortmyers_art_index600_060909.posts[1].downloaded?
129
+ assert_equal false, fortmyers_art_index600_060909.posts[1].flagged_for_removal?
130
+ assert_equal ["fort myers craigslist", "art & crafts"], fortmyers_art_index600_060909.posts[1].full_section
131
+ assert_equal false, fortmyers_art_index600_060909.posts[1].has_img?
132
+ assert_equal false, fortmyers_art_index600_060909.posts[1].has_pic?
133
+ assert_equal false, fortmyers_art_index600_060909.posts[1].has_pic_or_img?
134
+ assert_equal "ARTISTS' MOVING SALE-BAYSHORE (Naples)", fortmyers_art_index600_060909.posts[1].header
135
+ assert_equal "ARTISTS' MOVING SALE-BAYSHORE (Naples)", fortmyers_art_index600_060909.posts[1].header_as_plain
136
+ assert_equal "891513957.html", fortmyers_art_index600_060909.posts[1].href
137
+ assert_equal [], fortmyers_art_index600_060909.posts[1].images
138
+ assert_equal [], fortmyers_art_index600_060909.posts[1].img_types
139
+ assert_equal "ARTISTS' MOVING SALE-BAYSHORE", fortmyers_art_index600_060909.posts[1].label
140
+ assert_equal "Naples", fortmyers_art_index600_060909.posts[1].location
141
+ assert_equal [], fortmyers_art_index600_060909.posts[1].pics
142
+ assert_equal [0, 0, 0, 24, 10, 2008, 5, 298, true, "EDT"], fortmyers_art_index600_060909.posts[1].post_date.to_a
143
+ assert_equal [0, 31, 9, 24, 10, 2008, 5, 298, true, "EDT"], fortmyers_art_index600_060909.posts[1].post_time.to_a
144
+ assert_equal 891513957, fortmyers_art_index600_060909.posts[1].posting_id
145
+ assert_equal nil, fortmyers_art_index600_060909.posts[1].price
146
+ assert_equal "sale-891513957@craigslist.org", fortmyers_art_index600_060909.posts[1].reply_to
147
+ assert_equal "art & crafts", fortmyers_art_index600_060909.posts[1].section
148
+ assert_equal false, fortmyers_art_index600_060909.posts[1].system_post?
149
+ assert_equal "ARTISTS' MOVING SALE-BAYSHORE", fortmyers_art_index600_060909.posts[1].title
150
+
151
+ assert_equal "Tapestry sewing machine and embroidery arm luggage for Viking designer sewing machine. Two years old in excellent condition.", fortmyers_art_index600_060909.posts[2].contents
152
+ assert_equal "Tapestry sewing machine and embroidery arm luggage for Viking designer sewing machine. Two years old in excellent condition.", fortmyers_art_index600_060909.posts[2].contents_as_plain
153
+ assert_equal false, fortmyers_art_index600_060909.posts[2].deleted_by_author?
154
+ assert_equal true, fortmyers_art_index600_060909.posts[2].downloaded?
155
+ assert_equal false, fortmyers_art_index600_060909.posts[2].flagged_for_removal?
156
+ assert_equal ["fort myers craigslist", "art & crafts"], fortmyers_art_index600_060909.posts[2].full_section
157
+ assert_equal false, fortmyers_art_index600_060909.posts[2].has_img?
158
+ assert_equal false, fortmyers_art_index600_060909.posts[2].has_pic?
159
+ assert_equal false, fortmyers_art_index600_060909.posts[2].has_pic_or_img?
160
+ assert_equal "tapestry sewing machine and embroidery arm luggage - $250 (Punta Gorda)", fortmyers_art_index600_060909.posts[2].header
161
+ assert_equal "tapestry sewing machine and embroidery arm luggage - $250 (Punta Gorda)", fortmyers_art_index600_060909.posts[2].header_as_plain
162
+ assert_equal "825684735.html", fortmyers_art_index600_060909.posts[2].href
163
+ assert_equal [], fortmyers_art_index600_060909.posts[2].images
164
+ assert_equal [], fortmyers_art_index600_060909.posts[2].img_types
165
+ assert_equal "tapestry sewing machine and embroidery arm luggage - $250", fortmyers_art_index600_060909.posts[2].label
166
+ assert_equal "Punta Gorda", fortmyers_art_index600_060909.posts[2].location
167
+ assert_equal [], fortmyers_art_index600_060909.posts[2].pics
168
+ assert_equal [0, 0, 0, 3, 9, 2008, 3, 247, true, "EDT"], fortmyers_art_index600_060909.posts[2].post_date.to_a
169
+ assert_equal [0, 31, 15, 3, 9, 2008, 3, 247, true, "EDT"], fortmyers_art_index600_060909.posts[2].post_time.to_a
170
+ assert_equal 825684735, fortmyers_art_index600_060909.posts[2].posting_id
171
+ assert_equal 250.0, fortmyers_art_index600_060909.posts[2].price
172
+ assert_equal "sale-825684735@craigslist.org", fortmyers_art_index600_060909.posts[2].reply_to
173
+ assert_equal "art & crafts", fortmyers_art_index600_060909.posts[2].section
174
+ assert_equal false, fortmyers_art_index600_060909.posts[2].system_post?
175
+ assert_equal "tapestry sewing machine and embroidery arm luggage", fortmyers_art_index600_060909.posts[2].title
176
+
177
+ assert_equal "Gorgeous and one of a kind! Museum-collected artist Jay von Koffler's Aurora Series - cast glass nude sculpture - Aurora. Mounted on marble and enhanced with bronze beak. \r<br>\n\r<br>\nDimensions: 30x16x6\r<br>\nCall for appointment for studio viewing - 239.595.1793", fortmyers_art_index600_060909.posts[3].contents
178
+ assert_equal "Gorgeous and one of a kind! Museum-collected artist Jay von Koffler's Aurora Series - cast glass nude sculpture - Aurora. Mounted on marble and enhanced with bronze beak. \r\n\r\nDimensions: 30x16x6\r\nCall for appointment for studio viewing - 239.595.1793", fortmyers_art_index600_060909.posts[3].contents_as_plain
179
+ assert_equal false, fortmyers_art_index600_060909.posts[3].deleted_by_author?
180
+ assert_equal true, fortmyers_art_index600_060909.posts[3].downloaded?
181
+ assert_equal false, fortmyers_art_index600_060909.posts[3].flagged_for_removal?
182
+ assert_equal ["fort myers craigslist", "art & crafts"], fortmyers_art_index600_060909.posts[3].full_section
183
+ assert_equal false, fortmyers_art_index600_060909.posts[3].has_img?
184
+ assert_equal true, fortmyers_art_index600_060909.posts[3].has_pic?
185
+ assert_equal true, fortmyers_art_index600_060909.posts[3].has_pic_or_img?
186
+ assert_equal "Cast Glass Sculpture - Aurora - $2400 (Naples)", fortmyers_art_index600_060909.posts[3].header
187
+ assert_equal "Cast Glass Sculpture - Aurora - $2400 (Naples)", fortmyers_art_index600_060909.posts[3].header_as_plain
188
+ assert_equal "823516079.html", fortmyers_art_index600_060909.posts[3].href
189
+ assert_equal [], fortmyers_art_index600_060909.posts[3].images
190
+ assert_equal [:pic], fortmyers_art_index600_060909.posts[3].img_types
191
+ assert_equal "Cast Glass Sculpture - Aurora - $2400", fortmyers_art_index600_060909.posts[3].label
192
+ assert_equal "Naples", fortmyers_art_index600_060909.posts[3].location
193
+ assert_equal [], fortmyers_art_index600_060909.posts[3].pics
194
+ assert_equal [0, 0, 0, 2, 9, 2008, 2, 246, true, "EDT"], fortmyers_art_index600_060909.posts[3].post_date.to_a
195
+ assert_equal [0, 35, 10, 2, 9, 2008, 2, 246, true, "EDT"], fortmyers_art_index600_060909.posts[3].post_time.to_a
196
+ assert_equal 823516079, fortmyers_art_index600_060909.posts[3].posting_id
197
+ assert_equal 2400.0, fortmyers_art_index600_060909.posts[3].price
198
+ assert_equal "sale-823516079@craigslist.org", fortmyers_art_index600_060909.posts[3].reply_to
199
+ assert_equal "art & crafts", fortmyers_art_index600_060909.posts[3].section
200
+ assert_equal false, fortmyers_art_index600_060909.posts[3].system_post?
201
+ assert_equal "Cast Glass Sculpture - Aurora", fortmyers_art_index600_060909.posts[3].title
202
+ end
203
+
204
+ def test_nasty_search_listings
205
+ miami_search_sss_rack900_061809 = CraigScrape::Listings.new relative_uri_for('listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack900.6.18.09.html')
206
+ assert_equal '/search/sss?query=rack&s=1000', miami_search_sss_rack900_061809.next_page_href
207
+
208
+ miami_search_sss_rack1000_061809 = CraigScrape::Listings.new relative_uri_for('listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack1000.6.18.09.html')
209
+ assert_equal nil, miami_search_sss_rack1000_061809.next_page_href
210
+
211
+ mia_search_kitten031510 = CraigScrape::Listings.new relative_uri_for('listing_samples/mia_search_kitten.3.15.10.html')
212
+ assert_equal "Adopt a 7 month on kitten- $75", mia_search_kitten031510.posts[0].label
213
+ assert_equal [15, 3], mia_search_kitten031510.posts[0].post_date.to_a[3..4]
214
+ assert_equal "Adorable Kitten! Free!!!", mia_search_kitten031510.posts[1].label
215
+ assert_equal [15, 3], mia_search_kitten031510.posts[1].post_date.to_a[3..4]
216
+ assert_equal "KITTENS,5 months, 1 Russian blue, 1 grey & white,vac spy/neu,$35fee ea", mia_search_kitten031510.posts[2].label
217
+ assert_equal [13, 3], mia_search_kitten031510.posts[2].post_date.to_a[3..4]
218
+ assert_equal "Kitties need a good home", mia_search_kitten031510.posts[3].label
219
+ assert_equal [13, 3], mia_search_kitten031510.posts[3].post_date.to_a[3..4]
220
+ assert_equal "7 week old kittens for adoption", mia_search_kitten031510.posts[4].label
221
+ assert_equal [13, 3], mia_search_kitten031510.posts[4].post_date.to_a[3..4]
222
+ assert_equal "Adorable Orange Kitten Free to Good Home", mia_search_kitten031510.posts[5].label
223
+ assert_equal [12, 3], mia_search_kitten031510.posts[5].post_date.to_a[3..4]
224
+ assert_equal "7 month old kitten free to good home", mia_search_kitten031510.posts[6].label
225
+ assert_equal [12, 3], mia_search_kitten031510.posts[6].post_date.to_a[3..4]
226
+ assert_equal "FEMALE KITTEN FOR GOOD HOME", mia_search_kitten031510.posts[7].label
227
+ assert_equal [9, 3], mia_search_kitten031510.posts[7].post_date.to_a[3..4]
228
+ assert_equal "Kitten", mia_search_kitten031510.posts[8].label
229
+ assert_equal [4, 3], mia_search_kitten031510.posts[8].post_date.to_a[3..4]
230
+ assert_equal "Kitties need a good home", mia_search_kitten031510.posts[9].label
231
+ assert_equal [4, 3], mia_search_kitten031510.posts[9].post_date.to_a[3..4]
232
+ assert_equal "Persain Cat And Tabby Cat", mia_search_kitten031510.posts[10].label
233
+ assert_equal [1, 3], mia_search_kitten031510.posts[10].post_date.to_a[3..4]
234
+ assert_equal "Tabby female kitten in a parking lot needs your help", mia_search_kitten031510.posts[11].label
235
+ assert_equal [23, 2], mia_search_kitten031510.posts[11].post_date.to_a[3..4]
236
+ assert_equal "Spring is almost officially here, grow your family, adopt a kitty!", mia_search_kitten031510.posts[12].label
237
+ assert_equal [22, 2], mia_search_kitten031510.posts[12].post_date.to_a[3..4]
238
+ assert_equal "Many adorable kittens for adoption!", mia_search_kitten031510.posts[13].label
239
+ assert_equal [22, 2], mia_search_kitten031510.posts[13].post_date.to_a[3..4]
240
+ assert_equal "2 free cats/kitten to good home", mia_search_kitten031510.posts[14].label
241
+ assert_equal [19, 2], mia_search_kitten031510.posts[14].post_date.to_a[3..4]
242
+ assert_equal "BEAUTIFUL KITTENS", mia_search_kitten031510.posts[15].label
243
+ assert_equal [19, 2], mia_search_kitten031510.posts[15].post_date.to_a[3..4]
244
+ assert_equal "MANY new adorable kittens for good homes!!!", mia_search_kitten031510.posts[16].label
245
+ assert_equal [18, 2], mia_search_kitten031510.posts[16].post_date.to_a[3..4]
246
+ assert_equal "Kitten living in a parking lot needs your help", mia_search_kitten031510.posts[17].label
247
+ assert_equal [16, 2], mia_search_kitten031510.posts[17].post_date.to_a[3..4]
248
+ assert_equal "BEAUTIFUL 8 WEEK KITTENS", mia_search_kitten031510.posts[18].label
249
+ assert_equal [16, 2], mia_search_kitten031510.posts[18].post_date.to_a[3..4]
250
+ assert_equal "ORANGE TABBY KITTEN", mia_search_kitten031510.posts[19].label
251
+ assert_equal [13, 2], mia_search_kitten031510.posts[19].post_date.to_a[3..4]
252
+ assert_equal "Lots of kittens to choose from! Pics!!", mia_search_kitten031510.posts[20].label
253
+ assert_equal [13, 2], mia_search_kitten031510.posts[20].post_date.to_a[3..4]
254
+
255
+ end
256
+
257
+ def test_new_listing_span051710_labels # Checks the label of each of the 100 posts (indices 0..99) parsed from one saved listing page.
258
+ new_listing_span051710 = CraigScrape::Listings.new relative_uri_for('listing_samples/new_listing_span.4.17.10.html') # NOTE(review): the '051710' in the method/variable names does not match the fixture's '4.17.10' stamp - confirm which is intended
259
+
260
+ assert_equal " Art Directly for Sale from the Artist", new_listing_span051710.posts[0].label
261
+ assert_equal "Wall Art, Contemporary Abstract by Vista Gallories", new_listing_span051710.posts[1].label
262
+ assert_equal "Gary George \"Darice\" Giclee Semi Nude Woman COA NEW", new_listing_span051710.posts[2].label
263
+ assert_equal "electric clock kits", new_listing_span051710.posts[3].label
264
+ assert_equal "Artificial Bonsai arrangements (3)", new_listing_span051710.posts[4].label
265
+ assert_equal "Wall Canvass", new_listing_span051710.posts[5].label
266
+ assert_equal "seeking drafting table", new_listing_span051710.posts[6].label
267
+ assert_equal "great electrical air compressor LIKE NEW", new_listing_span051710.posts[7].label
268
+ assert_equal "Mannequin Male Full Torso Display Form", new_listing_span051710.posts[8].label
269
+ assert_equal "CRAB NETS 12 X12 X7", new_listing_span051710.posts[9].label
270
+ assert_equal "Hundreds of Loose Beads from old Jewelry &newer Seed Beads arts crafts", new_listing_span051710.posts[10].label
271
+ assert_equal "HUNDREDS OF LOOSE BEADS VARIETY FOR ARTS CRAFTS MAKING JEWELRY", new_listing_span051710.posts[11].label
272
+ assert_equal "consolidated b-24d liberator", new_listing_span051710.posts[12].label
273
+ assert_equal "nort american p-51b mustang", new_listing_span051710.posts[13].label
274
+ assert_equal "spitfire mk.ixc kenley wing", new_listing_span051710.posts[14].label
275
+ assert_equal "republic p-47d thunderbolt bubbletop", new_listing_span051710.posts[15].label
276
+ assert_equal "Artistic & Commercial Mannequin Female Torso Ladies Form", new_listing_span051710.posts[16].label
277
+ assert_equal "Start your own Bath & Beauty company", new_listing_span051710.posts[17].label
278
+ assert_equal "hurricane mk.2 eagle squadron", new_listing_span051710.posts[18].label
279
+ assert_equal "HUGE Lot Iron-Ons Appliques-Craft Decals-Fabric-Holidays, Looney Tunes", new_listing_span051710.posts[19].label
280
+ assert_equal "typhoon mk.ib", new_listing_span051710.posts[20].label
281
+ assert_equal "Beautiful Handmade Sea Shell Candles - Great Gift Ideas", new_listing_span051710.posts[21].label
282
+ assert_equal "bristol beaufighter mk.vi", new_listing_span051710.posts[22].label
283
+ assert_equal "hawker tempest mk.v", new_listing_span051710.posts[23].label
284
+ assert_equal "gloster meteor f.1.v.1", new_listing_span051710.posts[24].label
285
+ assert_equal "Painted art picture with frame 43\"L X 31\"H", new_listing_span051710.posts[25].label
286
+ assert_equal "messerschmitt me 410b-2/u4", new_listing_span051710.posts[26].label
287
+ assert_equal "Matching Set 4 Wild Cat Prints Framed in Gold-Cheetah, Leopard, Lion", new_listing_span051710.posts[27].label
288
+ assert_equal "CATS IN PAJAMAS FRAMED PRINT-SIGNED-KATHRYN RAMSEUR GLICK 1995-NUMBERD", new_listing_span051710.posts[28].label
289
+ assert_equal "4 Needlecraft Books", new_listing_span051710.posts[29].label
290
+ assert_equal "UNIQUE HIDDEN ANGEL PRINT-MATTED & FRAMED Retails $89.99-Signed-Ocampa", new_listing_span051710.posts[30].label
291
+ assert_equal "royal air force hawker hurricane", new_listing_span051710.posts[31].label
292
+ assert_equal "UNIQUE LARGE PRINT-HANDS OF TIME-BY OCTAVIO - RETAILS $139.00", new_listing_span051710.posts[32].label
293
+ assert_equal "LARGE COBBLESTONE FRAMED PRINT LANDSCAPE BY HAILS - SIGNED 1996", new_listing_span051710.posts[33].label
294
+ assert_equal "zero fighier", new_listing_span051710.posts[34].label
295
+ assert_equal "UNIQUE 1 OF A KIND -HANDMADE JEWELRY", new_listing_span051710.posts[35].label
296
+ assert_equal "YARN YARN YARN", new_listing_span051710.posts[36].label
297
+ assert_equal "2012 Original Paintings", new_listing_span051710.posts[37].label
298
+ assert_equal "picture with birds songs$10", new_listing_span051710.posts[38].label
299
+ assert_equal "Modern original Still Life painting SIGNED", new_listing_span051710.posts[39].label
300
+ assert_equal "afghans", new_listing_span051710.posts[40].label
301
+ assert_equal "Teamwork Print-Inspirational", new_listing_span051710.posts[41].label
302
+ assert_equal "Large number of ceramic molds for sale at Reasonable prices!", new_listing_span051710.posts[42].label
303
+ assert_equal "1982 Knitting Collection", new_listing_span051710.posts[43].label
304
+ assert_equal "Bell Small Wilton Cake Pan", new_listing_span051710.posts[44].label
305
+ assert_equal "Winnie The Pooh Wilton Cake Pan", new_listing_span051710.posts[45].label
306
+ assert_equal "Holly Hobbie Wilton Cake Pan", new_listing_span051710.posts[46].label
307
+ assert_equal "Quilt~ Hand Crafted~Beautiful hand crafted quilted wall hanging", new_listing_span051710.posts[47].label
308
+ assert_equal "Pretty Pictures", new_listing_span051710.posts[48].label
309
+ assert_equal "messerschmitt bf 109d", new_listing_span051710.posts[49].label
310
+ assert_equal "douglas a-20 g havoc", new_listing_span051710.posts[50].label
311
+ assert_equal "me262a-1a/u3 reconnaissance", new_listing_span051710.posts[51].label
312
+ assert_equal "p-36 pearl harbor defender", new_listing_span051710.posts[52].label
313
+ assert_equal "spitfire mk.xivc", new_listing_span051710.posts[53].label
314
+ assert_equal "ART KIT", new_listing_span051710.posts[54].label
315
+ assert_equal "Unique Recycled Glass Melted Bottle Cheese Trays and dishes", new_listing_span051710.posts[55].label
316
+ assert_equal "T-SHIRT HEAT PRESS", new_listing_span051710.posts[56].label
317
+ assert_equal "Metal Alligator Wall Art With Neon Light", new_listing_span051710.posts[57].label
318
+ assert_equal "SOLAR GARDEN DECO LITES", new_listing_span051710.posts[58].label
319
+ assert_equal "POMPELL CHEETAH FRAMED ART PRINT & MATCHING THROW PILLOWS 35 X 27", new_listing_span051710.posts[59].label
320
+ assert_equal "\"YOU CAN DRAW\" 8 BOOKS IN 1", new_listing_span051710.posts[60].label
321
+ assert_equal "ROSEART SMART 3 IN 1 PORTFOLIO", new_listing_span051710.posts[61].label
322
+ assert_equal "art supplies", new_listing_span051710.posts[62].label
323
+ assert_equal "ZINC OXIDE", new_listing_span051710.posts[63].label
324
+ assert_equal "Wood Veneer", new_listing_span051710.posts[64].label
325
+ assert_equal "Scrapbook magazines", new_listing_span051710.posts[65].label
326
+ assert_equal "henri plisson fine art", new_listing_span051710.posts[66].label
327
+ assert_equal "Beautiful brand new bronze Fountain", new_listing_span051710.posts[67].label
328
+ assert_equal "Contemporary fine arts and quality handmade crafts", new_listing_span051710.posts[68].label
329
+ assert_equal "p-61 black widow", new_listing_span051710.posts[69].label
330
+ assert_equal "New Abstract Oil Paintings for Sale - Made in USA!", new_listing_span051710.posts[70].label
331
+ assert_equal "Fun Stamps", new_listing_span051710.posts[71].label
332
+ assert_equal "For Sale - Salvador Dali Print - Lincoln in Dalivision", new_listing_span051710.posts[72].label
333
+ assert_equal "For Sale Print on Canvas Gone with the Wind", new_listing_span051710.posts[73].label
334
+ assert_equal "For Sale - Two Framed Egyptian Prints on Papyrus", new_listing_span051710.posts[74].label
335
+ assert_equal "4/16 and 4/17 Gallery art unframed - gallery closed", new_listing_span051710.posts[75].label
336
+ assert_equal "Sewing patterns TONS 4/16 and 4/17", new_listing_span051710.posts[76].label
337
+ assert_equal "For Sale 4 panel Asian folding Art", new_listing_span051710.posts[77].label
338
+ assert_equal "@@@ Original Artwork on Print & signed", new_listing_span051710.posts[78].label
339
+ assert_equal "Three Moai Tikis", new_listing_span051710.posts[79].label
340
+ assert_equal "Stained Glass Kiln", new_listing_span051710.posts[80].label
341
+ assert_equal "Tiki Carved From Palm", new_listing_span051710.posts[81].label
342
+ assert_equal "The End is Near! 2012 painting", new_listing_span051710.posts[82].label
343
+ assert_equal "PAINTING BY KENT", new_listing_span051710.posts[83].label
344
+ assert_equal "ART SALE! ONLY $29 to $69 FOR THESE ORIGINAL PHOTO ART PIECES!", new_listing_span051710.posts[84].label
345
+ assert_equal " Contemporary Painting for Sale!!!", new_listing_span051710.posts[85].label
346
+ assert_equal "gift baskets and bears", new_listing_span051710.posts[86].label
347
+ assert_equal "Eyvind Earle Nocturne Serigraph", new_listing_span051710.posts[87].label
348
+ assert_equal "jo's watercolors", new_listing_span051710.posts[88].label
349
+ assert_equal "Eyvind Earle Carmel Cypress Serigraph", new_listing_span051710.posts[89].label
350
+ assert_equal "Eyvind Earle Stardust Blue Serigraph", new_listing_span051710.posts[90].label
351
+ assert_equal "Portraits painted of your loved ones", new_listing_span051710.posts[91].label
352
+ assert_equal "Attn Crafters! 10 Strands of Lemons", new_listing_span051710.posts[92].label
353
+ assert_equal "SARAH E. AND GULLS", new_listing_span051710.posts[93].label
354
+ assert_equal "cavalier 98", new_listing_span051710.posts[94].label
355
+ assert_equal "model kit fairey swordfish mk 2", new_listing_span051710.posts[95].label
356
+ assert_equal "jo's watercolors", new_listing_span051710.posts[96].label
357
+ assert_equal "Stampin' Up Rubber Stamps", new_listing_span051710.posts[97].label
358
+ assert_equal "Wyland Oil Painting", new_listing_span051710.posts[98].label
359
+ assert_equal "Denim Fabric Blocks for Crafts", new_listing_span051710.posts[99].label
360
+ end
361
+
362
+ end
@@ -0,0 +1,426 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'test/unit'
4
+ require File.dirname(__FILE__)+'/../lib/libcraigscrape'
5
+ require File.dirname(__FILE__)+'/libcraigscrape_test_helpers'
6
+
7
+
8
+ class CraigslistPostingTest < Test::Unit::TestCase
9
+ include LibcraigscrapeTestHelpers
10
+
11
+ def test_pukes
12
+ assert_raise(CraigScrape::Scraper::ParseError) do
13
+ CraigScrape::Posting.new( relative_uri_for('google.html') ).contents
14
+ end
15
+ end
16
+
17
+ def test_listing_parse # feeds hand-written listing <p> rows through Listings.parse_summary and checks the fields of the resulting Posting
18
+ search_html_one = <<EOD
19
+ <p> Apr 18 - <a href="/brw/reb/1128608404.html">Losing your house? You'll need this New Loan Mod Video -</a><font size="-1"> (W. Woodland)</font> <span class="p"> img</span> &lt;&lt;<i><a href="/reb/">real&nbsp;estate - by broker</a></i></p>
20
+ EOD
21
+ search_html_two = <<EOD
22
+ <p> Jan 4 - <a href="/mdc/reb/1128609783.html">$348000 / 1br - Large 1/1 plus office on 49th Floor. 5-Star NEW Condo. Great Views -</a><font size="-1"> (Miami)</font> <span class="p"> pic&nbsp;img</span> &lt;&lt;<i><a href="/reb/">real&nbsp;estate - by broker</a></i></p>
23
+ EOD
24
+ search_html_three = <<EOD
25
+ <p> Dec 31 - <a href="/mdc/reb/1128520894.html">$22,000 HOME -ADULT COMMUNITY BOYNTON BEACH -</a> <span class="p"> pic</span> &lt;&lt;<i><a href="/reb/">real&nbsp;estate - by broker</a></i></p>
26
+ EOD
27
+ search_html_four = <<EOD
28
+ <p> Jul 22 - <a href="/mdc/reb/1128474725.html">$325000 / 3br - GOOD DEAL GREAT HOUSE AND LOCATION -</a><font size="-1"> (CORAL GABLES)</font> &lt;&lt;<i><a href="/reb/">real&nbsp;estate - by broker</a></i></p>
29
+ EOD
30
+ search_html_five = <<EOD
31
+ <p> Apr 9 - <a href="/pbc/boa/1115308178.html">40' SILVERTON CONVERTIBLE DIESEL - $105000 -</a><font size="-1"> (HOBE SOUND)</font> <span class="p"> pic</span></p>
32
+ EOD
33
+ category_listing_one = <<EOD
34
+ <p><a href="/pbc/reb/1128661387.html">$2995000 / 5br - Downtown Boca New Home To Be Built -</a><font size="-1"> (Boca Raton)</font> <span class="p"> pic</span> &lt;&lt;<i><a href="/reb/">real&nbsp;estate - by broker</a></i></p>
35
+ EOD
36
+ category_listing_two = <<EOD
37
+ <p><a href="/mdc/jwl/1128691192.html">925 Sterling Silver Dragonfly Charm Bracelet - $25 -</a> <span class="p"> img</span></p>
38
+ EOD
39
+
40
+ one = CraigScrape::Posting.new CraigScrape::Listings.parse_summary( # dated row: "img" flag only, location and section present
41
+ Nokogiri::HTML(search_html_one, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
42
+ )
43
+ assert_equal true, one.has_img?
44
+ assert_equal false, one.has_pic?
45
+ assert_equal true, one.has_pic_or_img?
46
+ assert_equal '/brw/reb/1128608404.html', one.href
47
+ assert_equal "Losing your house? You'll need this New Loan Mod Video", one.label
48
+ assert_equal "real estate - by broker", one.section
49
+ assert_equal "W. Woodland", one.location
50
+ assert_equal 4, one.post_date.month
51
+ assert_equal 18, one.post_date.day
52
+ assert_equal nil, one.price # this label contains no "$" amount
53
+
54
+ two = CraigScrape::Posting.new CraigScrape::Listings.parse_summary( # dated row: both "pic" and "img" flags
55
+ Nokogiri::HTML(search_html_two, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
56
+ )
57
+ assert_equal true, two.has_img?
58
+ assert_equal true, two.has_pic?
59
+ assert_equal true, two.has_pic_or_img?
60
+ assert_equal '/mdc/reb/1128609783.html', two.href
61
+ assert_equal "$348000 / 1br - Large 1/1 plus office on 49th Floor. 5-Star NEW Condo. Great Views", two.label
62
+ assert_equal "real estate - by broker", two.section
63
+ assert_equal "Miami", two.location
64
+ assert_equal 1, two.post_date.month
65
+ assert_equal 4, two.post_date.day
66
+ assert_equal 348000.0, two.price
67
+
68
+ three = CraigScrape::Posting.new CraigScrape::Listings.parse_summary( # dated row: no <font> location element
69
+ Nokogiri::HTML(search_html_three, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
70
+ )
71
+ assert_equal false, three.has_img?
72
+ assert_equal true, three.has_pic?
73
+ assert_equal true, three.has_pic_or_img?
74
+ assert_equal '/mdc/reb/1128520894.html', three.href
75
+ assert_equal "$22,000 HOME -ADULT COMMUNITY BOYNTON BEACH", three.label
76
+ assert_equal "real estate - by broker", three.section
77
+ assert_equal nil, three.location
78
+ assert_equal 12, three.post_date.month
79
+ assert_equal 31, three.post_date.day
80
+ assert_equal 22.0, three.price # NOTE(review): label reads "$22,000" yet 22.0 is expected - looks like the price stops at the comma; confirm this is intended
81
+
82
+ four = CraigScrape::Posting.new CraigScrape::Listings.parse_summary( # dated row: no <span class="p"> image flags
83
+ Nokogiri::HTML(search_html_four, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
84
+ )
85
+ assert_equal false, four.has_img?
86
+ assert_equal false, four.has_pic?
87
+ assert_equal false, four.has_pic_or_img?
88
+ assert_equal '/mdc/reb/1128474725.html', four.href
89
+ assert_equal "$325000 / 3br - GOOD DEAL GREAT HOUSE AND LOCATION", four.label
90
+ assert_equal "real estate - by broker", four.section
91
+ assert_equal "CORAL GABLES", four.location
92
+ assert_equal 7, four.post_date.month
93
+ assert_equal 22, four.post_date.day
94
+ assert_equal 325000.0, four.price
95
+
96
+ five = CraigScrape::Posting.new CraigScrape::Listings.parse_summary( # dated row: no trailing section link, price at the end of the label
97
+ Nokogiri::HTML(search_html_five, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
98
+ )
99
+ assert_equal false, five.has_img?
100
+ assert_equal true, five.has_pic?
101
+ assert_equal true, five.has_pic_or_img?
102
+ assert_equal '/pbc/boa/1115308178.html', five.href
103
+ assert_equal "40' SILVERTON CONVERTIBLE DIESEL - $105000", five.label
104
+ assert_equal nil, five.section
105
+ assert_equal "HOBE SOUND", five.location
106
+ assert_equal 4, five.post_date.month
107
+ assert_equal 9, five.post_date.day
108
+ assert_equal 105000.0, five.price
109
+
110
+ six = CraigScrape::Posting.new CraigScrape::Listings.parse_summary( # row without a leading date, so post_date is expected to be nil
111
+ Nokogiri::HTML(category_listing_one, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
112
+ )
113
+ assert_equal false, six.has_img?
114
+ assert_equal true, six.has_pic?
115
+ assert_equal true, six.has_pic_or_img?
116
+ assert_equal '/pbc/reb/1128661387.html', six.href
117
+ assert_equal "$2995000 / 5br - Downtown Boca New Home To Be Built", six.label
118
+ assert_equal "real estate - by broker", six.section
119
+ assert_equal "Boca Raton", six.location
120
+ assert_equal nil, six.post_date
121
+ assert_equal 2995000.0, six.price
122
+
123
+ seven = CraigScrape::Posting.new CraigScrape::Listings.parse_summary( # row with no date, no location and no section link
124
+ Nokogiri::HTML(category_listing_two, nil, CraigScrape::Scraper::HTML_ENCODING).at('p')
125
+ )
126
+ assert_equal true, seven.has_img?
127
+ assert_equal false, seven.has_pic?
128
+ assert_equal true, seven.has_pic_or_img?
129
+ assert_equal '/mdc/jwl/1128691192.html', seven.href
130
+ assert_equal "925 Sterling Silver Dragonfly Charm Bracelet - $25", seven.label
131
+ assert_equal nil, seven.section
132
+ assert_equal nil, seven.location
133
+ assert_equal nil, seven.post_date
134
+ assert_equal 25.0, seven.price
135
+ end
136
+
137
+
138
+ def test_posting_parse
139
+ posting0 = CraigScrape::Posting.new relative_uri_for('post_samples/posting0.html')
140
+ assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color", posting0.contents
141
+ assert_equal ["south florida craigslist", "miami / dade", "furniture - by owner"], posting0.full_section
142
+ assert_equal "tv cart on wheels - $35 (NMB)", posting0.header
143
+ assert_equal "tv cart on wheels - $35", posting0.label
144
+ assert_equal "tv cart on wheels", posting0.title
145
+ assert_equal "NMB", posting0.location
146
+ assert_equal 1131363612, posting0.posting_id
147
+ assert_equal "sale-ktf9w-1131363612@craigslist.org", posting0.reply_to
148
+ assert_equal [0, 21, 13, 20, 4, 2009, 1, 110, true, "EDT"], posting0.post_time.to_a
149
+ assert_equal [], posting0.pics
150
+ assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color",posting0.contents_as_plain
151
+ assert_equal 35.0, posting0.price
152
+ assert_equal [], posting0.images
153
+ assert_equal [], posting0.img_types
154
+
155
+ posting1 = CraigScrape::Posting.new relative_uri_for('post_samples/posting1.html')
156
+ assert_equal "Residential income property\227Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r<br>\n\r<br>\nJe parle le Fran\347ais\r<br>\n\r<br>\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r<br>\n\r<br>", posting1.contents
157
+ assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], posting1.full_section
158
+ assert_equal "$189900 / 4br - Investment Property--Duplex in Fort Lauderdale", posting1.header
159
+ assert_equal "$189900 / 4br - Investment Property--Duplex in Fort Lauderdale", posting1.label
160
+ assert_equal "Investment Property--Duplex in Fort Lauderdale", posting1.title
161
+ assert_equal '1000 NE 14th Pl', posting1.location
162
+ assert_equal 1131242195, posting1.posting_id
163
+ assert_equal "hous-5nzhq-1131242195@craigslist.org", posting1.reply_to
164
+ assert_equal [0, 33, 13, 20, 4, 2009, 1, 110, true, "EDT"], posting1.post_time.to_a
165
+ assert_equal %w(http://images.craigslist.org/3n83o33l5ZZZZZZZZZ94k913ac1582d4b1fa4.jpg http://images.craigslist.org/3n93p63obZZZZZZZZZ94k19d5e32eb3b610c2.jpg http://images.craigslist.org/3n93m03l6ZZZZZZZZZ94k6e9785e37a1b1f3f.jpg http://images.craigslist.org/3ma3oc3l4ZZZZZZZZZ94kbfecbcd2fb2e19cc.jpg), posting1.pics
166
+ assert_equal "Residential income property\227Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r\n\r\nJe parle le Fran\347ais\r\n\r\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r\n\r", posting1.contents_as_plain
167
+ assert_equal 189900.0, posting1.price
168
+ assert_equal [], posting1.images
169
+ assert_equal ["http://images.craigslist.org/3n83o33l5ZZZZZZZZZ94k913ac1582d4b1fa4.jpg", "http://images.craigslist.org/3n93p63obZZZZZZZZZ94k19d5e32eb3b610c2.jpg", "http://images.craigslist.org/3n93m03l6ZZZZZZZZZ94k6e9785e37a1b1f3f.jpg", "http://images.craigslist.org/3ma3oc3l4ZZZZZZZZZ94kbfecbcd2fb2e19cc.jpg"], posting1.pics
170
+ assert_equal [:pic], posting1.img_types
171
+
172
+ posting2 = CraigScrape::Posting.new relative_uri_for('post_samples/posting2.html')
173
+ assert_equal 15488, posting2.contents.length # This is easy, and probably fine enough
174
+ assert_equal ["south florida craigslist", "broward county", "cars & trucks - by dealer"], posting2.full_section
175
+ assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE - $23975 (Fort Lauderdale)", posting2.header
176
+ assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE - $23975", posting2.label
177
+ assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE", posting2.title
178
+ assert_equal 'Fort Lauderdale', posting2.location
179
+ assert_equal 1127037648, posting2.posting_id
180
+ assert_equal nil, posting2.reply_to
181
+ assert_equal [0, 16, 14, 17, 4, 2009, 5, 107, true, "EDT"], posting2.post_time.to_a
182
+ assert_equal [], posting2.pics
183
+ assert_equal "\302\240 Sheehan Buick Pontiac GMC \302\240 Pompano Beach, FL(754) 224-3257 \302\240PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!2002 Chevrolet Corvette Z06 Florida Driven AutoCheck Certified 5.7L V8 6sp2 Door Coupe.\302\240Price: \302\240 $23,975Exterior:Electron Blue MetallicInterior:BlackStock#:P5110AVIN:1G1YY12S625129021FREE AutoCheck Vehicle ReportMileage:63,560Transmission:6 Speed ManualEngine:V8 5.7L OHVWarranty:Limited WarrantyTitle:Clear\302\273\302\240View All 58 Photos\302\273\302\240View Full Vehicle Details\302\273\302\240Ask the Seller a Question\302\273\302\240E-mail this to a Friend\302\240 DescriptionPRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!\r\n\r\nLOADED WITH BLACK LEATHER BUCKET SEATS, POWER DRIVERS SEAT, DUAL ZONE CLIMATE CONTROL, 4 WHEEL ABS BRAKES, POWER STEERING AND BRAKES, REAR LIMITED SLIP DIFFERENTIAL, STABILITY CONTROL, CRUISE CONTROL, TLT STEERING WHEEL, POWER WINDOWS AND LOCKS, AUTOMATIC ON/OFF HEADLAMPS, FOG LIGHTS, DUAL AIR BAG SAFETY, AM/FM STEREO CD PLAYER, INTERMITTENT WINDSHIELD WIPERS AND SO MUCH MORE - THIS CAR IS TOTALLY HOT WITH GREAT LOW MILES!\r\n\r\nPlease call us to make your deal now at 1-888-453-5244. Please visit our Website at www.sheehanautoplex.com ***View 50+ Pictures of this vehicle - a complete description including standard features and all added options & a FREE AUTO CHECK REPORT at www.sheehanautoplex.com. ***Financing for Everyone - Good credit - bad credit - divorce - charge off's - NO PROBLEM. 
To complete a secure credit application, please visit our website at www.sheehanautoplex.com ***The largest Dealer in the State of Florida - We export all over the world - For details please visit www.sheehanautoplex.com ***Sheehan Autoplex takes great pride in our outstanding customer service and has been recognized by the following associations - BBB (Better Business Bureau) - NIADA - and the FIADA. Call us to get your best deal. CALL NOW. 1-888-453-5244\302\240 Contact Sheehan Buick Pontiac GMCPhone:(754) 224-3257Fax:(954) 781-9050Phone:(754) 224-3257E-mail:sales@proauto.comBusiness HoursWeekdays:9:00 AM to 9:00 PMSat:9:00 AM to 6:00 PMSun:",posting2.contents_as_plain
184
+ assert_equal 23975.0, posting2.price
185
+ assert_equal ["http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/19bce8e86c_355.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/ff9b026b06_355.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/6b75d87620_355.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/53b025e472_355.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/0d1befded7_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/95477f92bb_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/2850b2f160_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/a4281c6c91_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/862ee4ce71_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/74cadeff2e_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/63b05a0c76_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/00f84ea5bf_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/fe29734ab5_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/7f714d5159_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/720ddcc0a1_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/fc90fba588_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/d576661767_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/3423fb4814_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/5f0a0e85f8_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/d3ca0e29cc_105.jpg", 
"http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/23888ae8bc_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/93fc7d2373_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/9ac9da47b8_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/b1a84ca79e_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/6d219b534d_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/8bfe03d99b_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/d1086ab561_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/ab7a050466_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/9ea616d5d7_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/4b91de556d_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/1cefd8873a_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/8aec930e90_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/76b603822f_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/2d1b6d8a13_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/4fc82180ab_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/843c9e41ae_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/9d91990245_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/f34b8cfaed_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/765dae1031_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/7463a88d92_105.jpg", 
"http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/afe5801857_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/25abb2bd26_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/bc2fdaa3ea_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/e2a9b0dc69_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/08c2ca66b6_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/5e46230ec6_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/0b45184c58_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/311457aed0_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/43090899dc_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/c33b7f4c2a_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/24f419b851_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/50d3e2126d_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/6c125ffc51_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/93db0546fd_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/00e0d91652_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/2b242fbc58_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/8ee3c932a2_105.jpg", "http://static.automanager.com/c/012569/723b2925-b81d-4d6c-8e15-1542bab88dc1/64103fe7bd_105.jpg"], posting2.images
186
+ assert_equal [:img], posting2.img_types
187
+
188
+ posting3 = CraigScrape::Posting.new relative_uri_for('post_samples/posting3.html')
189
+ assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r<br>\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny", posting3.contents
190
+ assert_equal ["south florida craigslist", "miami / dade", "cars & trucks - by owner"], posting3.full_section
191
+ assert_equal "300ZX Nissan Twin Turbo 1992 - $5800 (N.Miami/ Hialeah)", posting3.header
192
+ assert_equal "300ZX Nissan Twin Turbo 1992 - $5800", posting3.label
193
+ assert_equal "300ZX Nissan Twin Turbo 1992", posting3.title
194
+ assert_equal "N.Miami/ Hialeah", posting3.location
195
+ assert_equal 1130212403, posting3.posting_id
196
+ assert_equal "sale-c9bpa-1130212403@craigslist.org", posting3.reply_to
197
+ assert_equal [0, 21, 18, 19, 4, 2009, 0, 109, true, "EDT"], posting3.post_time.to_a
198
+ assert_equal %w(http://images.craigslist.org/3n23kf3lfZZZZZZZZZ94j1160e7d7b0601934.jpg http://images.craigslist.org/3nc3kf3p2ZZZZZZZZZ94j04fbc71e0a551ace.jpg http://images.craigslist.org/3nc3k33l7ZZZZZZZZZ94k13d8d7b1024e1e0e.jpg http://images.craigslist.org/3n23k63mfZZZZZZZZZ94k7838ae5d48d91eb8.jpg), posting3.pics
199
+ assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny",posting3.contents_as_plain
200
+ assert_equal 5800.0, posting3.price
201
+ assert_equal [], posting3.images
202
+ assert_equal ["http://images.craigslist.org/3n23kf3lfZZZZZZZZZ94j1160e7d7b0601934.jpg", "http://images.craigslist.org/3nc3kf3p2ZZZZZZZZZ94j04fbc71e0a551ace.jpg", "http://images.craigslist.org/3nc3k33l7ZZZZZZZZZ94k13d8d7b1024e1e0e.jpg", "http://images.craigslist.org/3n23k63mfZZZZZZZZZ94k7838ae5d48d91eb8.jpg"], posting3.pics
203
+ assert_equal [:pic], posting3.img_types
204
+
205
+ # This one ended up being quite a curveball since the user uploaded HTML was such junk:
206
+ posting4 = CraigScrape::Posting.new relative_uri_for('post_samples/posting4.html')
207
+ assert_equal 19412, posting4.contents.length
208
+ assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], posting4.full_section
209
+ assert_equal "$225000 / 3br - Palm Aire Golf Corner Unit!", posting4.header
210
+ assert_equal "Palm Aire Golf Corner Unit!", posting4.title
211
+ assert_equal "$225000 / 3br - Palm Aire Golf Corner Unit!", posting4.label
212
+ assert_equal nil, posting4.location
213
+ assert_equal 1139303170, posting4.posting_id
214
+ assert_equal "hous-sk9f2-1139303170@craigslist.org", posting4.reply_to
215
+ assert_equal [0, 8, 9, 25, 4, 2009, 6, 115, true, "EDT"], posting4.post_time.to_a
216
+ assert_equal [], posting4.pics
217
+ assert_equal 6396,posting4.contents_as_plain.length
218
+ assert_equal 225000.0, posting4.price
219
+ assert_equal ["http://fortlauderdaleareahomesales.com/myfiles/5.jpg", "http://fortlauderdaleareahomesales.com/myfiles/4.jpg", "http://fortlauderdaleareahomesales.com/myfiles/7.jpg", "http://fortlauderdaleareahomesales.com/myfiles/10.jpg", "http://fortlauderdaleareahomesales.com/myfiles/1.jpg", "http://fortlauderdaleareahomesales.com/myfiles/2.jpg", "http://fortlauderdaleareahomesales.com/myfiles/3.jpg", "http://fortlauderdaleareahomesales.com/myfiles/8.jpg", "http://fortlauderdaleareahomesales.com/myfiles/9.jpg", "http://fortlauderdaleareahomesales.com/myfiles/11.jpg", "http://fortlauderdaleareahomesales.com/myfiles/14.jpg", "http://fortlauderdaleareahomesales.com/myfiles/6.jpg"], posting4.images
220
+ assert_equal [:img], posting4.img_types
221
+
222
+ posting5 = CraigScrape::Posting.new relative_uri_for('post_samples/posting5.html')
223
+ assert_equal true, posting5.flagged_for_removal?
224
+ assert_equal nil, posting5.contents
225
+ assert_equal ["south florida craigslist", "palm beach co", "apts/housing for rent"], posting5.full_section
226
+ assert_equal "This posting has been <a href=\"http://www.craigslist.org/about/help/flags_and_community_moderation\">flagged</a> for removal", posting5.header
227
+ assert_equal nil, posting5.title
228
+ assert_equal nil, posting5.label
229
+ assert_equal nil, posting5.location
230
+ assert_equal nil, posting5.posting_id
231
+ assert_equal nil, posting5.reply_to
232
+ assert_equal nil, posting5.post_time
233
+ assert_equal [], posting5.pics
234
+ assert_equal nil, posting5.contents_as_plain
235
+ assert_equal nil, posting5.price
236
+ assert_equal [], posting5.images
237
+ assert_equal [], posting5.img_types
238
+
239
+ posting_deleted = CraigScrape::Posting.new relative_uri_for('post_samples/this_post_has_been_deleted_by_its_author.html')
240
+ assert_equal true, posting_deleted.deleted_by_author?
241
+ assert_equal nil, posting_deleted.contents
242
+ assert_equal ["south florida craigslist", "broward county", "cars & trucks - by owner"], posting_deleted.full_section
243
+ assert_equal "This posting has been deleted by its author.", posting_deleted.header
244
+ assert_equal nil, posting_deleted.label
245
+ assert_equal nil, posting_deleted.title
246
+ assert_equal nil, posting_deleted.location
247
+ assert_equal nil, posting_deleted.posting_id
248
+ assert_equal nil, posting_deleted.reply_to
249
+ assert_equal nil, posting_deleted.post_time
250
+ assert_equal [], posting_deleted.pics
251
+ assert_equal nil, posting_deleted.contents_as_plain
252
+ assert_equal nil, posting_deleted.price
253
+ assert_equal [], posting_deleted.images
254
+ assert_equal [], posting_deleted.img_types
255
+
256
+ posting6 = CraigScrape::Posting.new relative_uri_for('post_samples/1207457727.html')
257
+ assert_equal "<p><br>Call!! asking for a new owner.<br> no deposit required rent to own properties. <br> <br> Defaulting payment records are not a problem, <br> we will help you protect the previous owners credit history! 202-567-6371 <br><br></p>",posting6.contents
258
+ assert_equal "Call!! asking for a new owner. no deposit required rent to own properties. Defaulting payment records are not a problem, we will help you protect the previous owners credit history! 202-567-6371 ",posting6.contents_as_plain
259
+ assert_equal false,posting6.deleted_by_author?
260
+ assert_equal false,posting6.flagged_for_removal?
261
+ assert_equal ["south florida craigslist", "broward county", "apts/housing for rent"],posting6.full_section
262
+ assert_equal "$1350 / 3br - 2bth for no deposit req (Coral Springs)",posting6.header
263
+ assert_equal "$1350 / 3br - 2bth for no deposit req",posting6.label
264
+ assert_equal ["http://images.craigslist.org/3k43pe3o8ZZZZZZZZZ9655022102a3ea51624.jpg", "http://images.craigslist.org/3n13m53p6ZZZZZZZZZ96596515e51237a179c.jpg", "http://images.craigslist.org/3od3p33leZZZZZZZZZ9656d614da8e3a51dd9.jpg", "http://images.craigslist.org/3pb3oa3leZZZZZZZZZ965eb60e4d2344019fb.jpg"],posting6.pics
265
+ assert_equal 'Coral Springs',posting6.location
266
+ assert_equal [0, 56, 18, 5, 6, 2009, 5, 156, true, "EDT"],posting6.post_time.to_a
267
+ assert_equal 1207457727,posting6.posting_id
268
+ assert_equal 1350.0,posting6.price
269
+ assert_equal "hous-ccpap-1207457727@craigslist.org",posting6.reply_to
270
+ assert_equal "2bth for no deposit req",posting6.title
271
+ assert_equal [], posting6.images
272
+ assert_equal ["http://images.craigslist.org/3k43pe3o8ZZZZZZZZZ9655022102a3ea51624.jpg", "http://images.craigslist.org/3n13m53p6ZZZZZZZZZ96596515e51237a179c.jpg", "http://images.craigslist.org/3od3p33leZZZZZZZZZ9656d614da8e3a51dd9.jpg", "http://images.craigslist.org/3pb3oa3leZZZZZZZZZ965eb60e4d2344019fb.jpg"], posting6.pics
273
+ assert_equal [:pic], posting6.img_types
274
+
275
+ brw_reb_1224008903 = CraigScrape::Posting.new relative_uri_for('post_samples/brw_reb_1224008903.html')
276
+ assert_equal "Nice 3 Bedroom/ 2 Bathroom/ Garage Home in Sunrise. 1,134 square feet of living area with a 6,000 square foot lot. Wood laminate flooring throughout the entire house. House has been updated. Stamped concrete driveway which leads to garage. Big back yard. Central AC. Washer/Dryer. Not a short sale or foreclosure. Asking $189,999. Call Charles Schneider (The Best Damn Real Estate Company Period!) at 954-478-4784.\r<br>\n\r<br>\nDirections: Take Pine Island Road north off of Sunrise Boulevard (past Sunset Strip) to N.W. 25th Court. Head west (left) on N.W. 25th Court to N.W. 91st Lane. Head north (right) on N.W. 91st Lane to N.W. 26th Street. Head east (right) on N.W. 26th Street to the property- 9163 N.W. 26th Street, Sunrise, FL 33322", brw_reb_1224008903.contents
277
+ assert_equal "Nice 3 Bedroom/ 2 Bathroom/ Garage Home in Sunrise. 1,134 square feet of living area with a 6,000 square foot lot. Wood laminate flooring throughout the entire house. House has been updated. Stamped concrete driveway which leads to garage. Big back yard. Central AC. Washer/Dryer. Not a short sale or foreclosure. Asking $189,999. Call Charles Schneider (The Best Damn Real Estate Company Period!) at 954-478-4784.\r\n\r\nDirections: Take Pine Island Road north off of Sunrise Boulevard (past Sunset Strip) to N.W. 25th Court. Head west (left) on N.W. 25th Court to N.W. 91st Lane. Head north (right) on N.W. 91st Lane to N.W. 26th Street. Head east (right) on N.W. 26th Street to the property- 9163 N.W. 26th Street, Sunrise, FL 33322", brw_reb_1224008903.contents_as_plain
278
+ assert_equal false, brw_reb_1224008903.deleted_by_author?
279
+ assert_equal false, brw_reb_1224008903.flagged_for_removal?
280
+ assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], brw_reb_1224008903.full_section
281
+ assert_equal "$189999 / 3br - Nice 3 Bedroom/ 2 Bathroom House with Garage in Sunrise (Sunrise) (map)", brw_reb_1224008903.header
282
+ assert_equal "$189999 / 3br - Nice 3 Bedroom/ 2 Bathroom House with Garage in Sunrise (Sunrise) (map)", brw_reb_1224008903.header_as_plain
283
+ assert_equal ["http://images.craigslist.org/3ma3o93laZZZZZZZZZ96g5ee7cc528f1818a8.jpg", "http://images.craigslist.org/3nf3m03oeZZZZZZZZZ96gb267b7db57d91f60.jpg", "http://images.craigslist.org/3m63oc3p1ZZZZZZZZZ96g521443416aea1cac.jpg", "http://images.craigslist.org/3nc3p53l5ZZZZZZZZZ96g8706fce2c0bb17e9.jpg"], brw_reb_1224008903.pics
284
+ assert_equal "Sunrise", brw_reb_1224008903.location
285
+ assert_equal [0, 43, 18, 16, 6, 2009, 2, 167, true, "EDT"], brw_reb_1224008903.post_time.to_a
286
+ assert_equal 1224008903, brw_reb_1224008903.posting_id
287
+ assert_equal 189999.0, brw_reb_1224008903.price
288
+ assert_equal "1971CJS@Bellsouth.net", brw_reb_1224008903.reply_to
289
+ assert_equal false, brw_reb_1224008903.system_post?
290
+ assert_equal "Nice 3 Bedroom/ 2 Bathroom House with Garage in Sunrise", brw_reb_1224008903.title
291
+ assert_equal "$189999 / 3br - Nice 3 Bedroom/ 2 Bathroom House with Garage in Sunrise", brw_reb_1224008903.label
292
+ assert_equal [], brw_reb_1224008903.images
293
+ assert_equal ["http://images.craigslist.org/3ma3o93laZZZZZZZZZ96g5ee7cc528f1818a8.jpg", "http://images.craigslist.org/3nf3m03oeZZZZZZZZZ96gb267b7db57d91f60.jpg", "http://images.craigslist.org/3m63oc3p1ZZZZZZZZZ96g521443416aea1cac.jpg", "http://images.craigslist.org/3nc3p53l5ZZZZZZZZZ96g8706fce2c0bb17e9.jpg"], brw_reb_1224008903.pics
294
+ assert_equal [:pic], brw_reb_1224008903.img_types
295
+
296
+ sfbay_art_1223614914 = CraigScrape::Posting.new relative_uri_for('post_samples/sfbay_art_1223614914.html')
297
+ assert_equal "Bombay Company Beautiful Art Postered Painting \r<br>\n\225\tThe most beautiful piece of art you could have\r<br>\n\225\tMatches with any type of furnishing and decoration\r<br>\n\225\tA must see/Only one year old\r<br>\n\225\tRegular Price @ $1500.00\r<br>\n\225\tSale Price @ $650.00\r<br>", sfbay_art_1223614914.contents
298
+ assert_equal "Bombay Company Beautiful Art Postered Painting \r\n\225\tThe most beautiful piece of art you could have\r\n\225\tMatches with any type of furnishing and decoration\r\n\225\tA must see/Only one year old\r\n\225\tRegular Price @ $1500.00\r\n\225\tSale Price @ $650.00\r", sfbay_art_1223614914.contents_as_plain
299
+ assert_equal false, sfbay_art_1223614914.deleted_by_author?
300
+ assert_equal false, sfbay_art_1223614914.flagged_for_removal?
301
+ assert_equal ["SF bay area craigslist", "south bay", "art & crafts"], sfbay_art_1223614914.full_section
302
+ assert_equal "Bombay Company Art Painting - $650 (saratoga)", sfbay_art_1223614914.header
303
+ assert_equal "Bombay Company Art Painting - $650 (saratoga)", sfbay_art_1223614914.header_as_plain
304
+ assert_equal ["http://images.craigslist.org/3kf3o93laZZZZZZZZZ96fbc594a6ceb1f1025.jpg"], sfbay_art_1223614914.pics
305
+ assert_equal "Bombay Company Art Painting - $650", sfbay_art_1223614914.label
306
+ assert_equal 'saratoga', sfbay_art_1223614914.location
307
+ assert_equal [0, 0, 0, 15, 6, 2009, 1, 166, true, "EDT"], sfbay_art_1223614914.post_date.to_a
308
+ assert_equal [0, 38, 22, 15, 6, 2009, 1, 166, true, "EDT"], sfbay_art_1223614914.post_time.to_a
309
+ assert_equal 1223614914, sfbay_art_1223614914.posting_id
310
+ assert_equal 650.0, sfbay_art_1223614914.price
311
+ assert_equal "sale-trzm8-1223614914@craigslist.org", sfbay_art_1223614914.reply_to
312
+ assert_equal false, sfbay_art_1223614914.system_post?
313
+ assert_equal "Bombay Company Art Painting", sfbay_art_1223614914.title
314
+ assert_equal [], sfbay_art_1223614914.images
315
+ assert_equal ["http://images.craigslist.org/3kf3o93laZZZZZZZZZ96fbc594a6ceb1f1025.jpg"], sfbay_art_1223614914.pics
316
+ assert_equal [:pic], sfbay_art_1223614914.img_types
317
+ end
318
+
319
+ # This was actually a 'bug' in Hpricot itself, triggered when the stack ulimit is set too low.
320
+ # The easy fix is to run "ulimit -s 16384" before the tests, but the better fix was
321
+ # to remove the userbody before sending these pages to be parsed by Hpricot.
322
+ def test_bugs_found061710 # Loads two saved sample postings and pins each scraped field to its expected value
322
+ posting_061710 = CraigScrape::Posting.new relative_uri_for('post_samples/posting1796890756-061710.html')
323
+ # First sample: expected to parse with no pics/images, no price and no reply_to
324
+ assert_equal false, posting_061710.deleted_by_author?
325
+ assert_equal true, posting_061710.downloaded?
326
+ assert_equal false, posting_061710.flagged_for_removal?
327
+ assert_equal ["south florida craigslist", "miami / dade", "for sale / wanted", "general for sale"], posting_061710.full_section
328
+ assert_equal false, posting_061710.has_img?
329
+ assert_equal false, posting_061710.has_pic?
330
+ assert_equal false, posting_061710.has_pic_or_img?
331
+ assert_equal "*****SOFTWARE**** (Dade/Broward)", posting_061710.header
332
+ assert_equal "*****SOFTWARE**** (Dade/Broward)", posting_061710.header_as_plain
333
+ assert_equal nil, posting_061710.href
334
+ assert_equal [], posting_061710.images
335
+ assert_equal [], posting_061710.img_types
336
+ assert_equal "*****SOFTWARE****", posting_061710.label
337
+ assert_equal "Dade/Broward", posting_061710.location
338
+ assert_equal [], posting_061710.pics
339
+ assert_equal [0, 0, 0, 17, 6, 2010, 4, 168, true, "EDT"], posting_061710.post_date.to_a # NOTE(review): zone is hard-coded to "EDT"; presumably this only passes in a US Eastern local zone -- confirm
340
+ assert_equal [0, 22, 13, 17, 6, 2010, 4, 168, true, "EDT"], posting_061710.post_time.to_a
341
+ assert_equal 1796890756, posting_061710.posting_id
342
+ assert_equal nil, posting_061710.price
343
+ assert_equal nil, posting_061710.reply_to
344
+ assert_equal "general for sale", posting_061710.section
345
+ assert_equal false, posting_061710.system_post?
346
+ assert_equal "*****SOFTWARE****", posting_061710.title
347
+ # Second sample: expected to report images (has_img?) but no pics (has_pic?)
348
+ posting1808219423 = CraigScrape::Posting.new relative_uri_for('post_samples/posting1808219423.html')
349
+ assert_equal false, posting1808219423.deleted_by_author?
350
+ assert_equal true, posting1808219423.downloaded?
351
+ assert_equal false, posting1808219423.flagged_for_removal?
352
+ assert_equal ["south florida craigslist", "miami / dade", "for sale / wanted", "general for sale"], posting1808219423.full_section
353
+ assert_equal true, posting1808219423.has_img?
354
+ assert_equal false, posting1808219423.has_pic?
355
+ assert_equal true, posting1808219423.has_pic_or_img?
356
+ assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More (Dade/Broward)", posting1808219423.header
357
+ assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More (Dade/Broward)", posting1808219423.header_as_plain
358
+ assert_equal nil, posting1808219423.href
359
+ assert_equal ["http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg"], posting1808219423.images # the expected list repeats the same four URLs; duplicates are part of the expectation
360
+ assert_equal [:img], posting1808219423.img_types
361
+ assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More", posting1808219423.label
362
+ assert_equal "Dade/Broward", posting1808219423.location
363
+ assert_equal [], posting1808219423.pics
364
+ assert_equal [0, 0, 0, 24, 6, 2010, 4, 175, true, "EDT"], posting1808219423.post_date.to_a # NOTE(review): same hard-coded "EDT" zone expectation as above -- confirm
365
+ assert_equal [0, 35, 7, 24, 6, 2010, 4, 175, true, "EDT"], posting1808219423.post_time.to_a
366
+ assert_equal 1808219423, posting1808219423.posting_id
367
+ assert_equal nil, posting1808219423.price
368
+ assert_equal nil, posting1808219423.reply_to
369
+ assert_equal "general for sale", posting1808219423.section
370
+ assert_equal false, posting1808219423.system_post?
371
+ assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More", posting1808219423.title
372
+ end
374
+
375
+ def test_bug_found090610 # Loads the saved posting1938291834-090610 sample and pins each scraped field to its expected value
376
+ posting_090610 = CraigScrape::Posting.new relative_uri_for('post_samples/posting1938291834-090610.html')
377
+ # Only the lengths of contents / contents_as_plain are pinned below, not the text itself
378
+ assert_equal 27629, posting_090610.contents.length
379
+ assert_equal 2326, posting_090610.contents_as_plain.length
380
+ assert_equal false, posting_090610.deleted_by_author?
381
+ assert_equal true, posting_090610.downloaded?
382
+ assert_equal false, posting_090610.flagged_for_removal?
383
+ assert_equal ["boston craigslist", "boston/camb/brook", "for sale / wanted", "arts & crafts"], posting_090610.full_section
384
+ assert_equal true, posting_090610.has_img?
385
+ assert_equal false, posting_090610.has_pic?
386
+ assert_equal true, posting_090610.has_pic_or_img?
387
+ assert_equal "2008 GMC Sierra 2500HD - $14800 (boston)", posting_090610.header
388
+ assert_equal "2008 GMC Sierra 2500HD - $14800 (boston)", posting_090610.header_as_plain
389
+ assert_equal nil, posting_090610.href
390
+ assert_equal ["http://i866.photobucket.com/albums/ab228/rodreigo/GMC%20Sierra/used-2008-gmc-sierra_2500hd-slttruckcrewcabstandardbed-5703-5793520-2-400-1.jpg", "http://i866.photobucket.com/albums/ab228/rodreigo/GMC%20Sierra/used-2008-gmc-sierra_2500hd-slttruckcrewcabstandardbed-5703-5793520-1-400.jpg", "http://i866.photobucket.com/albums/ab228/rodreigo/GMC%20Sierra/used-2008-gmc-sierra_2500hd-slttruckcrewcabstandardbed-5703-5793520-29-640.jpg", "http://i866.photobucket.com/albums/ab228/rodreigo/GMC%20Sierra/used-2008-gmc-sierra_2500hd-slttruckcrewcabstandardbed-5703-5793520-11-640.jpg"], posting_090610.images
391
+ assert_equal [:img], posting_090610.img_types
392
+ assert_equal "2008 GMC Sierra 2500HD - $14800", posting_090610.label
393
+ assert_equal "boston", posting_090610.location
394
+ assert_equal [], posting_090610.pics
395
+ assert_equal [0, 0, 0, 5, 9, 2010, 0, 248, true, "EDT"], posting_090610.post_date.to_a # NOTE(review): zone is hard-coded to "EDT"; presumably this only passes in a US Eastern local zone -- confirm
396
+ assert_equal [0, 29, 18, 5, 9, 2010, 0, 248, true, "EDT"], posting_090610.post_time.to_a
397
+ assert_equal 1938291834, posting_090610.posting_id
398
+ assert_equal 14800.0, posting_090610.price
399
+ assert_equal nil, posting_090610.reply_to
400
+ assert_equal "arts & crafts", posting_090610.section
401
+ assert_equal false, posting_090610.system_post?
402
+ assert_equal "2008 GMC Sierra 2500HD", posting_090610.title
403
+ end
404
+
405
+ def test_expired_post # Loads the saved "this post has expired" sample and checks it is reported as an expired system post with no listing data
406
+ posting_expired = CraigScrape::Posting.new relative_uri_for('post_samples/this_post_has_expired.html')
407
+ assert_equal true, posting_expired.posting_has_expired?
408
+ assert_equal true, posting_expired.system_post?
409
+ assert_equal nil, posting_expired.contents
410
+ assert_equal ["charleston craigslist", "for sale / wanted", "cars & trucks - by owner" ], posting_expired.full_section # the section path is still expected to be populated for an expired post
411
+ assert_equal "This posting has expired.", posting_expired.header
412
+ assert_equal nil, posting_expired.label
413
+ assert_equal nil, posting_expired.title
414
+ assert_equal nil, posting_expired.location
415
+ assert_equal nil, posting_expired.posting_id
416
+ assert_equal nil, posting_expired.reply_to
417
+ assert_equal nil, posting_expired.post_time
418
+ assert_equal [], posting_expired.pics
419
+ assert_equal nil, posting_expired.contents_as_plain
420
+ assert_equal nil, posting_expired.price
421
+ assert_equal [], posting_expired.images
422
+ assert_equal [], posting_expired.img_types
423
+ # Scalar fields are expected to be nil, while pics/images/img_types are expected to be empty arrays
424
+ end
425
+
426
+ end