olek-libcraigscrape 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. data/CHANGELOG +94 -0
  2. data/COPYING +674 -0
  3. data/COPYING.LESSER +165 -0
  4. data/README +89 -0
  5. data/Rakefile +125 -0
  6. data/bin/craig_report_schema.yml +68 -0
  7. data/bin/craigwatch +581 -0
  8. data/bin/report_mailer/craigslist_report.html.erb +17 -0
  9. data/bin/report_mailer/craigslist_report.plain.erb +18 -0
  10. data/lib/geo_listings.rb +144 -0
  11. data/lib/libcraigscrape.rb +217 -0
  12. data/lib/listings.rb +160 -0
  13. data/lib/posting.rb +324 -0
  14. data/lib/scraper.rb +212 -0
  15. data/test/geolisting_samples/geo_listing_ca070209.html +76 -0
  16. data/test/geolisting_samples/geo_listing_ca_sk070209.html +31 -0
  17. data/test/geolisting_samples/geo_listing_cn070209.html +35 -0
  18. data/test/geolisting_samples/geo_listing_us070209.html +355 -0
  19. data/test/geolisting_samples/hierarchy_test071009/index.html +31 -0
  20. data/test/geolisting_samples/hierarchy_test071009/us/fl/ft%20myers%20%5C/%20SW%20florida/index.html +46 -0
  21. data/test/geolisting_samples/hierarchy_test071009/us/fl/ft%20myers%20%5C/index.html +46 -0
  22. data/test/geolisting_samples/hierarchy_test071009/us/fl/index.html +46 -0
  23. data/test/geolisting_samples/hierarchy_test071009/us/fl/miami/index.html +46 -0
  24. data/test/geolisting_samples/hierarchy_test071009/us/fl/miami/nonsense/index.html +46 -0
  25. data/test/geolisting_samples/hierarchy_test071009/us/fl/miami/nonsense/more-nonsense/index.html +46 -0
  26. data/test/geolisting_samples/hierarchy_test071009/us/fl/nonexist/index.html +46 -0
  27. data/test/geolisting_samples/hierarchy_test071009/us/fl/nonsense/index.html +46 -0
  28. data/test/geolisting_samples/hierarchy_test071009/us/fl/south%20florida/index.html +46 -0
  29. data/test/geolisting_samples/hierarchy_test071009/us/index.html +355 -0
  30. data/test/google.html +8 -0
  31. data/test/libcraigscrape_test_helpers.rb +37 -0
  32. data/test/listing_samples/category_output.html +231 -0
  33. data/test/listing_samples/category_output_2.html +217 -0
  34. data/test/listing_samples/empty_listings.html +128 -0
  35. data/test/listing_samples/fortmyers_art_index.060909/1046596324.html +93 -0
  36. data/test/listing_samples/fortmyers_art_index.060909/1053085283.html +92 -0
  37. data/test/listing_samples/fortmyers_art_index.060909/1112522674.html +89 -0
  38. data/test/listing_samples/fortmyers_art_index.060909/823516079.html +92 -0
  39. data/test/listing_samples/fortmyers_art_index.060909/825684735.html +89 -0
  40. data/test/listing_samples/fortmyers_art_index.060909/891513957.html +94 -0
  41. data/test/listing_samples/fortmyers_art_index.060909/897549505.html +99 -0
  42. data/test/listing_samples/fortmyers_art_index.060909/960826026.html +89 -0
  43. data/test/listing_samples/fortmyers_art_index.060909/993256300.html +89 -0
  44. data/test/listing_samples/fortmyers_art_index.060909/fortmyers_art_index500.060909.html +237 -0
  45. data/test/listing_samples/fortmyers_art_index.060909/fortmyers_art_index600.060909.html +132 -0
  46. data/test/listing_samples/long_search_output.html +137 -0
  47. data/test/listing_samples/mia_fua_index8900.5.21.09.html +226 -0
  48. data/test/listing_samples/mia_search_kitten.3.15.10.html +149 -0
  49. data/test/listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack1000.6.18.09.html +144 -0
  50. data/test/listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack900.6.18.09.html +146 -0
  51. data/test/listing_samples/new_listing_span.4.17.10.html +769 -0
  52. data/test/listing_samples/short_search_output.html +133 -0
  53. data/test/post_samples/1207457727.html +92 -0
  54. data/test/post_samples/brw_reb_1224008903.html +101 -0
  55. data/test/post_samples/posting0.html +91 -0
  56. data/test/post_samples/posting1.html +106 -0
  57. data/test/post_samples/posting1796890756-061710.html +2318 -0
  58. data/test/post_samples/posting1808219423.html +2473 -0
  59. data/test/post_samples/posting1938291834-090610.html +188 -0
  60. data/test/post_samples/posting2.html +107 -0
  61. data/test/post_samples/posting3.html +92 -0
  62. data/test/post_samples/posting4.html +993 -0
  63. data/test/post_samples/posting5.html +38 -0
  64. data/test/post_samples/sfbay_art_1223614914.html +94 -0
  65. data/test/post_samples/this_post_has_been_deleted_by_its_author.html +37 -0
  66. data/test/post_samples/this_post_has_expired.html +48 -0
  67. data/test/test_craigslist_geolisting.rb +521 -0
  68. data/test/test_craigslist_listing.rb +362 -0
  69. data/test/test_craigslist_posting.rb +426 -0
  70. metadata +273 -0
@@ -0,0 +1,38 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2
+ <html>
3
+ <head>
4
+ <title></title>
5
+ <meta name="robots" content="NOARCHIVE,NOFOLLOW">
6
+ <link rel="stylesheet" title="craigslist" href="http://www.craigslist.org/styles/craigslist.css" type="text/css" media="all">
7
+ </head>
8
+
9
+ <body onload="initFlag(1139838814)" class="posting">
10
+
11
+ <div class="bchead">
12
+
13
+ <a href="http://miami.craigslist.org">south florida craigslist</a>
14
+ &gt; <a href="/pbc/">palm beach co</a> &gt; <a href="/pbc/apa/">apts/housing for rent</a>
15
+ </div>
16
+
17
+
18
+
19
+ <hr>
20
+ <br>
21
+ <br>
22
+ <h2>This posting has been <a href="http://www.craigslist.org/about/help/flags_and_community_moderation">flagged</a> for removal</h2>
23
+ <h5>(The title on the listings page will be removed in just a few minutes.)</h5>
24
+
25
+ <br><br>
26
+
27
+ <hr>
28
+ <ul class="clfooter">
29
+ <li>Copyright &copy; 2009 craigslist, inc.</li>
30
+ <li><a href="http://www.craigslist.org/about/terms.of.use.html">terms of use</a></li>
31
+ <li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
32
+ <li><a href="/forums/?forumID=8">feedback forum</a></li>
33
+ </ul>
34
+ <script type="text/javascript" src="http://www.craigslist.org/js/jquery.js"></script>
35
+ <script type="text/javascript" src="http://www.craigslist.org/js/postings.js"></script>
36
+ </body>
37
+ </html>
38
+
@@ -0,0 +1,94 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2
+ <html>
3
+ <head>
4
+ <title>Bombay Company Art Painting</title>
5
+ <meta name="robots" content="NOARCHIVE,NOFOLLOW">
6
+ <link rel="stylesheet" title="craigslist" href="http://www.craigslist.org/styles/craigslist.css" type="text/css" media="all">
7
+ </head>
8
+
9
+ <body onload="initFlag(1223614914)" class="posting">
10
+
11
+ <div class="bchead">
12
+ <a id="ef" href="/email.friend?postingID=1223614914">email this posting to a friend</a>
13
+ <a href="http://sfbay.craigslist.org">SF bay area craigslist</a>
14
+ &gt; <a href="/sby/">south bay</a> &gt; <a href="/sby/art/">art &amp; crafts</a>
15
+ </div>
16
+
17
+ <div id="flags">
18
+ <div id="flagMsg">
19
+ please <a href="http://www.craigslist.org/about/help/flags_and_community_moderation">flag</a> with care:
20
+ </div>
21
+ <div id="flagChooser">
22
+ <br>
23
+ <a class="fl" id="flag16" href="/flag/?flagCode=16&amp;postingID=1223614914"
24
+ title="Wrong category, wrong site, discusses another post, or otherwise misplaced">
25
+ miscategorized</a>
26
+ <br>
27
+
28
+ <a class="fl" id="flag28" href="/flag/?flagCode=28&amp;postingID=1223614914"
29
+ title="Violates craigslist Terms Of Use or other posted guidelines">
30
+ prohibited</a>
31
+ <br>
32
+
33
+ <a class="fl" id="flag15" href="/flag/?flagCode=15&amp;postingID=1223614914"
34
+ title="Posted too frequently, in multiple cities/categories, or is too commercial">
35
+ spam/overpost</a>
36
+ <br>
37
+
38
+ <a class="fl" id="flag9" href="/flag/?flagCode=9&amp;postingID=1223614914"
39
+ title="Should be considered for inclusion in the Best-Of-Craigslist">
40
+ best of craigslist</a>
41
+ <br>
42
+ </div>
43
+ </div>
44
+
45
+ <div id="tsb">
46
+ <em>Avoid scams and fraud by dealing locally!</em> Beware any deal involving Western Union, Moneygram, wire transfer, cashier check, money order, shipping, escrow, or any promise of transaction protection/certification/guarantee. <a href="http://www.craigslist.org/about/scams.html">More info</a></div>
47
+ <h2>Bombay Company Art Painting - $650 (saratoga)</h2>
48
+ <hr>
49
+ Reply to: <a href="mailto:sale-trzm8-1223614914@craigslist.org?subject=Bombay%20Company%20Art%20Painting%20-%20%24650%20(saratoga)">sale-trzm8-1223614914@craigslist.org</a> <sup>[<a href="http://www.craigslist.org/about/help/replying_to_posts" target="_blank">Errors when replying to ads?</a>]</sup><br>
50
+ Date: 2009-06-15, 7:38PM PDT<br>
51
+ <br>
52
+ <br>
53
+ <div id="userbody">
54
+ Bombay Company Beautiful Art Postered Painting
55
+ � The most beautiful piece of art you could have
56
+ � Matches with any type of furnishing and decoration
57
+ � A must see/Only one year old
58
+ � Regular Price @ $1500.00
59
+ � Sale Price @ $650.00
60
+
61
+
62
+
63
+ <br><br><ul>
64
+ <li>it's NOT ok to contact this poster with services or other commercial interests</ul>
65
+
66
+ <table summary="craigslist hosted images">
67
+ <tr>
68
+ <td align="center"><img src="http://images.craigslist.org/3kf3o93laZZZZZZZZZ96fbc594a6ceb1f1025.jpg" alt="image 1223614914-0"></td>
69
+ <td align="center"></td>
70
+ </tr>
71
+ <tr>
72
+ <td align="center"></td>
73
+ <td align="center"></td>
74
+ </tr>
75
+ </table>
76
+
77
+ </div>
78
+ PostingID: 1223614914<br>
79
+
80
+
81
+ <br>
82
+
83
+ <hr>
84
+ <ul class="clfooter">
85
+ <li>Copyright &copy; 2009 craigslist, inc.</li>
86
+ <li><a href="http://www.craigslist.org/about/terms.of.use.html">terms of use</a></li>
87
+ <li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
88
+ <li><a href="/forums/?forumID=8">feedback forum</a></li>
89
+ </ul>
90
+ <script type="text/javascript" src="http://www.craigslist.org/js/jquery.js"></script>
91
+ <script type="text/javascript" src="http://www.craigslist.org/js/postings.js"></script>
92
+ </body>
93
+ </html>
94
+
@@ -0,0 +1,37 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2
+ <html>
3
+ <head>
4
+ <title></title>
5
+ <meta name="robots" content="NOARCHIVE,NOFOLLOW" />
6
+ <link href="http://www.craigslist.org/styles/craigslist.css" title="craigslist" rel="stylesheet" media="all" type="text/css" />
7
+ </head>
8
+
9
+ <body class="posting" onload="initFlag(1187861811)">
10
+
11
+ <div class="bchead">
12
+
13
+ <a href="http://miami.craigslist.org">south florida craigslist</a>
14
+ &gt; <a href="/brw/">broward county</a> &gt; <a href="/brw/cto/">cars &amp; trucks - by owner</a>
15
+ </div>
16
+
17
+
18
+
19
+ <hr />
20
+ <br />
21
+ <br />
22
+ <h2>This posting has been deleted by its author.</h2>
23
+ <h5>(The title on the listings page will be removed in just a few minutes.)</h5>
24
+
25
+ <br /><br />
26
+
27
+ <hr />
28
+ <ul class="clfooter">
29
+ <li>Copyright &copy; 2009 craigslist, inc.</li>
30
+ <li><a href="http://www.craigslist.org/about/terms.of.use.html">terms of use</a></li>
31
+ <li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
32
+ <li><a href="/forums/?forumID=8">feedback forum</a></li>
33
+ </ul>
34
+ <script src="http://www.craigslist.org/js/jquery.js" type="text/javascript"></script>
35
+ <script src="http://www.craigslist.org/js/postings.js" type="text/javascript"></script>
36
+ </body>
37
+ </html>
@@ -0,0 +1,48 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2
+ <html>
3
+ <head>
4
+ <title></title>
5
+ <meta name="robots" content="NOARCHIVE,NOFOLLOW">
6
+ <link type="text/css" rel="stylesheet" media="all" href="http://www.craigslist.org/styles/craigslist.css?v=8">
7
+ </head>
8
+
9
+ <body class="posting">
10
+
11
+
12
+ <div class="bchead">
13
+
14
+ <a href="http://charleston.craigslist.org/">charleston craigslist</a> &gt;
15
+
16
+ <a href="http://charleston.craigslist.org/sss/">for sale / wanted</a> &gt;
17
+ <a href="http://charleston.craigslist.org/cto/">cars &amp; trucks - by owner</a>
18
+ </div>
19
+
20
+
21
+
22
+
23
+
24
+ <hr>
25
+ <br>
26
+ <br>
27
+ <h2>This posting has expired.</h2>
28
+ <h5>(The title on the listings page will be removed in just a few minutes.)</h5>
29
+
30
+ <br><br>
31
+
32
+ <hr>
33
+ <ul class="clfooter">
34
+ <li>Copyright &copy; 2011 craigslist, inc.</li>
35
+ <li><a href="http://www.craigslist.org/about/terms.of.use.html">terms of use</a></li>
36
+ <li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
37
+ <li><a href="/forums/?forumID=8">feedback forum</a></li>
38
+ </ul>
39
+
40
+ <script type="text/javascript" src="http://www.craigslist.org/js/jquery-1.4.2.js"></script>
41
+ <script type="text/javascript" src="http://www.craigslist.org/js/postings.js"></script>
42
+ <script type="text/javascript"><!--
43
+ pID = 1968731193;
44
+ -->
45
+ </script>
46
+ </body>
47
+ </html>
48
+
@@ -0,0 +1,521 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'test/unit'
4
+ require File.dirname(__FILE__)+'/../lib/libcraigscrape'
5
+ require File.dirname(__FILE__)+'/libcraigscrape_test_helpers'
6
+
7
+ class CraigslistGeolistingTest < Test::Unit::TestCase
8
+ include LibcraigscrapeTestHelpers
9
+
10
+ def test_pukes # Checks that reading #sites from a GeoListings built on the google.html fixture raises a ParseError.
11
+ assert_raise(CraigScrape::Scraper::ParseError) do # the test passes only if the block below raises this error class
12
+ CraigScrape::GeoListings.new( relative_uri_for('google.html') ).sites # NOTE(review): google.html is presumably not a craigslist geo listing page -- confirm against the fixture
13
+ end
14
+ end
15
+
16
+ def test_geo_listings
17
+ geo_listing_us070209 = CraigScrape::GeoListings.new relative_uri_for(
18
+ 'geolisting_samples/geo_listing_us070209.html'
19
+ )
20
+ assert_equal 'united states', geo_listing_us070209.location
21
+ assert_equal 326, geo_listing_us070209.sites.length
22
+ assert_equal "sfbay.craigslist.org", geo_listing_us070209.sites["SF bay area"]
23
+ assert_equal "abilene.craigslist.org", geo_listing_us070209.sites["abilene"]
24
+ assert_equal "akroncanton.craigslist.org", geo_listing_us070209.sites["akron / canton"]
25
+ assert_equal "anchorage.craigslist.org", geo_listing_us070209.sites["alaska"]
26
+ assert_equal "albany.craigslist.org", geo_listing_us070209.sites["albany"]
27
+ assert_equal "albuquerque.craigslist.org", geo_listing_us070209.sites["albuquerque"]
28
+ assert_equal "altoona.craigslist.org", geo_listing_us070209.sites["altoona-johnstown"]
29
+ assert_equal "amarillo.craigslist.org", geo_listing_us070209.sites["amarillo"]
30
+ assert_equal "ames.craigslist.org", geo_listing_us070209.sites["ames, IA"]
31
+ assert_equal "annarbor.craigslist.org", geo_listing_us070209.sites["ann arbor"]
32
+ assert_equal "annapolis.craigslist.org", geo_listing_us070209.sites["annapolis"]
33
+ assert_equal "appleton.craigslist.org", geo_listing_us070209.sites["appleton-oshkosh-FDL"]
34
+ assert_equal "asheville.craigslist.org", geo_listing_us070209.sites["asheville"]
35
+ assert_equal "athensga.craigslist.org", geo_listing_us070209.sites["athens, GA"]
36
+ assert_equal "athensohio.craigslist.org", geo_listing_us070209.sites["athens, OH"]
37
+ assert_equal "atlanta.craigslist.org", geo_listing_us070209.sites["atlanta"]
38
+ assert_equal "auburn.craigslist.org", geo_listing_us070209.sites["auburn"]
39
+ assert_equal "augusta.craigslist.org", geo_listing_us070209.sites["augusta"]
40
+ assert_equal "austin.craigslist.org", geo_listing_us070209.sites["austin"]
41
+ assert_equal "bakersfield.craigslist.org", geo_listing_us070209.sites["bakersfield"]
42
+ assert_equal "baltimore.craigslist.org", geo_listing_us070209.sites["baltimore"]
43
+ assert_equal "batonrouge.craigslist.org", geo_listing_us070209.sites["baton rouge"]
44
+ assert_equal "beaumont.craigslist.org", geo_listing_us070209.sites["beaumont / port arthur"]
45
+ assert_equal "bellingham.craigslist.org", geo_listing_us070209.sites["bellingham"]
46
+ assert_equal "bend.craigslist.org", geo_listing_us070209.sites["bend"]
47
+ assert_equal "binghamton.craigslist.org", geo_listing_us070209.sites["binghamton"]
48
+ assert_equal "bham.craigslist.org", geo_listing_us070209.sites["birmingham, AL"]
49
+ assert_equal "blacksburg.craigslist.org", geo_listing_us070209.sites["blacksburg"]
50
+ assert_equal "bloomington.craigslist.org", geo_listing_us070209.sites["bloomington"]
51
+ assert_equal "bn.craigslist.org", geo_listing_us070209.sites["bloomington-normal"]
52
+ assert_equal "boise.craigslist.org", geo_listing_us070209.sites["boise"]
53
+ assert_equal "boone.craigslist.org", geo_listing_us070209.sites["boone"]
54
+ assert_equal "boston.craigslist.org", geo_listing_us070209.sites["boston"]
55
+ assert_equal "boulder.craigslist.org", geo_listing_us070209.sites["boulder"]
56
+ assert_equal "bgky.craigslist.org", geo_listing_us070209.sites["bowling green"]
57
+ assert_equal "brownsville.craigslist.org", geo_listing_us070209.sites["brownsville"]
58
+ assert_equal "brunswick.craigslist.org", geo_listing_us070209.sites["brunswick, GA"]
59
+ assert_equal "buffalo.craigslist.org", geo_listing_us070209.sites["buffalo"]
60
+ assert_equal "capecod.craigslist.org", geo_listing_us070209.sites["cape cod / islands"]
61
+ assert_equal "carbondale.craigslist.org", geo_listing_us070209.sites["carbondale"]
62
+ assert_equal "catskills.craigslist.org", geo_listing_us070209.sites["catskills"]
63
+ assert_equal "cedarrapids.craigslist.org", geo_listing_us070209.sites["cedar rapids"]
64
+ assert_equal "cnj.craigslist.org", geo_listing_us070209.sites["central NJ"]
65
+ assert_equal "centralmich.craigslist.org", geo_listing_us070209.sites["central michigan"]
66
+ assert_equal "chambana.craigslist.org", geo_listing_us070209.sites["champaign urbana"]
67
+ assert_equal "charleston.craigslist.org", geo_listing_us070209.sites["charleston, SC"]
68
+ assert_equal "charlestonwv.craigslist.org", geo_listing_us070209.sites["charleston, WV"]
69
+ assert_equal "charlotte.craigslist.org", geo_listing_us070209.sites["charlotte"]
70
+ assert_equal "charlottesville.craigslist.org", geo_listing_us070209.sites["charlottesville"]
71
+ assert_equal "chattanooga.craigslist.org", geo_listing_us070209.sites["chattanooga"]
72
+ assert_equal "chautauqua.craigslist.org", geo_listing_us070209.sites["chautauqua"]
73
+ assert_equal "chicago.craigslist.org", geo_listing_us070209.sites["chicago"]
74
+ assert_equal "chico.craigslist.org", geo_listing_us070209.sites["chico"]
75
+ assert_equal "cincinnati.craigslist.org", geo_listing_us070209.sites["cincinnati, OH"]
76
+ assert_equal "clarksville.craigslist.org", geo_listing_us070209.sites["clarksville, TN"]
77
+ assert_equal "cleveland.craigslist.org", geo_listing_us070209.sites["cleveland"]
78
+ assert_equal "collegestation.craigslist.org", geo_listing_us070209.sites["college station"]
79
+ assert_equal "cosprings.craigslist.org", geo_listing_us070209.sites["colorado springs"]
80
+ assert_equal "columbiamo.craigslist.org", geo_listing_us070209.sites["columbia / jeff city"]
81
+ assert_equal "columbia.craigslist.org", geo_listing_us070209.sites["columbia, SC"]
82
+ assert_equal "columbus.craigslist.org", geo_listing_us070209.sites["columbus"]
83
+ assert_equal "columbusga.craigslist.org", geo_listing_us070209.sites["columbus, GA"]
84
+ assert_equal "corpuschristi.craigslist.org", geo_listing_us070209.sites["corpus christi"]
85
+ assert_equal "corvallis.craigslist.org", geo_listing_us070209.sites["corvallis/albany"]
86
+ assert_equal "dallas.craigslist.org", geo_listing_us070209.sites["dallas / fort worth"]
87
+ assert_equal "danville.craigslist.org", geo_listing_us070209.sites["danville"]
88
+ assert_equal "dayton.craigslist.org", geo_listing_us070209.sites["dayton / springfield"]
89
+ assert_equal "daytona.craigslist.org", geo_listing_us070209.sites["daytona beach"]
90
+ assert_equal "decatur.craigslist.org", geo_listing_us070209.sites["decatur, IL"]
91
+ assert_equal "delaware.craigslist.org", geo_listing_us070209.sites["delaware"]
92
+ assert_equal "denver.craigslist.org", geo_listing_us070209.sites["denver"]
93
+ assert_equal "desmoines.craigslist.org", geo_listing_us070209.sites["des moines"]
94
+ assert_equal "detroit.craigslist.org", geo_listing_us070209.sites["detroit metro"]
95
+ assert_equal "dothan.craigslist.org", geo_listing_us070209.sites["dothan, AL"]
96
+ assert_equal "dubuque.craigslist.org", geo_listing_us070209.sites["dubuque"]
97
+ assert_equal "duluth.craigslist.org", geo_listing_us070209.sites["duluth / superior"]
98
+ assert_equal "eastidaho.craigslist.org", geo_listing_us070209.sites["east idaho"]
99
+ assert_equal "eastoregon.craigslist.org", geo_listing_us070209.sites["east oregon"]
100
+ assert_equal "newlondon.craigslist.org", geo_listing_us070209.sites["eastern CT"]
101
+ assert_equal "eastnc.craigslist.org", geo_listing_us070209.sites["eastern NC"]
102
+ assert_equal "easternshore.craigslist.org", geo_listing_us070209.sites["eastern shore"]
103
+ assert_equal "eauclaire.craigslist.org", geo_listing_us070209.sites["eau claire"]
104
+ assert_equal "elpaso.craigslist.org", geo_listing_us070209.sites["el paso"]
105
+ assert_equal "elmira.craigslist.org", geo_listing_us070209.sites["elmira-corning"]
106
+ assert_equal "erie.craigslist.org", geo_listing_us070209.sites["erie, PA"]
107
+ assert_equal "eugene.craigslist.org", geo_listing_us070209.sites["eugene"]
108
+ assert_equal "evansville.craigslist.org", geo_listing_us070209.sites["evansville"]
109
+ assert_equal "fargo.craigslist.org", geo_listing_us070209.sites["fargo / moorhead"]
110
+ assert_equal "farmington.craigslist.org", geo_listing_us070209.sites["farmington, NM"]
111
+ assert_equal "fayetteville.craigslist.org", geo_listing_us070209.sites["fayetteville"]
112
+ assert_equal "fayar.craigslist.org", geo_listing_us070209.sites["fayetteville, AR"]
113
+ assert_equal "flagstaff.craigslist.org", geo_listing_us070209.sites["flagstaff / sedona"]
114
+ assert_equal "flint.craigslist.org", geo_listing_us070209.sites["flint"]
115
+ assert_equal "shoals.craigslist.org", geo_listing_us070209.sites["florence / muscle shoals"]
116
+ assert_equal "florencesc.craigslist.org", geo_listing_us070209.sites["florence, SC"]
117
+ assert_equal "keys.craigslist.org", geo_listing_us070209.sites["florida keys"]
118
+ assert_equal "fortcollins.craigslist.org", geo_listing_us070209.sites["fort collins / north CO"]
119
+ assert_equal "fortsmith.craigslist.org", geo_listing_us070209.sites["fort smith, AR"]
120
+ assert_equal "fortwayne.craigslist.org", geo_listing_us070209.sites["fort wayne"]
121
+ assert_equal "fredericksburg.craigslist.org", geo_listing_us070209.sites["fredericksburg"]
122
+ assert_equal "fresno.craigslist.org", geo_listing_us070209.sites["fresno"]
123
+ assert_equal "fortmyers.craigslist.org", geo_listing_us070209.sites["ft myers / SW florida"]
124
+ assert_equal "gadsden.craigslist.org", geo_listing_us070209.sites["gadsden-anniston"]
125
+ assert_equal "gainesville.craigslist.org", geo_listing_us070209.sites["gainesville"]
126
+ assert_equal "galveston.craigslist.org", geo_listing_us070209.sites["galveston"]
127
+ assert_equal "goldcountry.craigslist.org", geo_listing_us070209.sites["gold country"]
128
+ assert_equal "grandisland.craigslist.org", geo_listing_us070209.sites["grand island"]
129
+ assert_equal "grandrapids.craigslist.org", geo_listing_us070209.sites["grand rapids"]
130
+ assert_equal "greenbay.craigslist.org", geo_listing_us070209.sites["green bay"]
131
+ assert_equal "greensboro.craigslist.org", geo_listing_us070209.sites["greensboro"]
132
+ assert_equal "greenville.craigslist.org", geo_listing_us070209.sites["greenville / upstate"]
133
+ assert_equal "gulfport.craigslist.org", geo_listing_us070209.sites["gulfport / biloxi"]
134
+ assert_equal "norfolk.craigslist.org", geo_listing_us070209.sites["hampton roads"]
135
+ assert_equal "harrisburg.craigslist.org", geo_listing_us070209.sites["harrisburg"]
136
+ assert_equal "harrisonburg.craigslist.org", geo_listing_us070209.sites["harrisonburg"]
137
+ assert_equal "hartford.craigslist.org", geo_listing_us070209.sites["hartford"]
138
+ assert_equal "hattiesburg.craigslist.org", geo_listing_us070209.sites["hattiesburg"]
139
+ assert_equal "honolulu.craigslist.org", geo_listing_us070209.sites["hawaii"]
140
+ assert_equal "hickory.craigslist.org", geo_listing_us070209.sites["hickory / lenoir"]
141
+ assert_equal "hiltonhead.craigslist.org", geo_listing_us070209.sites["hilton head"]
142
+ assert_equal "houston.craigslist.org", geo_listing_us070209.sites["houston"]
143
+ assert_equal "hudsonvalley.craigslist.org", geo_listing_us070209.sites["hudson valley"]
144
+ assert_equal "humboldt.craigslist.org", geo_listing_us070209.sites["humboldt county"]
145
+ assert_equal "huntington.craigslist.org", geo_listing_us070209.sites["huntington-ashland"]
146
+ assert_equal "huntsville.craigslist.org", geo_listing_us070209.sites["huntsville"]
147
+ assert_equal "imperial.craigslist.org", geo_listing_us070209.sites["imperial county"]
148
+ assert_equal "indianapolis.craigslist.org", geo_listing_us070209.sites["indianapolis"]
149
+ assert_equal "inlandempire.craigslist.org", geo_listing_us070209.sites["inland empire"]
150
+ assert_equal "iowacity.craigslist.org", geo_listing_us070209.sites["iowa city"]
151
+ assert_equal "ithaca.craigslist.org", geo_listing_us070209.sites["ithaca"]
152
+ assert_equal "jxn.craigslist.org", geo_listing_us070209.sites["jackson, MI"]
153
+ assert_equal "jackson.craigslist.org", geo_listing_us070209.sites["jackson, MS"]
154
+ assert_equal "jacksontn.craigslist.org", geo_listing_us070209.sites["jackson, TN"]
155
+ assert_equal "jacksonville.craigslist.org", geo_listing_us070209.sites["jacksonville"]
156
+ assert_equal "janesville.craigslist.org", geo_listing_us070209.sites["janesville"]
157
+ assert_equal "jerseyshore.craigslist.org", geo_listing_us070209.sites["jersey shore"]
158
+ assert_equal "jonesboro.craigslist.org", geo_listing_us070209.sites["jonesboro"]
159
+ assert_equal "joplin.craigslist.org", geo_listing_us070209.sites["joplin"]
160
+ assert_equal "kalamazoo.craigslist.org", geo_listing_us070209.sites["kalamazoo"]
161
+ assert_equal "kansascity.craigslist.org", geo_listing_us070209.sites["kansas city, MO"]
162
+ assert_equal "kpr.craigslist.org", geo_listing_us070209.sites["kennewick-pasco-richland"]
163
+ assert_equal "racine.craigslist.org", geo_listing_us070209.sites["kenosha-racine"]
164
+ assert_equal "killeen.craigslist.org", geo_listing_us070209.sites["killeen / temple / ft hood"]
165
+ assert_equal "knoxville.craigslist.org", geo_listing_us070209.sites["knoxville"]
166
+ assert_equal "lacrosse.craigslist.org", geo_listing_us070209.sites["la crosse"]
167
+ assert_equal "lafayette.craigslist.org", geo_listing_us070209.sites["lafayette"]
168
+ assert_equal "tippecanoe.craigslist.org", geo_listing_us070209.sites["lafayette / west lafayette"]
169
+ assert_equal "lakecharles.craigslist.org", geo_listing_us070209.sites["lake charles"]
170
+ assert_equal "lakeland.craigslist.org", geo_listing_us070209.sites["lakeland"]
171
+ assert_equal "lancaster.craigslist.org", geo_listing_us070209.sites["lancaster, PA"]
172
+ assert_equal "lansing.craigslist.org", geo_listing_us070209.sites["lansing"]
173
+ assert_equal "laredo.craigslist.org", geo_listing_us070209.sites["laredo"]
174
+ assert_equal "lascruces.craigslist.org", geo_listing_us070209.sites["las cruces"]
175
+ assert_equal "lasvegas.craigslist.org", geo_listing_us070209.sites["las vegas"]
176
+ assert_equal "lawrence.craigslist.org", geo_listing_us070209.sites["lawrence"]
177
+ assert_equal "lawton.craigslist.org", geo_listing_us070209.sites["lawton"]
178
+ assert_equal "allentown.craigslist.org", geo_listing_us070209.sites["lehigh valley"]
179
+ assert_equal "lexington.craigslist.org", geo_listing_us070209.sites["lexington, KY"]
180
+ assert_equal "limaohio.craigslist.org", geo_listing_us070209.sites["lima / findlay"]
181
+ assert_equal "lincoln.craigslist.org", geo_listing_us070209.sites["lincoln"]
182
+ assert_equal "littlerock.craigslist.org", geo_listing_us070209.sites["little rock"]
183
+ assert_equal "logan.craigslist.org", geo_listing_us070209.sites["logan"]
184
+ assert_equal "longisland.craigslist.org", geo_listing_us070209.sites["long island"]
185
+ assert_equal "losangeles.craigslist.org", geo_listing_us070209.sites["los angeles"]
186
+ assert_equal "louisville.craigslist.org", geo_listing_us070209.sites["louisville"]
187
+ assert_equal "lubbock.craigslist.org", geo_listing_us070209.sites["lubbock"]
188
+ assert_equal "lynchburg.craigslist.org", geo_listing_us070209.sites["lynchburg"]
189
+ assert_equal "macon.craigslist.org", geo_listing_us070209.sites["macon"]
190
+ assert_equal "madison.craigslist.org", geo_listing_us070209.sites["madison"]
191
+ assert_equal "maine.craigslist.org", geo_listing_us070209.sites["maine"]
192
+ assert_equal "ksu.craigslist.org", geo_listing_us070209.sites["manhattan, KS"]
193
+ assert_equal "mankato.craigslist.org", geo_listing_us070209.sites["mankato"]
194
+ assert_equal "mansfield.craigslist.org", geo_listing_us070209.sites["mansfield"]
195
+ assert_equal "martinsburg.craigslist.org", geo_listing_us070209.sites["martinsburg"]
196
+ assert_equal "mcallen.craigslist.org", geo_listing_us070209.sites["mcallen / edinburg"]
197
+ assert_equal "medford.craigslist.org", geo_listing_us070209.sites["medford-ashland-klamath"]
198
+ assert_equal "memphis.craigslist.org", geo_listing_us070209.sites["memphis, TN"]
199
+ assert_equal "mendocino.craigslist.org", geo_listing_us070209.sites["mendocino county"]
200
+ assert_equal "merced.craigslist.org", geo_listing_us070209.sites["merced"]
201
+ assert_equal "milwaukee.craigslist.org", geo_listing_us070209.sites["milwaukee"]
202
+ assert_equal "minneapolis.craigslist.org", geo_listing_us070209.sites["minneapolis / st paul"]
203
+ assert_equal "mobile.craigslist.org", geo_listing_us070209.sites["mobile"]
204
+ assert_equal "modesto.craigslist.org", geo_listing_us070209.sites["modesto"]
205
+ assert_equal "mohave.craigslist.org", geo_listing_us070209.sites["mohave county"]
206
+ assert_equal "monroe.craigslist.org", geo_listing_us070209.sites["monroe, LA"]
207
+ assert_equal "montana.craigslist.org", geo_listing_us070209.sites["montana"]
208
+ assert_equal "monterey.craigslist.org", geo_listing_us070209.sites["monterey bay"]
209
+ assert_equal "montgomery.craigslist.org", geo_listing_us070209.sites["montgomery"]
210
+ assert_equal "morgantown.craigslist.org", geo_listing_us070209.sites["morgantown"]
211
+ assert_equal "muncie.craigslist.org", geo_listing_us070209.sites["muncie / anderson"]
212
+ assert_equal "muskegon.craigslist.org", geo_listing_us070209.sites["muskegon"]
213
+ assert_equal "myrtlebeach.craigslist.org", geo_listing_us070209.sites["myrtle beach"]
214
+ assert_equal "nashville.craigslist.org", geo_listing_us070209.sites["nashville"]
215
+ assert_equal "nh.craigslist.org", geo_listing_us070209.sites["new hampshire"]
216
+ assert_equal "newhaven.craigslist.org", geo_listing_us070209.sites["new haven"]
217
+ assert_equal "neworleans.craigslist.org", geo_listing_us070209.sites["new orleans"]
218
+ assert_equal "newyork.craigslist.org", geo_listing_us070209.sites["new york city"]
219
+ assert_equal "nd.craigslist.org", geo_listing_us070209.sites["north dakota"]
220
+ assert_equal "newjersey.craigslist.org", geo_listing_us070209.sites["north jersey"]
221
+ assert_equal "northmiss.craigslist.org", geo_listing_us070209.sites["north mississippi"]
222
+ assert_equal "nmi.craigslist.org", geo_listing_us070209.sites["northern michigan"]
223
+ assert_equal "nwct.craigslist.org", geo_listing_us070209.sites["northwest CT"]
224
+ assert_equal "ocala.craigslist.org", geo_listing_us070209.sites["ocala"]
225
+ assert_equal "odessa.craigslist.org", geo_listing_us070209.sites["odessa / midland"]
226
+ assert_equal "ogden.craigslist.org", geo_listing_us070209.sites["ogden-clearfield"]
227
+ assert_equal "oklahomacity.craigslist.org", geo_listing_us070209.sites["oklahoma city"]
228
+ assert_equal "olympic.craigslist.org", geo_listing_us070209.sites["olympic peninsula"]
229
+ assert_equal "omaha.craigslist.org", geo_listing_us070209.sites["omaha / council bluffs"]
230
+ assert_equal "orangecounty.craigslist.org", geo_listing_us070209.sites["orange county"]
231
+ assert_equal "oregoncoast.craigslist.org", geo_listing_us070209.sites["oregon coast"]
232
+ assert_equal "orlando.craigslist.org", geo_listing_us070209.sites["orlando"]
233
+ assert_equal "outerbanks.craigslist.org", geo_listing_us070209.sites["outer banks"]
234
+ assert_equal "palmsprings.craigslist.org", geo_listing_us070209.sites["palm springs, CA"]
235
+ assert_equal "panamacity.craigslist.org", geo_listing_us070209.sites["panama city, FL"]
236
+ assert_equal "parkersburg.craigslist.org", geo_listing_us070209.sites["parkersburg-marietta"]
237
+ assert_equal "pensacola.craigslist.org", geo_listing_us070209.sites["pensacola / panhandle"]
238
+ assert_equal "peoria.craigslist.org", geo_listing_us070209.sites["peoria"]
239
+ assert_equal "philadelphia.craigslist.org", geo_listing_us070209.sites["philadelphia"]
240
+ assert_equal "phoenix.craigslist.org", geo_listing_us070209.sites["phoenix"]
241
+ assert_equal "pittsburgh.craigslist.org", geo_listing_us070209.sites["pittsburgh"]
242
+ assert_equal "plattsburgh.craigslist.org", geo_listing_us070209.sites["plattsburgh-adirondacks"]
243
+ assert_equal "poconos.craigslist.org", geo_listing_us070209.sites["poconos"]
244
+ assert_equal "porthuron.craigslist.org", geo_listing_us070209.sites["port huron"]
245
+ assert_equal "portland.craigslist.org", geo_listing_us070209.sites["portland, OR"]
246
+ assert_equal "prescott.craigslist.org", geo_listing_us070209.sites["prescott"]
247
+ assert_equal "provo.craigslist.org", geo_listing_us070209.sites["provo / orem"]
248
+ assert_equal "pueblo.craigslist.org", geo_listing_us070209.sites["pueblo"]
249
+ assert_equal "pullman.craigslist.org", geo_listing_us070209.sites["pullman / moscow"]
250
+ assert_equal "quadcities.craigslist.org", geo_listing_us070209.sites["quad cities, IA/IL"]
251
+ assert_equal "raleigh.craigslist.org", geo_listing_us070209.sites["raleigh / durham / CH"]
252
+ assert_equal "reading.craigslist.org", geo_listing_us070209.sites["reading"]
253
+ assert_equal "redding.craigslist.org", geo_listing_us070209.sites["redding"]
254
+ assert_equal "reno.craigslist.org", geo_listing_us070209.sites["reno / tahoe"]
255
+ assert_equal "providence.craigslist.org", geo_listing_us070209.sites["rhode island"]
256
+ assert_equal "richmond.craigslist.org", geo_listing_us070209.sites["richmond"]
257
+ assert_equal "roanoke.craigslist.org", geo_listing_us070209.sites["roanoke"]
258
+ assert_equal "rmn.craigslist.org", geo_listing_us070209.sites["rochester, MN"]
259
+ assert_equal "rochester.craigslist.org", geo_listing_us070209.sites["rochester, NY"]
260
+ assert_equal "rockford.craigslist.org", geo_listing_us070209.sites["rockford"]
261
+ assert_equal "rockies.craigslist.org", geo_listing_us070209.sites["rocky mountains"]
262
+ assert_equal "roseburg.craigslist.org", geo_listing_us070209.sites["roseburg"]
263
+ assert_equal "roswell.craigslist.org", geo_listing_us070209.sites["roswell / carlsbad"]
264
+ assert_equal "sacramento.craigslist.org", geo_listing_us070209.sites["sacramento"]
265
+ assert_equal "saginaw.craigslist.org", geo_listing_us070209.sites["saginaw-midland-baycity"]
266
+ assert_equal "salem.craigslist.org", geo_listing_us070209.sites["salem, OR"]
267
+ assert_equal "saltlakecity.craigslist.org", geo_listing_us070209.sites["salt lake city"]
268
+ assert_equal "sanantonio.craigslist.org", geo_listing_us070209.sites["san antonio"]
269
+ assert_equal "sandiego.craigslist.org", geo_listing_us070209.sites["san diego"]
270
+ assert_equal "slo.craigslist.org", geo_listing_us070209.sites["san luis obispo"]
271
+ assert_equal "sanmarcos.craigslist.org", geo_listing_us070209.sites["san marcos"]
272
+ assert_equal "sandusky.craigslist.org", geo_listing_us070209.sites["sandusky"]
273
+ assert_equal "santabarbara.craigslist.org", geo_listing_us070209.sites["santa barbara"]
274
+ assert_equal "santafe.craigslist.org", geo_listing_us070209.sites["santa fe / taos"]
275
+ assert_equal "sarasota.craigslist.org", geo_listing_us070209.sites["sarasota-bradenton"]
276
+ assert_equal "savannah.craigslist.org", geo_listing_us070209.sites["savannah"]
277
+ assert_equal "scranton.craigslist.org", geo_listing_us070209.sites["scranton / wilkes-barre"]
278
+ assert_equal "seattle.craigslist.org", geo_listing_us070209.sites["seattle-tacoma"]
279
+ assert_equal "sheboygan.craigslist.org", geo_listing_us070209.sites["sheboygan, WI"]
280
+ assert_equal "shreveport.craigslist.org", geo_listing_us070209.sites["shreveport"]
281
+ assert_equal "sierravista.craigslist.org", geo_listing_us070209.sites["sierra vista"]
282
+ assert_equal "siouxcity.craigslist.org", geo_listing_us070209.sites["sioux city, IA"]
283
+ assert_equal "skagit.craigslist.org", geo_listing_us070209.sites["skagit / island / SJI"]
284
+ assert_equal "southbend.craigslist.org", geo_listing_us070209.sites["south bend / michiana"]
285
+ assert_equal "southcoast.craigslist.org", geo_listing_us070209.sites["south coast"]
286
+ assert_equal "sd.craigslist.org", geo_listing_us070209.sites["south dakota"]
287
+ assert_equal "miami.craigslist.org", geo_listing_us070209.sites["south florida"]
288
+ assert_equal "southjersey.craigslist.org", geo_listing_us070209.sites["south jersey"]
289
+ assert_equal "semo.craigslist.org", geo_listing_us070209.sites["southeast missouri"]
290
+ assert_equal "smd.craigslist.org", geo_listing_us070209.sites["southern maryland"]
291
+ assert_equal "swmi.craigslist.org", geo_listing_us070209.sites["southwest michigan"]
292
+ assert_equal "spacecoast.craigslist.org", geo_listing_us070209.sites["space coast"]
293
+ assert_equal "spokane.craigslist.org", geo_listing_us070209.sites["spokane / coeur d'alene"]
294
+ assert_equal "springfieldil.craigslist.org", geo_listing_us070209.sites["springfield, IL"]
295
+ assert_equal "springfield.craigslist.org", geo_listing_us070209.sites["springfield, MO"]
296
+ assert_equal "staugustine.craigslist.org", geo_listing_us070209.sites["st augustine"]
297
+ assert_equal "stcloud.craigslist.org", geo_listing_us070209.sites["st cloud"]
298
+ assert_equal "stgeorge.craigslist.org", geo_listing_us070209.sites["st george"]
299
+ assert_equal "stlouis.craigslist.org", geo_listing_us070209.sites["st louis, MO"]
300
+ assert_equal "pennstate.craigslist.org", geo_listing_us070209.sites["state college"]
301
+ assert_equal "stillwater.craigslist.org", geo_listing_us070209.sites["stillwater"]
302
+ assert_equal "stockton.craigslist.org", geo_listing_us070209.sites["stockton"]
303
+ assert_equal "syracuse.craigslist.org", geo_listing_us070209.sites["syracuse"]
304
+ assert_equal "tallahassee.craigslist.org", geo_listing_us070209.sites["tallahassee"]
305
+ assert_equal "tampa.craigslist.org", geo_listing_us070209.sites["tampa bay area"]
306
+ assert_equal "terrahaute.craigslist.org", geo_listing_us070209.sites["terre haute"]
307
+ assert_equal "texarkana.craigslist.org", geo_listing_us070209.sites["texarkana"]
308
+ assert_equal "toledo.craigslist.org", geo_listing_us070209.sites["toledo"]
309
+ assert_equal "topeka.craigslist.org", geo_listing_us070209.sites["topeka"]
310
+ assert_equal "treasure.craigslist.org", geo_listing_us070209.sites["treasure coast"]
311
+ assert_equal "tricities.craigslist.org", geo_listing_us070209.sites["tri-cities, TN"]
312
+ assert_equal "tucson.craigslist.org", geo_listing_us070209.sites["tucson"]
313
+ assert_equal "tulsa.craigslist.org", geo_listing_us070209.sites["tulsa"]
314
+ assert_equal "tuscaloosa.craigslist.org", geo_listing_us070209.sites["tuscaloosa"]
315
+ assert_equal "twinfalls.craigslist.org", geo_listing_us070209.sites["twin falls"]
316
+ assert_equal "easttexas.craigslist.org", geo_listing_us070209.sites["tyler / east TX"]
317
+ assert_equal "up.craigslist.org", geo_listing_us070209.sites["upper peninsula"]
318
+ assert_equal "utica.craigslist.org", geo_listing_us070209.sites["utica"]
319
+ assert_equal "valdosta.craigslist.org", geo_listing_us070209.sites["valdosta"]
320
+ assert_equal "ventura.craigslist.org", geo_listing_us070209.sites["ventura county"]
321
+ assert_equal "burlington.craigslist.org", geo_listing_us070209.sites["vermont"]
322
+ assert_equal "victoriatx.craigslist.org", geo_listing_us070209.sites["victoria, TX"]
323
+ assert_equal "visalia.craigslist.org", geo_listing_us070209.sites["visalia-tulare"]
324
+ assert_equal "waco.craigslist.org", geo_listing_us070209.sites["waco"]
325
+ assert_equal "washingtondc.craigslist.org", geo_listing_us070209.sites["washington, DC"]
326
+ assert_equal "waterloo.craigslist.org", geo_listing_us070209.sites["waterloo / cedar falls"]
327
+ assert_equal "watertown.craigslist.org", geo_listing_us070209.sites["watertown"]
328
+ assert_equal "wausau.craigslist.org", geo_listing_us070209.sites["wausau"]
329
+ assert_equal "wenatchee.craigslist.org", geo_listing_us070209.sites["wenatchee"]
330
+ assert_equal "wv.craigslist.org", geo_listing_us070209.sites["west virginia (old)"]
331
+ assert_equal "westky.craigslist.org", geo_listing_us070209.sites["western KY"]
332
+ assert_equal "westmd.craigslist.org", geo_listing_us070209.sites["western maryland"]
333
+ assert_equal "westernmass.craigslist.org", geo_listing_us070209.sites["western massachusetts"]
334
+ assert_equal "westslope.craigslist.org", geo_listing_us070209.sites["western slope"]
335
+ assert_equal "wheeling.craigslist.org", geo_listing_us070209.sites["wheeling, WV"]
336
+ assert_equal "wichita.craigslist.org", geo_listing_us070209.sites["wichita"]
337
+ assert_equal "wichitafalls.craigslist.org", geo_listing_us070209.sites["wichita falls"]
338
+ assert_equal "williamsport.craigslist.org", geo_listing_us070209.sites["williamsport"]
339
+ assert_equal "wilmington.craigslist.org", geo_listing_us070209.sites["wilmington, NC"]
340
+ assert_equal "winstonsalem.craigslist.org", geo_listing_us070209.sites["winston-salem"]
341
+ assert_equal "worcester.craigslist.org", geo_listing_us070209.sites["worcester / central MA"]
342
+ assert_equal "wyoming.craigslist.org", geo_listing_us070209.sites["wyoming"]
343
+ assert_equal "yakima.craigslist.org", geo_listing_us070209.sites["yakima"]
344
+ assert_equal "york.craigslist.org", geo_listing_us070209.sites["york, PA"]
345
+ assert_equal "youngstown.craigslist.org", geo_listing_us070209.sites["youngstown"]
346
+ assert_equal "yubasutter.craigslist.org", geo_listing_us070209.sites["yuba-sutter"]
347
+ assert_equal "yuma.craigslist.org", geo_listing_us070209.sites["yuma"]
348
+
349
+ geo_listing_cn070209 = CraigScrape::GeoListings.new relative_uri_for(
350
+ 'geolisting_samples/geo_listing_cn070209.html'
351
+ )
352
+ assert_equal "china", geo_listing_cn070209.location
353
+ assert_equal 6, geo_listing_cn070209.sites.length
354
+ assert_equal "beijing.craigslist.com.cn", geo_listing_cn070209.sites["beijing"]
355
+ assert_equal "guangzhou.craigslist.com.cn", geo_listing_cn070209.sites["guangzhou"]
356
+ assert_equal "hangzhou.craigslist.org", geo_listing_cn070209.sites["hangzhou"]
357
+ assert_equal "hongkong.craigslist.org", geo_listing_cn070209.sites["hong kong"]
358
+ assert_equal "shanghai.craigslist.com.cn", geo_listing_cn070209.sites["shanghai"]
359
+ assert_equal "shenzhen.craigslist.org", geo_listing_cn070209.sites["shenzhen"]
360
+
361
+ geo_listing_ca070209 = CraigScrape::GeoListings.new relative_uri_for(
362
+ 'geolisting_samples/geo_listing_ca070209.html'
363
+ )
364
+ assert_equal "canada", geo_listing_ca070209.location
365
+ assert_equal 47, geo_listing_ca070209.sites.length
366
+ assert_equal "barrie.craigslist.ca", geo_listing_ca070209.sites["barrie"]
367
+ assert_equal "belleville.craigslist.ca", geo_listing_ca070209.sites["belleville, ON"]
368
+ assert_equal "calgary.craigslist.ca", geo_listing_ca070209.sites["calgary"]
369
+ assert_equal "chatham.craigslist.ca", geo_listing_ca070209.sites["chatham-kent"]
370
+ assert_equal "comoxvalley.craigslist.ca", geo_listing_ca070209.sites["comox valley"]
371
+ assert_equal "cornwall.craigslist.ca", geo_listing_ca070209.sites["cornwall, ON"]
372
+ assert_equal "cranbrook.craigslist.ca", geo_listing_ca070209.sites["cranbrook, BC"]
373
+ assert_equal "edmonton.craigslist.ca", geo_listing_ca070209.sites["edmonton"]
374
+ assert_equal "abbotsford.craigslist.ca", geo_listing_ca070209.sites["fraser valley"]
375
+ assert_equal "ftmcmurray.craigslist.ca", geo_listing_ca070209.sites["ft mcmurray"]
376
+ assert_equal "guelph.craigslist.ca", geo_listing_ca070209.sites["guelph"]
377
+ assert_equal "halifax.craigslist.ca", geo_listing_ca070209.sites["halifax"]
378
+ assert_equal "hamilton.craigslist.ca", geo_listing_ca070209.sites["hamilton-burlington"]
379
+ assert_equal "kamloops.craigslist.ca", geo_listing_ca070209.sites["kamloops"]
380
+ assert_equal "kelowna.craigslist.ca", geo_listing_ca070209.sites["kelowna"]
381
+ assert_equal "kingston.craigslist.ca", geo_listing_ca070209.sites["kingston, ON"]
382
+ assert_equal "kitchener.craigslist.ca", geo_listing_ca070209.sites["kitchener-waterloo-cambridge"]
383
+ assert_equal "lethbridge.craigslist.ca", geo_listing_ca070209.sites["lethbridge"]
384
+ assert_equal "londonon.craigslist.ca", geo_listing_ca070209.sites["london, ON"]
385
+ assert_equal "montreal.craigslist.ca", geo_listing_ca070209.sites["montreal"]
386
+ assert_equal "nanaimo.craigslist.ca", geo_listing_ca070209.sites["nanaimo"]
387
+ assert_equal "newbrunswick.craigslist.ca", geo_listing_ca070209.sites["new brunswick"]
388
+ assert_equal "newfoundland.craigslist.ca", geo_listing_ca070209.sites["newfoundland / labrador"]
389
+ assert_equal "niagara.craigslist.ca", geo_listing_ca070209.sites["niagara region"]
390
+ assert_equal "ottawa.craigslist.ca", geo_listing_ca070209.sites["ottawa-hull-gatineau"]
391
+ assert_equal "owensound.craigslist.ca", geo_listing_ca070209.sites["owen sound"]
392
+ assert_equal "peterborough.craigslist.ca", geo_listing_ca070209.sites["peterborough"]
393
+ assert_equal "pei.craigslist.ca", geo_listing_ca070209.sites["prince edward island"]
394
+ assert_equal "princegeorge.craigslist.ca", geo_listing_ca070209.sites["prince george"]
395
+ assert_equal "quebec.craigslist.ca", geo_listing_ca070209.sites["quebec city"]
396
+ assert_equal "reddeer.craigslist.ca", geo_listing_ca070209.sites["red deer"]
397
+ assert_equal "regina.craigslist.ca", geo_listing_ca070209.sites["regina"]
398
+ assert_equal "saguenay.craigslist.ca", geo_listing_ca070209.sites["saguenay"]
399
+ assert_equal "sarnia.craigslist.ca", geo_listing_ca070209.sites["sarnia"]
400
+ assert_equal "saskatoon.craigslist.ca", geo_listing_ca070209.sites["saskatoon"]
401
+ assert_equal "soo.craigslist.ca", geo_listing_ca070209.sites["sault ste marie, ON"]
402
+ assert_equal "sherbrooke.craigslist.ca", geo_listing_ca070209.sites["sherbrooke"]
403
+ assert_equal "sudbury.craigslist.ca", geo_listing_ca070209.sites["sudbury"]
404
+ assert_equal "territories.craigslist.ca", geo_listing_ca070209.sites["territories"]
405
+ assert_equal "thunderbay.craigslist.ca", geo_listing_ca070209.sites["thunder bay"]
406
+ assert_equal "toronto.craigslist.ca", geo_listing_ca070209.sites["toronto"]
407
+ assert_equal "troisrivieres.craigslist.ca", geo_listing_ca070209.sites["trois-rivieres"]
408
+ assert_equal "vancouver.craigslist.ca", geo_listing_ca070209.sites["vancouver, BC"]
409
+ assert_equal "victoria.craigslist.ca", geo_listing_ca070209.sites["victoria"]
410
+ assert_equal "whistler.craigslist.ca", geo_listing_ca070209.sites["whistler, BC"]
411
+ assert_equal "windsor.craigslist.ca", geo_listing_ca070209.sites["windsor"]
412
+ assert_equal "winnipeg.craigslist.ca", geo_listing_ca070209.sites["winnipeg"]
413
+
414
+ geo_listing_ca_sk07020 = CraigScrape::GeoListings.new relative_uri_for(
415
+ 'geolisting_samples/geo_listing_ca_sk070209.html'
416
+ )
417
+ assert_equal "canada", geo_listing_ca_sk07020.location
418
+ assert_equal(
419
+ { "saskatoon" => "saskatoon.craigslist.ca", "regina" => "regina.craigslist.ca" },
420
+ geo_listing_ca_sk07020.sites
421
+ )
422
+ end
423
+
424
def test_sites_in_path
  # Exercises GeoListings.sites_in_path against the file-based hierarchy fixture.
  # (The original author noted this was hard to test and was unsure how useful it is.)
  fixture_root = relative_uri_for 'geolisting_samples/hierarchy_test071009/'

  # Each of these path spellings is expected to resolve to just the miami site.
  miami_paths = [
    'us/fl/miami', '/us/fl/miami/', 'us/fl/miami/', '/us/fl/miami', 'us/fl/miami/nonsense',
    'us/fl/miami/nonsense/more-nonsense', 'us/fl/miami/south florida'
  ]
  miami_paths.each do |geo_path|
    assert_equal ["miami.craigslist.org"], CraigScrape::GeoListings.sites_in_path(geo_path, fixture_root)
  end

  # A state-level path is expected to list every Florida site, in exactly this order,
  # regardless of leading/trailing slashes.
  florida_subdomains = %w(
    jacksonville panamacity orlando fortmyers keys tallahassee ocala gainesville tampa
    pensacola daytona treasure sarasota staugustine spacecoast lakeland miami
  )
  ['us/fl', '/us/fl', 'us/fl/', '/us/fl/'].each do |geo_path|
    florida_sites = florida_subdomains.map { |subdomain| "#{subdomain}.craigslist.org" }
    assert_equal florida_sites, CraigScrape::GeoListings.sites_in_path(geo_path, fixture_root)
  end

  # A location name that itself contains a slash is passed with the slash backslash-escaped.
  # NOTE(review): the original author believed this file-based check is representative of
  # the http-retrieval code path as well; that is not verified by this test.
  fort_myers_sites = CraigScrape::GeoListings.sites_in_path(
    "us/fl/ft myers \\/ SW florida", fixture_root
  )
  assert_equal ["fortmyers.craigslist.org"], fort_myers_sites

  # Paths that match nothing must raise. The second path has a resolvable prefix
  # (us/fl/miami), but no best-guess match is expected to be attempted.
  ["us/fl/nonexist", "us/fl/miami/nonexist"].each do |bad_path|
    assert_raise(CraigScrape::GeoListings::BadGeoListingPath) do
      CraigScrape::GeoListings.sites_in_path bad_path, fixture_root
    end
  end
end
463
+
464
def test_find_sites
  # Exercises GeoListings.find_sites, which takes a list of site specifications and the
  # root of a geo-listing hierarchy. As used below, a specification is either a hierarchy
  # path ("us/fl") or a site hostname ("newyork.craigslist.org"), optionally prefixed with
  # "+" (include) or "-" (exclude), with optional surrounding whitespace.
  #
  # This method is named test_find_sites rather than test_sites_in_path: a second method
  # called test_sites_in_path in the same class would replace the earlier definition, so
  # only one of the two tests would ever run.
  hier_dir = relative_uri_for 'geolisting_samples/hierarchy_test071009/'

  # Including the same path twice yields a single entry, and excluding a site that was
  # never included is harmless.
  assert_equal(
    %w(miami.craigslist.org),
    CraigScrape::GeoListings.find_sites(
      ["us/fl/south florida","+ us/fl/south florida", "-newyork.craigslist.org"],
      hier_dir
    )
  )

  # All of Florida, minus miami, plus an explicitly added hostname (expected last).
  assert_equal(
    %w(
      jacksonville panamacity orlando fortmyers keys tallahassee ocala gainesville tampa
      pensacola daytona treasure sarasota staugustine spacecoast lakeland newyork
    ).collect{|p| "#{p}.craigslist.org"},
    CraigScrape::GeoListings.find_sites( ["us/fl","-us/fl/miami", "+ newyork.craigslist.org"], hier_dir)
  )

  # The whole US, minus Florida, with miami added back (expected last). The trailing
  # exclusion of jacksonville carries leading whitespace before its "-" prefix.
  # The expected order below is significant: assert_equal compares arrays element by element.
  assert_equal(
    %w(
      westmd fortcollins charleston fayetteville dallas mendocino wichita valdosta terrahaute rockford erie
      decatur cedarrapids stillwater collegestation charlestonwv albany sacramento houston kalamazoo fortsmith
      maine minneapolis stockton pennstate bend grandisland palmsprings nmi waterloo topeka eastnc greenbay york
      utica stgeorge oklahomacity grandrapids eastidaho lancaster gulfport sandiego reading kpr fresno iowacity
      chicago tuscaloosa smd monterey yubasutter victoriatx sd knoxville gadsden jonesboro ksu youngstown toledo
      lascruces annarbor danville delaware parkersburg appleton stcloud richmond muskegon jerseyshore redding
      ithaca hartford evansville corpuschristi binghamton chico modesto lynchburg hattiesburg morgantown
      harrisonburg lubbock carbondale florencesc imperial wenatchee semo savannah prescott lacrosse longisland
      huntsville santabarbara janesville mankato santafe pullman louisville lexington brunswick duluth columbus
      hudsonvalley pittsburgh wheeling westky waco shreveport eastoregon corvallis winstonsalem denver
      tippecanoe newhaven shoals wv greenville lansing detroit athensohio easttexas sanantonio raleigh phoenix
      honolulu inlandempire pueblo chattanooga lawton worcester twinfalls roseburg roanoke fredericksburg
      annapolis asheville seattle scranton quadcities oregoncoast stlouis newyork mobile atlanta visalia
      clarksville providence kansascity galveston madison bham harrisburg muncie bloomington anchorage ventura
      up tricities rockies elpaso slo indianapolis fayar columbusga bellingham abilene wichitafalls boston
      mcallen bn sierravista lasvegas sanmarcos nwct farmington mansfield jacksontn bgky altoona eugene
      lafayette boone odessa spokane norfolk hickory burlington nashville lawrence hiltonhead elmira westernmass
      southjersey myrtlebeach dothan goldcountry lincoln martinsburg dubuque brownsville washingtondc tucson
      columbiamo jxn yakima sheboygan olympic humboldt newjersey cosprings springfield beaumont macon eauclaire
      batonrouge buffalo mohave wilmington rochester sfbay northmiss bakersfield neworleans catskills wausau
      akroncanton cnj merced chambana flint capecod nh yuma tulsa charlottesville easternshore desmoines
      athensga austin newlondon outerbanks fortwayne dayton wyoming watertown provo medford texarkana cleveland
      memphis amarillo limaohio augusta flagstaff jackson plattsburgh peoria skagit saltlakecity saginaw
      portland syracuse swmi baltimore monroe littlerock boise laredo boulder philadelphia sandusky salem rmn
      montgomery blacksburg centralmich logan albuquerque losangeles poconos westslope southbend siouxcity reno
      porthuron greensboro orangecounty fargo ogden charlotte allentown joplin chautauqua lakecharles omaha
      springfieldil roswell montana killeen milwaukee nd williamsport columbia racine southcoast ames huntington
      cincinnati auburn miami
    ).collect{|p| "#{p}.craigslist.org"},
    CraigScrape::GeoListings.find_sites(
      ["us","- us/fl", "+ us/fl/miami", ' -jacksonville.craigslist.org'], hier_dir
    )
  )

end
520
+
521
+ end