olek-libcraigscrape 1.0.3 → 1.1.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
data/lib/scraper.rb CHANGED
@@ -5,14 +5,14 @@
  # - Basic http and connection handling methods
  # - html utility methods used by objects
  # - Common Errors
- # You should never need to include this file directly, as all of libcraigscrape's objects and methods
+ # You should never need to include this file directly, as all of libcraigscrape's objects and methods
  # are loaded when you use <tt>require 'libcraigscrape'</tt> in your code.
  #

- # Scraper is a general-pupose base class for all libcraigscrape Objects. Scraper facilitates all http-related
+ # Scraper is a general-pupose base class for all libcraigscrape Objects. Scraper facilitates all http-related
  # functionality, and adds some useful helpers for dealing with eager-loading of http-objects and general html
  # methods. It also contains the http-related cattr_accessors:
- #
+ #
  # <b>logger</b> - a Logger object to debug http notices too. Defaults to nil
  #
  # <b>retries_on_fetch_fail</b> - The number of times to retry a failed uri download. Defaults to 8
@@ -23,31 +23,22 @@
  #
  # <b>sleep_between_404_retries</b> - The amount of seconds to sleep, between successive attempts in the case of a Resource Not Found error. Defaults to 3.
  #
+
  class CraigScrape::Scraper
  cattr_accessor :logger
- cattr_accessor :sleep_between_fetch_retries
- cattr_accessor :retries_on_fetch_fail
- cattr_accessor :retries_on_404_fail
- cattr_accessor :sleep_between_404_retries
- cattr_accessor :maximum_redirects_per_request

  URL_PARTS = /^(?:([^\:]+)\:\/\/([^\/]*))?(.*)$/
  HTML_TAG = /<\/?[^>]*>/
- # We have to specify this to nokogiri. Sometimes it tries to figure out encoding on its own, and craigslist users post crazy bytes sometimes
+ # We have to specify this to nokogiri. Sometimes it tries to figure out encoding on its own, and craigslist users post crazy bytes sometimes
  HTML_ENCODING = "UTF-8"

+ HTTP_HEADERS = { "Cache-Control" => "no-cache", "Pragma" => "no-cache",
+ "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+ "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19"}
+
  # Returns the full url that corresponds to this resource
  attr_reader :url

- # Set some defaults:
- self.retries_on_fetch_fail = 8
- self.sleep_between_fetch_retries = 30
-
- self.retries_on_404_fail = 3
- self.sleep_between_404_retries = 3
-
- self.maximum_redirects_per_request = 20
-
  class BadConstructionError < StandardError #:nodoc:
  end

@@ -57,15 +48,9 @@ class CraigScrape::Scraper
  class BadUrlError < StandardError #:nodoc:
  end

- class MaxRedirectError < StandardError #:nodoc:
- end
-
  class FetchError < StandardError #:nodoc:
  end
-
- class ResourceNotFoundError < StandardError #:nodoc:
- end
-
+
  # Scraper Objects can be created from either a full URL (string), or a Hash.
  # Currently, this initializer isn't intended to be called from libcraigslist API users, though
  # if you know what you're doing - feel free to try this out.
@@ -88,7 +73,7 @@ class CraigScrape::Scraper
  raise BadConstructionError, ("Unrecognized parameter passed to %s.new %s}" % [self.class.to_s, init_via.class.inspect])
  end
  end
-
+
  # Indicates whether the resource has yet been retrieved from its associated url.
  # This is useful to distinguish whether the instance was instantiated for the purpose of an eager-load,
  # but hasn't yet been fetched.
@@ -101,21 +86,27 @@ class CraigScrape::Scraper
  end

  private
-
+
  # Returns text with all html tags removed.
  def strip_html(str)
- str.gsub HTML_TAG, "" if str
+ he_decode(str).gsub HTML_TAG, "" if str
  end
-
+
  # Easy way to fail noisily:
- def parse_error!; raise ParseError, "Error while parsing %s:\n %s" % [self.class.to_s, html]; end
-
+ def parse_error!(fields = nil)
+ raise ParseError, "Error while parsing %s:\n %s%s" % [
+ self.class.to_s, html,
+ (fields) ? ("\nRequired fields missing: %s" % fields.join(', ')) : '']
+ end
+
  # Returns text with all html entities converted to respective ascii character.
  def he_decode(text); self.class.he_decode text; end

  # Returns text with all html entities converted to respective ascii character.
- def self.he_decode(text); HTMLEntities.new.decode text; end
-
+ def self.he_decode(text)
+ HTMLEntities.new.decode text
+ end
+
  # Derives a full url, using the current object's url and the provided href
  def url_from_href(href) #:nodoc:
  scheme, host, path = $1, $2, $3 if URL_PARTS.match href
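
Note on the hunk above: parse_error! picks up an optional fields argument, so parse failures can now name the fields that came back empty. A minimal standalone sketch of the new message format; the ParseError class, the placeholder html string, and the sample field names here are illustrative stand-ins:

    class ParseError < StandardError; end

    # Mirrors the new parse_error! logic: the missing-field list is only
    # appended to the message when one is supplied.
    def parse_error!(fields = nil)
      html = "<html>...</html>" # stand-in for the page source being parsed
      raise ParseError, "Error while parsing %s:\n %s%s" % [
        self.class.to_s, html,
        fields ? ("\nRequired fields missing: %s" % fields.join(', ')) : '']
    end

    parse_error! %w(location sites) rescue puts $!.message
    # Error while parsing Object:
    #  <html>...</html>
    # Required fields missing: location, sites
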
@@ -132,81 +123,33 @@ class CraigScrape::Scraper

  '%s://%s%s' % [scheme, host, path]
  end
+
+ def fetch_uri(uri)
+ logger.info "Requesting: %s" % [@url.inspect] if logger

- def fetch_uri(uri, redirect_count = 0)
- logger.info "Requesting (%d): %s" % [redirect_count, @url.inspect] if logger
-
- raise MaxRedirectError, "Max redirects (#{redirect_count}) reached for URL: #{@url}" if redirect_count > self.maximum_redirects_per_request-1
-
- case uri.scheme
+ (case uri.scheme
  when 'file'
  # If this is a directory, we'll try to approximate http a bit by loading a '/index.html'
- File.read( File.directory?(uri.path) ? "#{uri.path}/index.html" : uri.path )
+ File.read( File.directory?(uri.path) ?
+ "#{uri.path}/index.html" : uri.path , :encoding => 'BINARY')
  when /^http[s]?/
- fetch_http uri, redirect_count
+ resp = Typhoeus.get uri.to_s, :followlocation => true,
+ :headers => HTTP_HEADERS
+ resp.response_body
  else
  raise BadUrlError, "Unknown URI scheme for the url: #{@url}"
- end
- end
-
- def fetch_http(uri, redirect_count = 0)
- fetch_attempts = 0
- resource_not_found_attempts = 0
-
- begin
- # This handles the redirects for us
- resp, data = Net::HTTP.new( uri.host, uri.port).get uri.request_uri
-
- if resp.response.code == "200"
- # Check for gzip, and decode:
- data = Zlib::GzipReader.new(StringIO.new(data)).read if resp.response.header['Content-Encoding'] == 'gzip'
-
- data
- elsif resp.response['Location']
- redirect_to = resp.response['Location']
-
- fetch_uri URI.parse(url_from_href(redirect_to)), redirect_count+1
- else
- # Sometimes Craigslist seems to return 404's for no good reason, and a subsequent fetch will give you what you want
- raise ResourceNotFoundError, 'Unable to fetch "%s" (%s)' % [ @url, resp.response.code ]
- end
- rescue ResourceNotFoundError => err
- logger.info err.message if logger
-
- resource_not_found_attempts += 1
-
- if resource_not_found_attempts <= self.retries_on_404_fail
- sleep self.sleep_between_404_retries if self.sleep_between_404_retries
- logger.info 'Retrying ....' if logger
- retry
- else
- raise err
- end
- rescue FetchError,Timeout::Error,Errno::ECONNRESET,EOFError => err
- logger.info 'Timeout error while requesting "%s"' % @url if logger and err.class == Timeout::Error
- logger.info 'Connection reset while requesting "%s"' % @url if logger and err.class == Errno::ECONNRESET
-
- fetch_attempts += 1
-
- if fetch_attempts <= self.retries_on_fetch_fail
- sleep self.sleep_between_fetch_retries if self.sleep_between_fetch_retries
- logger.info 'Retrying fetch ....' if logger
- retry
- else
- raise err
- end
- end
+ end).force_encoding("ISO-8859-1").encode("UTF-8")
  end
-
+
  # Returns a string, of the current URI's source code
  def html_source
  @html_source ||= fetch_uri uri if uri
  @html_source
  end
-
+
  # Returns an Nokogiri parse, of the current URI
  def html
  @html ||= Nokogiri::HTML html_source, nil, HTML_ENCODING if html_source
  @html
  end
- end
+ end
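
The substantive change in this file is the fetch path: the hand-rolled Net::HTTP code, with its redirect counting, gzip decoding, and 404/timeout retry loops (plus the cattr_accessors and the MaxRedirectError and ResourceNotFoundError classes that supported them), is replaced by a single Typhoeus request that follows redirects itself; the response bytes are then reinterpreted as ISO-8859-1 and transcoded to UTF-8 before Nokogiri sees them. A minimal sketch of the new code path, assuming the typhoeus gem and the HTTP_HEADERS constant added above; the example URL is arbitrary:

    require 'typhoeus'

    HTTP_HEADERS = { "Cache-Control" => "no-cache", "Pragma" => "no-cache",
      "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19" }

    # :followlocation hands redirect-chasing to libcurl, which is what makes
    # the old MaxRedirectError and maximum_redirects_per_request obsolete.
    resp = Typhoeus.get "http://sfbay.craigslist.org/", :followlocation => true,
                        :headers => HTTP_HEADERS

    # Same normalization as the new fetch_uri: treat the raw bytes as
    # ISO-8859-1, then transcode to UTF-8 so stray bytes can't derail parsing.
    body = resp.response_body.force_encoding("ISO-8859-1").encode("UTF-8")

Callers that rescued ResourceNotFoundError or MaxRedirectError, or that tuned retries_on_fetch_fail and friends, will need updating: after this change only BadConstructionError, ParseError (raised by parse_error!), BadUrlError, and FetchError remain, and failed requests are no longer retried.
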
@@ -2,36 +2,36 @@ module LibcraigscrapeTestHelpers
  def relative_uri_for(filename)
  'file://%s/%s' % [File.dirname(File.expand_path(__FILE__)), filename]
  end
-
+
  def pp_assertions(obj, obj_name)
  probable_accessors = (obj.methods-obj.class.superclass.methods)

  puts
  probable_accessors.sort.each do |m|
  val = obj.send(m.to_sym)
-
+
  # There's a good number of transformations worth doing here, I'll just start like this for now:
  if val.kind_of? Time
  # I've decided this is the the easiest way to understand and test a time
  val = val.to_a
  m = "#{m}.to_a"
  end
-
- if val.kind_of? Hash and val.length > 5
+
+ if val.kind_of? Hash and val.length > 5
  puts "assert_equal %s, %s.%s.length" % [val.length.inspect,obj_name,m]
-
- val.keys.sort{|a,b| a <=> b }.each do |k|
+
+ val.keys.sort{|a,b| a <=> b }.each do |k|
  puts "assert_equal %s, %s.%s[%s]" % [val[k].inspect,obj_name,m,k.inspect]
  end
  # elsif val.kind_of? Array
  # puts "assert_equal %s, %s.%s.length" % [val.length.inspect,obj_name,m]
- #
- # val.each_index do |i|
+ #
+ # val.each_index do |i|
  # pp_assertions val[i], "%s.%s[%s]" % [obj_name,m,i.inspect]
  # end
  else
  puts "assert_equal %s, %s.%s" % [val.inspect,obj_name,m]
  end
- end
+ end
  end
- end
+ end
@@ -6,13 +6,13 @@ require File.dirname(__FILE__)+'/libcraigscrape_test_helpers'

  class CraigslistGeolistingTest < Test::Unit::TestCase
  include LibcraigscrapeTestHelpers
-
+
  def test_pukes
  assert_raise(CraigScrape::Scraper::ParseError) do
  CraigScrape::GeoListings.new( relative_uri_for('google.html') ).sites
  end
  end
-
+
  def test_geo_listings
  geo_listing_us070209 = CraigScrape::GeoListings.new relative_uri_for(
  'geolisting_samples/geo_listing_us070209.html'
@@ -345,10 +345,10 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
  assert_equal "youngstown.craigslist.org", geo_listing_us070209.sites["youngstown"]
  assert_equal "yubasutter.craigslist.org", geo_listing_us070209.sites["yuba-sutter"]
  assert_equal "yuma.craigslist.org", geo_listing_us070209.sites["yuma"]
-
+
  geo_listing_cn070209 = CraigScrape::GeoListings.new relative_uri_for(
  'geolisting_samples/geo_listing_cn070209.html'
- )
+ )
  assert_equal "china", geo_listing_cn070209.location
  assert_equal 6, geo_listing_cn070209.sites.length
  assert_equal "beijing.craigslist.com.cn", geo_listing_cn070209.sites["beijing"]
@@ -357,10 +357,10 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
  assert_equal "hongkong.craigslist.org", geo_listing_cn070209.sites["hong kong"]
  assert_equal "shanghai.craigslist.com.cn", geo_listing_cn070209.sites["shanghai"]
  assert_equal "shenzhen.craigslist.org", geo_listing_cn070209.sites["shenzhen"]
-
+
  geo_listing_ca070209 = CraigScrape::GeoListings.new relative_uri_for(
  'geolisting_samples/geo_listing_ca070209.html'
- )
+ )
  assert_equal "canada", geo_listing_ca070209.location
  assert_equal 47, geo_listing_ca070209.sites.length
  assert_equal "barrie.craigslist.ca", geo_listing_ca070209.sites["barrie"]
@@ -410,28 +410,28 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
  assert_equal "whistler.craigslist.ca", geo_listing_ca070209.sites["whistler, BC"]
  assert_equal "windsor.craigslist.ca", geo_listing_ca070209.sites["windsor"]
  assert_equal "winnipeg.craigslist.ca", geo_listing_ca070209.sites["winnipeg"]
-
+
  geo_listing_ca_sk07020 = CraigScrape::GeoListings.new relative_uri_for(
  'geolisting_samples/geo_listing_ca_sk070209.html'
- )
+ )
  assert_equal "canada", geo_listing_ca_sk07020.location
- assert_equal(
- { "saskatoon" => "saskatoon.craigslist.ca", "regina" => "regina.craigslist.ca" },
+ assert_equal(
+ { "saskatoon" => "saskatoon.craigslist.ca", "regina" => "regina.craigslist.ca" },
  geo_listing_ca_sk07020.sites
  )
  end
-
+
  def test_sites_in_path
  # This was really tough to test, and in the end, I don't know just how useful this really is...
  hier_dir = relative_uri_for 'geolisting_samples/hierarchy_test071009/'
-
+
  %w(
- us/fl/miami /us/fl/miami/ us/fl/miami/ /us/fl/miami us/fl/miami/nonsense
+ us/fl/miami /us/fl/miami/ us/fl/miami/ /us/fl/miami us/fl/miami/nonsense
  us/fl/miami/nonsense/more-nonsense us/fl/miami/south\ florida
  ).each do |path|
  assert_equal ["miami.craigslist.org"], CraigScrape::GeoListings.sites_in_path( path, hier_dir )
  end
-
+
  %w( us/fl /us/fl us/fl/ /us/fl/ ).each do |path|
  assert_equal(
  %w(
@@ -441,20 +441,20 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
  CraigScrape::GeoListings.sites_in_path( path, hier_dir )
  )
  end
-
+
  # This tests those escaped funky paths. I *think* this file-based test is actually indicative
  # that the http-retrieval version works as well;
  us_fl_mia_ftmeyers = CraigScrape::GeoListings.sites_in_path(
  "us/fl/ft myers \\/ SW florida", hier_dir
  )
  assert_equal ["fortmyers.craigslist.org"], us_fl_mia_ftmeyers
-
+
  # make sure we puke on obvious bad-stuff. I *think* this file-based test is actually indicative
  # that the http-retrieval version works as well:
  assert_raise(CraigScrape::GeoListings::BadGeoListingPath) do
  CraigScrape::GeoListings.sites_in_path "us/fl/nonexist", hier_dir
  end
-
+
  assert_raise(CraigScrape::GeoListings::BadGeoListingPath) do
  # You'll notice that we could actually guess a decent match, but we wont :
  CraigScrape::GeoListings.sites_in_path "us/fl/miami/nonexist", hier_dir
@@ -465,57 +465,57 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
  hier_dir = relative_uri_for 'geolisting_samples/hierarchy_test071009/'

  assert_equal(
- %w(miami.craigslist.org),
- CraigScrape::GeoListings.find_sites(
- ["us/fl/south florida","+ us/fl/south florida", "-newyork.craigslist.org"],
+ %w(miami.craigslist.org),
+ CraigScrape::GeoListings.find_sites(
+ ["us/fl/south florida","+ us/fl/south florida", "-newyork.craigslist.org"],
  hier_dir
  )
  )
-
+
  assert_equal(
  %w(
- jacksonville panamacity orlando fortmyers keys tallahassee ocala gainesville tampa
+ jacksonville panamacity orlando fortmyers keys tallahassee ocala gainesville tampa
  pensacola daytona treasure sarasota staugustine spacecoast lakeland newyork
- ).collect{|p| "#{p}.craigslist.org"},
- CraigScrape::GeoListings.find_sites( ["us/fl","-us/fl/miami", "+ newyork.craigslist.org"], hier_dir)
+ ).collect{|p| "#{p}.craigslist.org"}.sort,
+ CraigScrape::GeoListings.find_sites( ["us/fl","-us/fl/miami", "+ newyork.craigslist.org"], hier_dir).sort
  )

  assert_equal(
  %w(
- westmd fortcollins charleston fayetteville dallas mendocino wichita valdosta terrahaute rockford erie
- decatur cedarrapids stillwater collegestation charlestonwv albany sacramento houston kalamazoo fortsmith
+ westmd fortcollins charleston fayetteville dallas mendocino wichita valdosta terrahaute rockford erie
+ decatur cedarrapids stillwater collegestation charlestonwv albany sacramento houston kalamazoo fortsmith
  maine minneapolis stockton pennstate bend grandisland palmsprings nmi waterloo topeka eastnc greenbay york
- utica stgeorge oklahomacity grandrapids eastidaho lancaster gulfport sandiego reading kpr fresno iowacity
- chicago tuscaloosa smd monterey yubasutter victoriatx sd knoxville gadsden jonesboro ksu youngstown toledo
- lascruces annarbor danville delaware parkersburg appleton stcloud richmond muskegon jerseyshore redding
- ithaca hartford evansville corpuschristi binghamton chico modesto lynchburg hattiesburg morgantown
- harrisonburg lubbock carbondale florencesc imperial wenatchee semo savannah prescott lacrosse longisland
- huntsville santabarbara janesville mankato santafe pullman louisville lexington brunswick duluth columbus
- hudsonvalley pittsburgh wheeling westky waco shreveport eastoregon corvallis winstonsalem denver
- tippecanoe newhaven shoals wv greenville lansing detroit athensohio easttexas sanantonio raleigh phoenix
- honolulu inlandempire pueblo chattanooga lawton worcester twinfalls roseburg roanoke fredericksburg
- annapolis asheville seattle scranton quadcities oregoncoast stlouis newyork mobile atlanta visalia
- clarksville providence kansascity galveston madison bham harrisburg muncie bloomington anchorage ventura
- up tricities rockies elpaso slo indianapolis fayar columbusga bellingham abilene wichitafalls boston
- mcallen bn sierravista lasvegas sanmarcos nwct farmington mansfield jacksontn bgky altoona eugene
- lafayette boone odessa spokane norfolk hickory burlington nashville lawrence hiltonhead elmira westernmass
- southjersey myrtlebeach dothan goldcountry lincoln martinsburg dubuque brownsville washingtondc tucson
- columbiamo jxn yakima sheboygan olympic humboldt newjersey cosprings springfield beaumont macon eauclaire
- batonrouge buffalo mohave wilmington rochester sfbay northmiss bakersfield neworleans catskills wausau
- akroncanton cnj merced chambana flint capecod nh yuma tulsa charlottesville easternshore desmoines
- athensga austin newlondon outerbanks fortwayne dayton wyoming watertown provo medford texarkana cleveland
- memphis amarillo limaohio augusta flagstaff jackson plattsburgh peoria skagit saltlakecity saginaw
- portland syracuse swmi baltimore monroe littlerock boise laredo boulder philadelphia sandusky salem rmn
- montgomery blacksburg centralmich logan albuquerque losangeles poconos westslope southbend siouxcity reno
- porthuron greensboro orangecounty fargo ogden charlotte allentown joplin chautauqua lakecharles omaha
- springfieldil roswell montana killeen milwaukee nd williamsport columbia racine southcoast ames huntington
+ utica stgeorge oklahomacity grandrapids eastidaho lancaster gulfport sandiego reading kpr fresno iowacity
+ chicago tuscaloosa smd monterey yubasutter victoriatx sd knoxville gadsden jonesboro ksu youngstown toledo
+ lascruces annarbor danville delaware parkersburg appleton stcloud richmond muskegon jerseyshore redding
+ ithaca hartford evansville corpuschristi binghamton chico modesto lynchburg hattiesburg morgantown
+ harrisonburg lubbock carbondale florencesc imperial wenatchee semo savannah prescott lacrosse longisland
+ huntsville santabarbara janesville mankato santafe pullman louisville lexington brunswick duluth columbus
+ hudsonvalley pittsburgh wheeling westky waco shreveport eastoregon corvallis winstonsalem denver
+ tippecanoe newhaven shoals wv greenville lansing detroit athensohio easttexas sanantonio raleigh phoenix
+ honolulu inlandempire pueblo chattanooga lawton worcester twinfalls roseburg roanoke fredericksburg
+ annapolis asheville seattle scranton quadcities oregoncoast stlouis newyork mobile atlanta visalia
+ clarksville providence kansascity galveston madison bham harrisburg muncie bloomington anchorage ventura
+ up tricities rockies elpaso slo indianapolis fayar columbusga bellingham abilene wichitafalls boston
+ mcallen bn sierravista lasvegas sanmarcos nwct farmington mansfield jacksontn bgky altoona eugene
+ lafayette boone odessa spokane norfolk hickory burlington nashville lawrence hiltonhead elmira westernmass
+ southjersey myrtlebeach dothan goldcountry lincoln martinsburg dubuque brownsville washingtondc tucson
+ columbiamo jxn yakima sheboygan olympic humboldt newjersey cosprings springfield beaumont macon eauclaire
+ batonrouge buffalo mohave wilmington rochester sfbay northmiss bakersfield neworleans catskills wausau
+ akroncanton cnj merced chambana flint capecod nh yuma tulsa charlottesville easternshore desmoines
+ athensga austin newlondon outerbanks fortwayne dayton wyoming watertown provo medford texarkana cleveland
+ memphis amarillo limaohio augusta flagstaff jackson plattsburgh peoria skagit saltlakecity saginaw
+ portland syracuse swmi baltimore monroe littlerock boise laredo boulder philadelphia sandusky salem rmn
+ montgomery blacksburg centralmich logan albuquerque losangeles poconos westslope southbend siouxcity reno
+ porthuron greensboro orangecounty fargo ogden charlotte allentown joplin chautauqua lakecharles omaha
+ springfieldil roswell montana killeen milwaukee nd williamsport columbia racine southcoast ames huntington
  cincinnati auburn miami
- ).collect{|p| "#{p}.craigslist.org"},
+ ).collect{|p| "#{p}.craigslist.org"}.sort,
  CraigScrape::GeoListings.find_sites(
  ["us","- us/fl", "+ us/fl/miami", ' -jacksonville.craigslist.org'], hier_dir
- )
+ ).sort
  )
-
+
  end

  end
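
The test-side changes above are almost entirely whitespace; the one behavioral tweak is that the larger find_sites assertions now call .sort on both the expected list and the returned list, so they no longer depend on the order in which sites come back. A small self-contained illustration of the pattern; the test class name and the site values are hypothetical:

    require 'test/unit'

    class FindSitesOrderTest < Test::Unit::TestCase
      def test_order_insensitive_comparison
        expected = %w(miami fortmyers keys).collect{|p| "#{p}.craigslist.org"}
        actual   = ["keys.craigslist.org", "miami.craigslist.org", "fortmyers.craigslist.org"]

        # Sorting both sides makes the assertion independent of result order.
        assert_equal expected.sort, actual.sort
      end
    end
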