olek-libcraigscrape 1.0.3 → 1.1.0

data/lib/scraper.rb CHANGED
@@ -5,14 +5,14 @@
  # - Basic http and connection handling methods
  # - html utility methods used by objects
  # - Common Errors
- # You should never need to include this file directly, as all of libcraigscrape's objects and methods
+ # You should never need to include this file directly, as all of libcraigscrape's objects and methods
  # are loaded when you use <tt>require 'libcraigscrape'</tt> in your code.
  #
 
- # Scraper is a general-pupose base class for all libcraigscrape Objects. Scraper facilitates all http-related
+ # Scraper is a general-purpose base class for all libcraigscrape Objects. Scraper facilitates all http-related
  # functionality, and adds some useful helpers for dealing with eager-loading of http-objects and general html
  # methods. It also contains the http-related cattr_accessors:
- #
+ #
  # <b>logger</b> - a Logger object to debug http notices to. Defaults to nil
  #
  # <b>retries_on_fetch_fail</b> - The number of times to retry a failed uri download. Defaults to 8
@@ -23,31 +23,22 @@
  #
  # <b>sleep_between_404_retries</b> - The amount of seconds to sleep, between successive attempts in the case of a Resource Not Found error. Defaults to 3.
  #
+
  class CraigScrape::Scraper
  cattr_accessor :logger
- cattr_accessor :sleep_between_fetch_retries
- cattr_accessor :retries_on_fetch_fail
- cattr_accessor :retries_on_404_fail
- cattr_accessor :sleep_between_404_retries
- cattr_accessor :maximum_redirects_per_request
 
  URL_PARTS = /^(?:([^\:]+)\:\/\/([^\/]*))?(.*)$/
  HTML_TAG = /<\/?[^>]*>/
- # We have to specify this to nokogiri. Sometimes it tries to figure out encoding on its own, and craigslist users post crazy bytes sometimes
+ # We have to specify this to nokogiri. Sometimes it tries to figure out encoding on its own, and craigslist users post crazy bytes sometimes
  HTML_ENCODING = "UTF-8"
 
+ HTTP_HEADERS = { "Cache-Control" => "no-cache", "Pragma" => "no-cache",
+ "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+ "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19"}
+
  # Returns the full url that corresponds to this resource
  attr_reader :url
 
- # Set some defaults:
- self.retries_on_fetch_fail = 8
- self.sleep_between_fetch_retries = 30
-
- self.retries_on_404_fail = 3
- self.sleep_between_404_retries = 3
-
- self.maximum_redirects_per_request = 20
-
  class BadConstructionError < StandardError #:nodoc:
  end
 
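
With the retry and redirect accessors removed (Typhoeus, introduced in the fetch_uri hunk below, follows redirects on its own), logger is the only remaining cattr_accessor to configure. A minimal sketch of wiring it up; the STDOUT destination is just an assumption for illustration:

    require 'logger'
    require 'libcraigscrape'

    # fetch_uri logs each requested URL through this, when set:
    CraigScrape::Scraper.logger = Logger.new($stdout)
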
@@ -57,15 +48,9 @@ class CraigScrape::Scraper
  class BadUrlError < StandardError #:nodoc:
  end
 
- class MaxRedirectError < StandardError #:nodoc:
- end
-
  class FetchError < StandardError #:nodoc:
  end
-
- class ResourceNotFoundError < StandardError #:nodoc:
- end
-
+
  # Scraper Objects can be created from either a full URL (string), or a Hash.
  # Currently, this initializer isn't intended to be called from libcraigslist API users, though
  # if you know what you're doing - feel free to try this out.
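
Code that rescued the removed MaxRedirectError or ResourceNotFoundError will now hit a NameError on the constant reference itself; only the surviving classes can be caught. A hedged sketch of the narrowed rescue, assuming the usual CraigScrape::Listings entry point and an arbitrary listings URL:

    require 'libcraigscrape'

    begin
      CraigScrape::Listings.new('https://miami.craigslist.org/sss/').posts
    rescue CraigScrape::Scraper::FetchError,
           CraigScrape::Scraper::BadUrlError => e
      warn "scrape failed: #{e.message}"
    end
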
@@ -88,7 +73,7 @@ class CraigScrape::Scraper
  raise BadConstructionError, ("Unrecognized parameter passed to %s.new %s" % [self.class.to_s, init_via.class.inspect])
  end
  end
-
+
  # Indicates whether the resource has yet been retrieved from its associated url.
  # This is useful to distinguish whether the instance was instantiated for the purpose of an eager-load,
  # but hasn't yet been fetched.
@@ -101,21 +86,27 @@ class CraigScrape::Scraper
  end
 
  private
-
+
  # Returns text with all html tags removed.
  def strip_html(str)
- str.gsub HTML_TAG, "" if str
+ he_decode(str).gsub HTML_TAG, "" if str
  end
-
+
  # Easy way to fail noisily:
- def parse_error!; raise ParseError, "Error while parsing %s:\n %s" % [self.class.to_s, html]; end
-
+ def parse_error!(fields = nil)
+ raise ParseError, "Error while parsing %s:\n %s%s" % [
+ self.class.to_s, html,
+ (fields) ? ("\nRequired fields missing: %s" % fields.join(', ')) : '']
+ end
+
  # Returns text with all html entities converted to respective ascii character.
  def he_decode(text); self.class.he_decode text; end
 
  # Returns text with all html entities converted to respective ascii character.
- def self.he_decode(text); HTMLEntities.new.decode text; end
-
+ def self.he_decode(text)
+ HTMLEntities.new.decode text
+ end
+
  # Derives a full url, using the current object's url and the provided href
  def url_from_href(href) #:nodoc:
  scheme, host, path = $1, $2, $3 if URL_PARTS.match href
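
Two behavioral changes land in this hunk: strip_html now entity-decodes before removing tags, and parse_error! can name the fields that failed to parse. A sketch of both as they would be called from inside a Scraper subclass (both methods are private; the inputs are made-up examples):

    strip_html "<b>Ben &amp; Jerry&#39;s</b>"
    #=> "Ben & Jerry's"  (under the old tag-only strip, the entities survived)

    parse_error! %w(label post_date)
    # raises ParseError whose message now ends with:
    #   "Required fields missing: label, post_date"
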
@@ -132,81 +123,33 @@ class CraigScrape::Scraper
 
  '%s://%s%s' % [scheme, host, path]
  end
+
+ def fetch_uri(uri)
+ logger.info "Requesting: %s" % [@url.inspect] if logger
 
- def fetch_uri(uri, redirect_count = 0)
- logger.info "Requesting (%d): %s" % [redirect_count, @url.inspect] if logger
-
- raise MaxRedirectError, "Max redirects (#{redirect_count}) reached for URL: #{@url}" if redirect_count > self.maximum_redirects_per_request-1
-
- case uri.scheme
+ (case uri.scheme
  when 'file'
  # If this is a directory, we'll try to approximate http a bit by loading a '/index.html'
- File.read( File.directory?(uri.path) ? "#{uri.path}/index.html" : uri.path )
+ File.read( File.directory?(uri.path) ?
+ "#{uri.path}/index.html" : uri.path , :encoding => 'BINARY')
  when /^http[s]?/
- fetch_http uri, redirect_count
+ resp = Typhoeus.get uri.to_s, :followlocation => true,
+ :headers => HTTP_HEADERS
+ resp.response_body
  else
  raise BadUrlError, "Unknown URI scheme for the url: #{@url}"
- end
- end
-
- def fetch_http(uri, redirect_count = 0)
- fetch_attempts = 0
- resource_not_found_attempts = 0
-
- begin
- # This handles the redirects for us
- resp, data = Net::HTTP.new( uri.host, uri.port).get uri.request_uri
-
- if resp.response.code == "200"
- # Check for gzip, and decode:
- data = Zlib::GzipReader.new(StringIO.new(data)).read if resp.response.header['Content-Encoding'] == 'gzip'
-
- data
- elsif resp.response['Location']
- redirect_to = resp.response['Location']
-
- fetch_uri URI.parse(url_from_href(redirect_to)), redirect_count+1
- else
- # Sometimes Craigslist seems to return 404's for no good reason, and a subsequent fetch will give you what you want
- raise ResourceNotFoundError, 'Unable to fetch "%s" (%s)' % [ @url, resp.response.code ]
- end
- rescue ResourceNotFoundError => err
- logger.info err.message if logger
-
- resource_not_found_attempts += 1
-
- if resource_not_found_attempts <= self.retries_on_404_fail
- sleep self.sleep_between_404_retries if self.sleep_between_404_retries
- logger.info 'Retrying ....' if logger
- retry
- else
- raise err
- end
- rescue FetchError,Timeout::Error,Errno::ECONNRESET,EOFError => err
- logger.info 'Timeout error while requesting "%s"' % @url if logger and err.class == Timeout::Error
- logger.info 'Connection reset while requesting "%s"' % @url if logger and err.class == Errno::ECONNRESET
-
- fetch_attempts += 1
-
- if fetch_attempts <= self.retries_on_fetch_fail
- sleep self.sleep_between_fetch_retries if self.sleep_between_fetch_retries
- logger.info 'Retrying fetch ....' if logger
- retry
- else
- raise err
- end
- end
+ end).force_encoding("ISO-8859-1").encode("UTF-8")
  end
-
+
  # Returns a string, of the current URI's source code
  def html_source
  @html_source ||= fetch_uri uri if uri
  @html_source
  end
-
+
  # Returns an Nokogiri parse, of the current URI
  def html
  @html ||= Nokogiri::HTML html_source, nil, HTML_ENCODING if html_source
  @html
  end
- end
+ end
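
The whole Net::HTTP stack above (manual redirect chasing, gzip inflation, the 404 and timeout retry loops) collapses into one Typhoeus call, with redirect handling delegated to libcurl via :followlocation. The trailing force_encoding/encode chain re-labels the raw bytes as ISO-8859-1 and transcodes them to UTF-8; since every byte sequence is valid Latin-1, the transcode cannot raise no matter what bytes a craigslist user posted. A standalone sketch of the equivalent request (the URL is an arbitrary example):

    require 'typhoeus'
    require 'libcraigscrape'

    resp = Typhoeus.get 'https://geo.craigslist.org/iso/us/fl',
      :followlocation => true,
      :headers        => CraigScrape::Scraper::HTTP_HEADERS

    # Tag the bytes as Latin-1, then transcode to guaranteed-valid UTF-8:
    body = resp.response_body.force_encoding('ISO-8859-1').encode('UTF-8')
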
@@ -2,36 +2,36 @@ module LibcraigscrapeTestHelpers
  def relative_uri_for(filename)
  'file://%s/%s' % [File.dirname(File.expand_path(__FILE__)), filename]
  end
-
+
  def pp_assertions(obj, obj_name)
  probable_accessors = (obj.methods-obj.class.superclass.methods)
 
  puts
  probable_accessors.sort.each do |m|
  val = obj.send(m.to_sym)
-
+
  # There's a good number of transformations worth doing here, I'll just start like this for now:
  if val.kind_of? Time
  # I've decided this is the easiest way to understand and test a time
  val = val.to_a
  m = "#{m}.to_a"
  end
-
- if val.kind_of? Hash and val.length > 5
+
+ if val.kind_of? Hash and val.length > 5
  puts "assert_equal %s, %s.%s.length" % [val.length.inspect,obj_name,m]
-
- val.keys.sort{|a,b| a <=> b }.each do |k|
+
+ val.keys.sort{|a,b| a <=> b }.each do |k|
  puts "assert_equal %s, %s.%s[%s]" % [val[k].inspect,obj_name,m,k.inspect]
  end
  # elsif val.kind_of? Array
  # puts "assert_equal %s, %s.%s.length" % [val.length.inspect,obj_name,m]
- #
- # val.each_index do |i|
+ #
+ # val.each_index do |i|
  # pp_assertions val[i], "%s.%s[%s]" % [obj_name,m,i.inspect]
  # end
  else
  puts "assert_equal %s, %s.%s" % [val.inspect,obj_name,m]
  end
- end
+ end
  end
- end
+ end
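
Aside from trailing whitespace, pp_assertions is unchanged: it reflects over an object's non-inherited methods and prints ready-to-paste assert_equal lines, presumably how fixture-heavy tests like the ones below get drafted. A sketch of invoking it from a test, using a sample file this suite already ships:

    geo = CraigScrape::GeoListings.new relative_uri_for(
      'geolisting_samples/geo_listing_cn070209.html'
    )
    pp_assertions geo, 'geo'
    # emits lines such as:
    #   assert_equal "china", geo.location
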
@@ -6,13 +6,13 @@ require File.dirname(__FILE__)+'/libcraigscrape_test_helpers'
 
  class CraigslistGeolistingTest < Test::Unit::TestCase
  include LibcraigscrapeTestHelpers
-
+
  def test_pukes
  assert_raise(CraigScrape::Scraper::ParseError) do
  CraigScrape::GeoListings.new( relative_uri_for('google.html') ).sites
  end
  end
-
+
  def test_geo_listings
  geo_listing_us070209 = CraigScrape::GeoListings.new relative_uri_for(
  'geolisting_samples/geo_listing_us070209.html'
@@ -345,10 +345,10 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
  assert_equal "youngstown.craigslist.org", geo_listing_us070209.sites["youngstown"]
  assert_equal "yubasutter.craigslist.org", geo_listing_us070209.sites["yuba-sutter"]
  assert_equal "yuma.craigslist.org", geo_listing_us070209.sites["yuma"]
-
+
  geo_listing_cn070209 = CraigScrape::GeoListings.new relative_uri_for(
  'geolisting_samples/geo_listing_cn070209.html'
- )
+ )
  assert_equal "china", geo_listing_cn070209.location
  assert_equal 6, geo_listing_cn070209.sites.length
  assert_equal "beijing.craigslist.com.cn", geo_listing_cn070209.sites["beijing"]
@@ -357,10 +357,10 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
  assert_equal "hongkong.craigslist.org", geo_listing_cn070209.sites["hong kong"]
  assert_equal "shanghai.craigslist.com.cn", geo_listing_cn070209.sites["shanghai"]
  assert_equal "shenzhen.craigslist.org", geo_listing_cn070209.sites["shenzhen"]
-
+
  geo_listing_ca070209 = CraigScrape::GeoListings.new relative_uri_for(
  'geolisting_samples/geo_listing_ca070209.html'
- )
+ )
  assert_equal "canada", geo_listing_ca070209.location
  assert_equal 47, geo_listing_ca070209.sites.length
  assert_equal "barrie.craigslist.ca", geo_listing_ca070209.sites["barrie"]
@@ -410,28 +410,28 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
  assert_equal "whistler.craigslist.ca", geo_listing_ca070209.sites["whistler, BC"]
  assert_equal "windsor.craigslist.ca", geo_listing_ca070209.sites["windsor"]
  assert_equal "winnipeg.craigslist.ca", geo_listing_ca070209.sites["winnipeg"]
-
+
  geo_listing_ca_sk07020 = CraigScrape::GeoListings.new relative_uri_for(
  'geolisting_samples/geo_listing_ca_sk070209.html'
- )
+ )
  assert_equal "canada", geo_listing_ca_sk07020.location
- assert_equal(
- { "saskatoon" => "saskatoon.craigslist.ca", "regina" => "regina.craigslist.ca" },
+ assert_equal(
+ { "saskatoon" => "saskatoon.craigslist.ca", "regina" => "regina.craigslist.ca" },
  geo_listing_ca_sk07020.sites
  )
  end
-
+
  def test_sites_in_path
  # This was really tough to test, and in the end, I don't know just how useful this really is...
  hier_dir = relative_uri_for 'geolisting_samples/hierarchy_test071009/'
-
+
  %w(
- us/fl/miami /us/fl/miami/ us/fl/miami/ /us/fl/miami us/fl/miami/nonsense
+ us/fl/miami /us/fl/miami/ us/fl/miami/ /us/fl/miami us/fl/miami/nonsense
  us/fl/miami/nonsense/more-nonsense us/fl/miami/south\ florida
  ).each do |path|
  assert_equal ["miami.craigslist.org"], CraigScrape::GeoListings.sites_in_path( path, hier_dir )
  end
-
+
  %w( us/fl /us/fl us/fl/ /us/fl/ ).each do |path|
  assert_equal(
  %w(
@@ -441,20 +441,20 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
  CraigScrape::GeoListings.sites_in_path( path, hier_dir )
  )
  end
-
+
  # This tests those escaped funky paths. I *think* this file-based test is actually indicative
  # that the http-retrieval version works as well;
  us_fl_mia_ftmeyers = CraigScrape::GeoListings.sites_in_path(
  "us/fl/ft myers \\/ SW florida", hier_dir
  )
  assert_equal ["fortmyers.craigslist.org"], us_fl_mia_ftmeyers
-
+
  # make sure we puke on obvious bad-stuff. I *think* this file-based test is actually indicative
  # that the http-retrieval version works as well:
  assert_raise(CraigScrape::GeoListings::BadGeoListingPath) do
  CraigScrape::GeoListings.sites_in_path "us/fl/nonexist", hier_dir
  end
-
+
  assert_raise(CraigScrape::GeoListings::BadGeoListingPath) do
  # You'll notice that we could actually guess a decent match, but we won't:
  CraigScrape::GeoListings.sites_in_path "us/fl/miami/nonexist", hier_dir
@@ -465,57 +465,57 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
  hier_dir = relative_uri_for 'geolisting_samples/hierarchy_test071009/'
 
  assert_equal(
- %w(miami.craigslist.org),
- CraigScrape::GeoListings.find_sites(
- ["us/fl/south florida","+ us/fl/south florida", "-newyork.craigslist.org"],
+ %w(miami.craigslist.org),
+ CraigScrape::GeoListings.find_sites(
+ ["us/fl/south florida","+ us/fl/south florida", "-newyork.craigslist.org"],
  hier_dir
  )
  )
-
+
  assert_equal(
  %w(
- jacksonville panamacity orlando fortmyers keys tallahassee ocala gainesville tampa
+ jacksonville panamacity orlando fortmyers keys tallahassee ocala gainesville tampa
  pensacola daytona treasure sarasota staugustine spacecoast lakeland newyork
- ).collect{|p| "#{p}.craigslist.org"},
- CraigScrape::GeoListings.find_sites( ["us/fl","-us/fl/miami", "+ newyork.craigslist.org"], hier_dir)
+ ).collect{|p| "#{p}.craigslist.org"}.sort,
+ CraigScrape::GeoListings.find_sites( ["us/fl","-us/fl/miami", "+ newyork.craigslist.org"], hier_dir).sort
  )
 
  assert_equal(
  %w(
- westmd fortcollins charleston fayetteville dallas mendocino wichita valdosta terrahaute rockford erie
- decatur cedarrapids stillwater collegestation charlestonwv albany sacramento houston kalamazoo fortsmith
+ westmd fortcollins charleston fayetteville dallas mendocino wichita valdosta terrahaute rockford erie
+ decatur cedarrapids stillwater collegestation charlestonwv albany sacramento houston kalamazoo fortsmith
  maine minneapolis stockton pennstate bend grandisland palmsprings nmi waterloo topeka eastnc greenbay york
- utica stgeorge oklahomacity grandrapids eastidaho lancaster gulfport sandiego reading kpr fresno iowacity
- chicago tuscaloosa smd monterey yubasutter victoriatx sd knoxville gadsden jonesboro ksu youngstown toledo
- lascruces annarbor danville delaware parkersburg appleton stcloud richmond muskegon jerseyshore redding
- ithaca hartford evansville corpuschristi binghamton chico modesto lynchburg hattiesburg morgantown
- harrisonburg lubbock carbondale florencesc imperial wenatchee semo savannah prescott lacrosse longisland
- huntsville santabarbara janesville mankato santafe pullman louisville lexington brunswick duluth columbus
- hudsonvalley pittsburgh wheeling westky waco shreveport eastoregon corvallis winstonsalem denver
- tippecanoe newhaven shoals wv greenville lansing detroit athensohio easttexas sanantonio raleigh phoenix
- honolulu inlandempire pueblo chattanooga lawton worcester twinfalls roseburg roanoke fredericksburg
- annapolis asheville seattle scranton quadcities oregoncoast stlouis newyork mobile atlanta visalia
- clarksville providence kansascity galveston madison bham harrisburg muncie bloomington anchorage ventura
- up tricities rockies elpaso slo indianapolis fayar columbusga bellingham abilene wichitafalls boston
- mcallen bn sierravista lasvegas sanmarcos nwct farmington mansfield jacksontn bgky altoona eugene
- lafayette boone odessa spokane norfolk hickory burlington nashville lawrence hiltonhead elmira westernmass
- southjersey myrtlebeach dothan goldcountry lincoln martinsburg dubuque brownsville washingtondc tucson
- columbiamo jxn yakima sheboygan olympic humboldt newjersey cosprings springfield beaumont macon eauclaire
- batonrouge buffalo mohave wilmington rochester sfbay northmiss bakersfield neworleans catskills wausau
- akroncanton cnj merced chambana flint capecod nh yuma tulsa charlottesville easternshore desmoines
- athensga austin newlondon outerbanks fortwayne dayton wyoming watertown provo medford texarkana cleveland
- memphis amarillo limaohio augusta flagstaff jackson plattsburgh peoria skagit saltlakecity saginaw
- portland syracuse swmi baltimore monroe littlerock boise laredo boulder philadelphia sandusky salem rmn
- montgomery blacksburg centralmich logan albuquerque losangeles poconos westslope southbend siouxcity reno
- porthuron greensboro orangecounty fargo ogden charlotte allentown joplin chautauqua lakecharles omaha
- springfieldil roswell montana killeen milwaukee nd williamsport columbia racine southcoast ames huntington
+ utica stgeorge oklahomacity grandrapids eastidaho lancaster gulfport sandiego reading kpr fresno iowacity
+ chicago tuscaloosa smd monterey yubasutter victoriatx sd knoxville gadsden jonesboro ksu youngstown toledo
+ lascruces annarbor danville delaware parkersburg appleton stcloud richmond muskegon jerseyshore redding
+ ithaca hartford evansville corpuschristi binghamton chico modesto lynchburg hattiesburg morgantown
+ harrisonburg lubbock carbondale florencesc imperial wenatchee semo savannah prescott lacrosse longisland
+ huntsville santabarbara janesville mankato santafe pullman louisville lexington brunswick duluth columbus
+ hudsonvalley pittsburgh wheeling westky waco shreveport eastoregon corvallis winstonsalem denver
+ tippecanoe newhaven shoals wv greenville lansing detroit athensohio easttexas sanantonio raleigh phoenix
+ honolulu inlandempire pueblo chattanooga lawton worcester twinfalls roseburg roanoke fredericksburg
+ annapolis asheville seattle scranton quadcities oregoncoast stlouis newyork mobile atlanta visalia
+ clarksville providence kansascity galveston madison bham harrisburg muncie bloomington anchorage ventura
+ up tricities rockies elpaso slo indianapolis fayar columbusga bellingham abilene wichitafalls boston
+ mcallen bn sierravista lasvegas sanmarcos nwct farmington mansfield jacksontn bgky altoona eugene
+ lafayette boone odessa spokane norfolk hickory burlington nashville lawrence hiltonhead elmira westernmass
+ southjersey myrtlebeach dothan goldcountry lincoln martinsburg dubuque brownsville washingtondc tucson
+ columbiamo jxn yakima sheboygan olympic humboldt newjersey cosprings springfield beaumont macon eauclaire
+ batonrouge buffalo mohave wilmington rochester sfbay northmiss bakersfield neworleans catskills wausau
+ akroncanton cnj merced chambana flint capecod nh yuma tulsa charlottesville easternshore desmoines
+ athensga austin newlondon outerbanks fortwayne dayton wyoming watertown provo medford texarkana cleveland
+ memphis amarillo limaohio augusta flagstaff jackson plattsburgh peoria skagit saltlakecity saginaw
+ portland syracuse swmi baltimore monroe littlerock boise laredo boulder philadelphia sandusky salem rmn
+ montgomery blacksburg centralmich logan albuquerque losangeles poconos westslope southbend siouxcity reno
+ porthuron greensboro orangecounty fargo ogden charlotte allentown joplin chautauqua lakecharles omaha
+ springfieldil roswell montana killeen milwaukee nd williamsport columbia racine southcoast ames huntington
  cincinnati auburn miami
- ).collect{|p| "#{p}.craigslist.org"},
+ ).collect{|p| "#{p}.craigslist.org"}.sort,
  CraigScrape::GeoListings.find_sites(
  ["us","- us/fl", "+ us/fl/miami", ' -jacksonville.craigslist.org'], hier_dir
- )
+ ).sort
  )
-
+
  end
 
  end
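
The only substantive change in this final hunk is the trailing .sort appended to both the expected list and the find_sites result, which makes the assertions insensitive to the order in which sites come back. The pattern in isolation (sites stands in for any scraped list):

    expected = %w(miami newyork).collect{|p| "#{p}.craigslist.org"}.sort
    assert_equal expected, sites.sort   # passes for any ordering of sites
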