olek-libcraigscrape 1.0.3 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +12 -6
- data/COPYING.LESSER +1 -1
- data/README +10 -10
- data/Rakefile +5 -54
- data/bin/craig_report_schema.yml +3 -3
- data/bin/craigwatch +32 -44
- data/bin/report_mailer/report.html.erb +17 -0
- data/bin/report_mailer/{craigslist_report.plain.erb → report.text.erb} +6 -6
- data/lib/geo_listings.rb +24 -24
- data/lib/libcraigscrape.rb +6 -11
- data/lib/listings.rb +62 -45
- data/lib/posting.rb +153 -106
- data/lib/scraper.rb +37 -94
- data/test/libcraigscrape_test_helpers.rb +10 -10
- data/test/test_craigslist_geolisting.rb +53 -53
- data/test/test_craigslist_listing.rb +26 -26
- data/test/test_craigslist_posting.rb +39 -38
- metadata +38 -114
- data/bin/report_mailer/craigslist_report.html.erb +0 -17
data/lib/scraper.rb
CHANGED
@@ -5,14 +5,14 @@
|
|
5
5
|
# - Basic http and connection handling methods
|
6
6
|
# - html utility methods used by objects
|
7
7
|
# - Common Errors
|
8
|
-
# You should never need to include this file directly, as all of libcraigscrape's objects and methods
|
8
|
+
# You should never need to include this file directly, as all of libcraigscrape's objects and methods
|
9
9
|
# are loaded when you use <tt>require 'libcraigscrape'</tt> in your code.
|
10
10
|
#
|
11
11
|
|
12
|
-
# Scraper is a general-pupose base class for all libcraigscrape Objects. Scraper facilitates all http-related
|
12
|
+
# Scraper is a general-purpose base class for all libcraigscrape Objects. Scraper facilitates all http-related
|
13
13
|
# functionality, and adds some useful helpers for dealing with eager-loading of http-objects and general html
|
14
14
|
# methods. It also contains the http-related cattr_accessors:
|
15
|
-
#
|
15
|
+
#
|
16
16
|
# <b>logger</b> - a Logger object to write http debug notices to. Defaults to nil
|
17
17
|
#
|
18
18
|
# <b>retries_on_fetch_fail</b> - The number of times to retry a failed uri download. Defaults to 8
|
@@ -23,31 +23,22 @@
|
|
23
23
|
#
|
24
24
|
# <b>sleep_between_404_retries</b> - The number of seconds to sleep between successive attempts in the case of a Resource Not Found error. Defaults to 3.
|
25
25
|
#
|
26
|
+
|
26
27
|
class CraigScrape::Scraper
|
27
28
|
cattr_accessor :logger
|
28
|
-
cattr_accessor :sleep_between_fetch_retries
|
29
|
-
cattr_accessor :retries_on_fetch_fail
|
30
|
-
cattr_accessor :retries_on_404_fail
|
31
|
-
cattr_accessor :sleep_between_404_retries
|
32
|
-
cattr_accessor :maximum_redirects_per_request
|
33
29
|
|
34
30
|
URL_PARTS = /^(?:([^\:]+)\:\/\/([^\/]*))?(.*)$/
|
35
31
|
HTML_TAG = /<\/?[^>]*>/
|
36
|
-
# We have to specify this to nokogiri. Sometimes it tries to figure out encoding on its own, and craigslist users post crazy bytes sometimes
|
32
|
+
# We have to specify this to nokogiri. Sometimes it tries to figure out encoding on its own, and craigslist users post crazy bytes sometimes
|
37
33
|
HTML_ENCODING = "UTF-8"
|
38
34
|
|
35
|
+
HTTP_HEADERS = { "Cache-Control" => "no-cache", "Pragma" => "no-cache",
|
36
|
+
"Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
37
|
+
"User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19"}
|
38
|
+
|
39
39
|
# Returns the full url that corresponds to this resource
|
40
40
|
attr_reader :url
|
41
41
|
|
42
|
-
# Set some defaults:
|
43
|
-
self.retries_on_fetch_fail = 8
|
44
|
-
self.sleep_between_fetch_retries = 30
|
45
|
-
|
46
|
-
self.retries_on_404_fail = 3
|
47
|
-
self.sleep_between_404_retries = 3
|
48
|
-
|
49
|
-
self.maximum_redirects_per_request = 20
|
50
|
-
|
51
42
|
class BadConstructionError < StandardError #:nodoc:
|
52
43
|
end
|
53
44
|
|
@@ -57,15 +48,9 @@ class CraigScrape::Scraper
|
|
57
48
|
class BadUrlError < StandardError #:nodoc:
|
58
49
|
end
|
59
50
|
|
60
|
-
class MaxRedirectError < StandardError #:nodoc:
|
61
|
-
end
|
62
|
-
|
63
51
|
class FetchError < StandardError #:nodoc:
|
64
52
|
end
|
65
|
-
|
66
|
-
class ResourceNotFoundError < StandardError #:nodoc:
|
67
|
-
end
|
68
|
-
|
53
|
+
|
69
54
|
# Scraper Objects can be created from either a full URL (string), or a Hash.
|
70
55
|
# Currently, this initializer isn't intended to be called by libcraigscrape API users, though
|
71
56
|
# if you know what you're doing - feel free to try this out.
|
@@ -88,7 +73,7 @@ class CraigScrape::Scraper
|
|
88
73
|
raise BadConstructionError, ("Unrecognized parameter passed to %s.new %s}" % [self.class.to_s, init_via.class.inspect])
|
89
74
|
end
|
90
75
|
end
|
91
|
-
|
76
|
+
|
92
77
|
# Indicates whether the resource has yet been retrieved from its associated url.
|
93
78
|
# This is useful to distinguish whether the instance was instantiated for the purpose of an eager-load,
|
94
79
|
# but hasn't yet been fetched.
|
@@ -101,21 +86,27 @@ class CraigScrape::Scraper
|
|
101
86
|
end
|
102
87
|
|
103
88
|
private
|
104
|
-
|
89
|
+
|
105
90
|
# Returns text with all html tags removed.
|
106
91
|
def strip_html(str)
|
107
|
-
str.gsub HTML_TAG, "" if str
|
92
|
+
he_decode(str).gsub HTML_TAG, "" if str
|
108
93
|
end
|
109
|
-
|
94
|
+
|
110
95
|
# Easy way to fail noisily:
|
111
|
-
def parse_error
|
112
|
-
|
96
|
+
def parse_error!(fields = nil)
|
97
|
+
raise ParseError, "Error while parsing %s:\n %s%s" % [
|
98
|
+
self.class.to_s, html,
|
99
|
+
(fields) ? ("\nRequired fields missing: %s" % fields.join(', ')) : '']
|
100
|
+
end
|
101
|
+
|
113
102
|
# Returns text with all html entities converted to their respective characters.
|
114
103
|
def he_decode(text); self.class.he_decode text; end
|
115
104
|
|
116
105
|
# Returns text with all html entities converted to their respective characters.
|
117
|
-
def self.he_decode(text)
|
118
|
-
|
106
|
+
def self.he_decode(text)
|
107
|
+
HTMLEntities.new.decode text
|
108
|
+
end
|
109
|
+
|
119
110
|
# Derives a full url, using the current object's url and the provided href
|
120
111
|
def url_from_href(href) #:nodoc:
|
121
112
|
scheme, host, path = $1, $2, $3 if URL_PARTS.match href
|
@@ -132,81 +123,33 @@ class CraigScrape::Scraper
|
|
132
123
|
|
133
124
|
'%s://%s%s' % [scheme, host, path]
|
134
125
|
end
|
126
|
+
|
127
|
+
def fetch_uri(uri)
|
128
|
+
logger.info "Requesting: %s" % [@url.inspect] if logger
|
135
129
|
|
136
|
-
|
137
|
-
logger.info "Requesting (%d): %s" % [redirect_count, @url.inspect] if logger
|
138
|
-
|
139
|
-
raise MaxRedirectError, "Max redirects (#{redirect_count}) reached for URL: #{@url}" if redirect_count > self.maximum_redirects_per_request-1
|
140
|
-
|
141
|
-
case uri.scheme
|
130
|
+
(case uri.scheme
|
142
131
|
when 'file'
|
143
132
|
# If this is a directory, we'll try to approximate http a bit by loading a '/index.html'
|
144
|
-
File.read( File.directory?(uri.path) ?
|
133
|
+
File.read( File.directory?(uri.path) ?
|
134
|
+
"#{uri.path}/index.html" : uri.path , :encoding => 'BINARY')
|
145
135
|
when /^http[s]?/
|
146
|
-
|
136
|
+
resp = Typhoeus.get uri.to_s, :followlocation => true,
|
137
|
+
:headers => HTTP_HEADERS
|
138
|
+
resp.response_body
|
147
139
|
else
|
148
140
|
raise BadUrlError, "Unknown URI scheme for the url: #{@url}"
|
149
|
-
end
|
150
|
-
end
|
151
|
-
|
152
|
-
def fetch_http(uri, redirect_count = 0)
|
153
|
-
fetch_attempts = 0
|
154
|
-
resource_not_found_attempts = 0
|
155
|
-
|
156
|
-
begin
|
157
|
-
# This handles the redirects for us
|
158
|
-
resp, data = Net::HTTP.new( uri.host, uri.port).get uri.request_uri
|
159
|
-
|
160
|
-
if resp.response.code == "200"
|
161
|
-
# Check for gzip, and decode:
|
162
|
-
data = Zlib::GzipReader.new(StringIO.new(data)).read if resp.response.header['Content-Encoding'] == 'gzip'
|
163
|
-
|
164
|
-
data
|
165
|
-
elsif resp.response['Location']
|
166
|
-
redirect_to = resp.response['Location']
|
167
|
-
|
168
|
-
fetch_uri URI.parse(url_from_href(redirect_to)), redirect_count+1
|
169
|
-
else
|
170
|
-
# Sometimes Craigslist seems to return 404's for no good reason, and a subsequent fetch will give you what you want
|
171
|
-
raise ResourceNotFoundError, 'Unable to fetch "%s" (%s)' % [ @url, resp.response.code ]
|
172
|
-
end
|
173
|
-
rescue ResourceNotFoundError => err
|
174
|
-
logger.info err.message if logger
|
175
|
-
|
176
|
-
resource_not_found_attempts += 1
|
177
|
-
|
178
|
-
if resource_not_found_attempts <= self.retries_on_404_fail
|
179
|
-
sleep self.sleep_between_404_retries if self.sleep_between_404_retries
|
180
|
-
logger.info 'Retrying ....' if logger
|
181
|
-
retry
|
182
|
-
else
|
183
|
-
raise err
|
184
|
-
end
|
185
|
-
rescue FetchError,Timeout::Error,Errno::ECONNRESET,EOFError => err
|
186
|
-
logger.info 'Timeout error while requesting "%s"' % @url if logger and err.class == Timeout::Error
|
187
|
-
logger.info 'Connection reset while requesting "%s"' % @url if logger and err.class == Errno::ECONNRESET
|
188
|
-
|
189
|
-
fetch_attempts += 1
|
190
|
-
|
191
|
-
if fetch_attempts <= self.retries_on_fetch_fail
|
192
|
-
sleep self.sleep_between_fetch_retries if self.sleep_between_fetch_retries
|
193
|
-
logger.info 'Retrying fetch ....' if logger
|
194
|
-
retry
|
195
|
-
else
|
196
|
-
raise err
|
197
|
-
end
|
198
|
-
end
|
141
|
+
end).force_encoding("ISO-8859-1").encode("UTF-8")
|
199
142
|
end
|
200
|
-
|
143
|
+
|
201
144
|
# Returns a string, of the current URI's source code
|
202
145
|
def html_source
|
203
146
|
@html_source ||= fetch_uri uri if uri
|
204
147
|
@html_source
|
205
148
|
end
|
206
|
-
|
149
|
+
|
207
150
|
# Returns a Nokogiri parse of the current URI
|
208
151
|
def html
|
209
152
|
@html ||= Nokogiri::HTML html_source, nil, HTML_ENCODING if html_source
|
210
153
|
@html
|
211
154
|
end
|
212
|
-
end
|
155
|
+
end
|
@@ -2,36 +2,36 @@ module LibcraigscrapeTestHelpers
|
|
2
2
|
def relative_uri_for(filename)
|
3
3
|
'file://%s/%s' % [File.dirname(File.expand_path(__FILE__)), filename]
|
4
4
|
end
|
5
|
-
|
5
|
+
|
6
6
|
def pp_assertions(obj, obj_name)
|
7
7
|
probable_accessors = (obj.methods-obj.class.superclass.methods)
|
8
8
|
|
9
9
|
puts
|
10
10
|
probable_accessors.sort.each do |m|
|
11
11
|
val = obj.send(m.to_sym)
|
12
|
-
|
12
|
+
|
13
13
|
# There's a good number of transformations worth doing here, I'll just start like this for now:
|
14
14
|
if val.kind_of? Time
|
15
15
|
# I've decided this is the easiest way to understand and test a time
|
16
16
|
val = val.to_a
|
17
17
|
m = "#{m}.to_a"
|
18
18
|
end
|
19
|
-
|
20
|
-
if val.kind_of? Hash and val.length > 5
|
19
|
+
|
20
|
+
if val.kind_of? Hash and val.length > 5
|
21
21
|
puts "assert_equal %s, %s.%s.length" % [val.length.inspect,obj_name,m]
|
22
|
-
|
23
|
-
val.keys.sort{|a,b| a <=> b }.each do |k|
|
22
|
+
|
23
|
+
val.keys.sort{|a,b| a <=> b }.each do |k|
|
24
24
|
puts "assert_equal %s, %s.%s[%s]" % [val[k].inspect,obj_name,m,k.inspect]
|
25
25
|
end
|
26
26
|
# elsif val.kind_of? Array
|
27
27
|
# puts "assert_equal %s, %s.%s.length" % [val.length.inspect,obj_name,m]
|
28
|
-
#
|
29
|
-
# val.each_index do |i|
|
28
|
+
#
|
29
|
+
# val.each_index do |i|
|
30
30
|
# pp_assertions val[i], "%s.%s[%s]" % [obj_name,m,i.inspect]
|
31
31
|
# end
|
32
32
|
else
|
33
33
|
puts "assert_equal %s, %s.%s" % [val.inspect,obj_name,m]
|
34
34
|
end
|
35
|
-
end
|
35
|
+
end
|
36
36
|
end
|
37
|
-
end
|
37
|
+
end
|
@@ -6,13 +6,13 @@ require File.dirname(__FILE__)+'/libcraigscrape_test_helpers'
|
|
6
6
|
|
7
7
|
class CraigslistGeolistingTest < Test::Unit::TestCase
|
8
8
|
include LibcraigscrapeTestHelpers
|
9
|
-
|
9
|
+
|
10
10
|
def test_pukes
|
11
11
|
assert_raise(CraigScrape::Scraper::ParseError) do
|
12
12
|
CraigScrape::GeoListings.new( relative_uri_for('google.html') ).sites
|
13
13
|
end
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def test_geo_listings
|
17
17
|
geo_listing_us070209 = CraigScrape::GeoListings.new relative_uri_for(
|
18
18
|
'geolisting_samples/geo_listing_us070209.html'
|
@@ -345,10 +345,10 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
|
|
345
345
|
assert_equal "youngstown.craigslist.org", geo_listing_us070209.sites["youngstown"]
|
346
346
|
assert_equal "yubasutter.craigslist.org", geo_listing_us070209.sites["yuba-sutter"]
|
347
347
|
assert_equal "yuma.craigslist.org", geo_listing_us070209.sites["yuma"]
|
348
|
-
|
348
|
+
|
349
349
|
geo_listing_cn070209 = CraigScrape::GeoListings.new relative_uri_for(
|
350
350
|
'geolisting_samples/geo_listing_cn070209.html'
|
351
|
-
)
|
351
|
+
)
|
352
352
|
assert_equal "china", geo_listing_cn070209.location
|
353
353
|
assert_equal 6, geo_listing_cn070209.sites.length
|
354
354
|
assert_equal "beijing.craigslist.com.cn", geo_listing_cn070209.sites["beijing"]
|
@@ -357,10 +357,10 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
|
|
357
357
|
assert_equal "hongkong.craigslist.org", geo_listing_cn070209.sites["hong kong"]
|
358
358
|
assert_equal "shanghai.craigslist.com.cn", geo_listing_cn070209.sites["shanghai"]
|
359
359
|
assert_equal "shenzhen.craigslist.org", geo_listing_cn070209.sites["shenzhen"]
|
360
|
-
|
360
|
+
|
361
361
|
geo_listing_ca070209 = CraigScrape::GeoListings.new relative_uri_for(
|
362
362
|
'geolisting_samples/geo_listing_ca070209.html'
|
363
|
-
)
|
363
|
+
)
|
364
364
|
assert_equal "canada", geo_listing_ca070209.location
|
365
365
|
assert_equal 47, geo_listing_ca070209.sites.length
|
366
366
|
assert_equal "barrie.craigslist.ca", geo_listing_ca070209.sites["barrie"]
|
@@ -410,28 +410,28 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
|
|
410
410
|
assert_equal "whistler.craigslist.ca", geo_listing_ca070209.sites["whistler, BC"]
|
411
411
|
assert_equal "windsor.craigslist.ca", geo_listing_ca070209.sites["windsor"]
|
412
412
|
assert_equal "winnipeg.craigslist.ca", geo_listing_ca070209.sites["winnipeg"]
|
413
|
-
|
413
|
+
|
414
414
|
geo_listing_ca_sk07020 = CraigScrape::GeoListings.new relative_uri_for(
|
415
415
|
'geolisting_samples/geo_listing_ca_sk070209.html'
|
416
|
-
)
|
416
|
+
)
|
417
417
|
assert_equal "canada", geo_listing_ca_sk07020.location
|
418
|
-
assert_equal(
|
419
|
-
{ "saskatoon" => "saskatoon.craigslist.ca", "regina" => "regina.craigslist.ca" },
|
418
|
+
assert_equal(
|
419
|
+
{ "saskatoon" => "saskatoon.craigslist.ca", "regina" => "regina.craigslist.ca" },
|
420
420
|
geo_listing_ca_sk07020.sites
|
421
421
|
)
|
422
422
|
end
|
423
|
-
|
423
|
+
|
424
424
|
def test_sites_in_path
|
425
425
|
# This was really tough to test, and in the end, I don't know just how useful this really is...
|
426
426
|
hier_dir = relative_uri_for 'geolisting_samples/hierarchy_test071009/'
|
427
|
-
|
427
|
+
|
428
428
|
%w(
|
429
|
-
us/fl/miami /us/fl/miami/ us/fl/miami/ /us/fl/miami us/fl/miami/nonsense
|
429
|
+
us/fl/miami /us/fl/miami/ us/fl/miami/ /us/fl/miami us/fl/miami/nonsense
|
430
430
|
us/fl/miami/nonsense/more-nonsense us/fl/miami/south\ florida
|
431
431
|
).each do |path|
|
432
432
|
assert_equal ["miami.craigslist.org"], CraigScrape::GeoListings.sites_in_path( path, hier_dir )
|
433
433
|
end
|
434
|
-
|
434
|
+
|
435
435
|
%w( us/fl /us/fl us/fl/ /us/fl/ ).each do |path|
|
436
436
|
assert_equal(
|
437
437
|
%w(
|
@@ -441,20 +441,20 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
|
|
441
441
|
CraigScrape::GeoListings.sites_in_path( path, hier_dir )
|
442
442
|
)
|
443
443
|
end
|
444
|
-
|
444
|
+
|
445
445
|
# This tests those escaped funky paths. I *think* this file-based test is actually indicative
|
446
446
|
# that the http-retrieval version works as well;
|
447
447
|
us_fl_mia_ftmeyers = CraigScrape::GeoListings.sites_in_path(
|
448
448
|
"us/fl/ft myers \\/ SW florida", hier_dir
|
449
449
|
)
|
450
450
|
assert_equal ["fortmyers.craigslist.org"], us_fl_mia_ftmeyers
|
451
|
-
|
451
|
+
|
452
452
|
# make sure we puke on obvious bad-stuff. I *think* this file-based test is actually indicative
|
453
453
|
# that the http-retrieval version works as well:
|
454
454
|
assert_raise(CraigScrape::GeoListings::BadGeoListingPath) do
|
455
455
|
CraigScrape::GeoListings.sites_in_path "us/fl/nonexist", hier_dir
|
456
456
|
end
|
457
|
-
|
457
|
+
|
458
458
|
assert_raise(CraigScrape::GeoListings::BadGeoListingPath) do
|
459
459
|
# You'll notice that we could actually guess a decent match, but we won't:
|
460
460
|
CraigScrape::GeoListings.sites_in_path "us/fl/miami/nonexist", hier_dir
|
@@ -465,57 +465,57 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
|
|
465
465
|
hier_dir = relative_uri_for 'geolisting_samples/hierarchy_test071009/'
|
466
466
|
|
467
467
|
assert_equal(
|
468
|
-
%w(miami.craigslist.org),
|
469
|
-
CraigScrape::GeoListings.find_sites(
|
470
|
-
["us/fl/south florida","+ us/fl/south florida", "-newyork.craigslist.org"],
|
468
|
+
%w(miami.craigslist.org),
|
469
|
+
CraigScrape::GeoListings.find_sites(
|
470
|
+
["us/fl/south florida","+ us/fl/south florida", "-newyork.craigslist.org"],
|
471
471
|
hier_dir
|
472
472
|
)
|
473
473
|
)
|
474
|
-
|
474
|
+
|
475
475
|
assert_equal(
|
476
476
|
%w(
|
477
|
-
jacksonville panamacity orlando fortmyers keys tallahassee ocala gainesville tampa
|
477
|
+
jacksonville panamacity orlando fortmyers keys tallahassee ocala gainesville tampa
|
478
478
|
pensacola daytona treasure sarasota staugustine spacecoast lakeland newyork
|
479
|
-
).collect{|p| "#{p}.craigslist.org"},
|
480
|
-
CraigScrape::GeoListings.find_sites( ["us/fl","-us/fl/miami", "+ newyork.craigslist.org"], hier_dir)
|
479
|
+
).collect{|p| "#{p}.craigslist.org"}.sort,
|
480
|
+
CraigScrape::GeoListings.find_sites( ["us/fl","-us/fl/miami", "+ newyork.craigslist.org"], hier_dir).sort
|
481
481
|
)
|
482
482
|
|
483
483
|
assert_equal(
|
484
484
|
%w(
|
485
|
-
westmd fortcollins charleston fayetteville dallas mendocino wichita valdosta terrahaute rockford erie
|
486
|
-
decatur cedarrapids stillwater collegestation charlestonwv albany sacramento houston kalamazoo fortsmith
|
485
|
+
westmd fortcollins charleston fayetteville dallas mendocino wichita valdosta terrahaute rockford erie
|
486
|
+
decatur cedarrapids stillwater collegestation charlestonwv albany sacramento houston kalamazoo fortsmith
|
487
487
|
maine minneapolis stockton pennstate bend grandisland palmsprings nmi waterloo topeka eastnc greenbay york
|
488
|
-
utica stgeorge oklahomacity grandrapids eastidaho lancaster gulfport sandiego reading kpr fresno iowacity
|
489
|
-
chicago tuscaloosa smd monterey yubasutter victoriatx sd knoxville gadsden jonesboro ksu youngstown toledo
|
490
|
-
lascruces annarbor danville delaware parkersburg appleton stcloud richmond muskegon jerseyshore redding
|
491
|
-
ithaca hartford evansville corpuschristi binghamton chico modesto lynchburg hattiesburg morgantown
|
492
|
-
harrisonburg lubbock carbondale florencesc imperial wenatchee semo savannah prescott lacrosse longisland
|
493
|
-
huntsville santabarbara janesville mankato santafe pullman louisville lexington brunswick duluth columbus
|
494
|
-
hudsonvalley pittsburgh wheeling westky waco shreveport eastoregon corvallis winstonsalem denver
|
495
|
-
tippecanoe newhaven shoals wv greenville lansing detroit athensohio easttexas sanantonio raleigh phoenix
|
496
|
-
honolulu inlandempire pueblo chattanooga lawton worcester twinfalls roseburg roanoke fredericksburg
|
497
|
-
annapolis asheville seattle scranton quadcities oregoncoast stlouis newyork mobile atlanta visalia
|
498
|
-
clarksville providence kansascity galveston madison bham harrisburg muncie bloomington anchorage ventura
|
499
|
-
up tricities rockies elpaso slo indianapolis fayar columbusga bellingham abilene wichitafalls boston
|
500
|
-
mcallen bn sierravista lasvegas sanmarcos nwct farmington mansfield jacksontn bgky altoona eugene
|
501
|
-
lafayette boone odessa spokane norfolk hickory burlington nashville lawrence hiltonhead elmira westernmass
|
502
|
-
southjersey myrtlebeach dothan goldcountry lincoln martinsburg dubuque brownsville washingtondc tucson
|
503
|
-
columbiamo jxn yakima sheboygan olympic humboldt newjersey cosprings springfield beaumont macon eauclaire
|
504
|
-
batonrouge buffalo mohave wilmington rochester sfbay northmiss bakersfield neworleans catskills wausau
|
505
|
-
akroncanton cnj merced chambana flint capecod nh yuma tulsa charlottesville easternshore desmoines
|
506
|
-
athensga austin newlondon outerbanks fortwayne dayton wyoming watertown provo medford texarkana cleveland
|
507
|
-
memphis amarillo limaohio augusta flagstaff jackson plattsburgh peoria skagit saltlakecity saginaw
|
508
|
-
portland syracuse swmi baltimore monroe littlerock boise laredo boulder philadelphia sandusky salem rmn
|
509
|
-
montgomery blacksburg centralmich logan albuquerque losangeles poconos westslope southbend siouxcity reno
|
510
|
-
porthuron greensboro orangecounty fargo ogden charlotte allentown joplin chautauqua lakecharles omaha
|
511
|
-
springfieldil roswell montana killeen milwaukee nd williamsport columbia racine southcoast ames huntington
|
488
|
+
utica stgeorge oklahomacity grandrapids eastidaho lancaster gulfport sandiego reading kpr fresno iowacity
|
489
|
+
chicago tuscaloosa smd monterey yubasutter victoriatx sd knoxville gadsden jonesboro ksu youngstown toledo
|
490
|
+
lascruces annarbor danville delaware parkersburg appleton stcloud richmond muskegon jerseyshore redding
|
491
|
+
ithaca hartford evansville corpuschristi binghamton chico modesto lynchburg hattiesburg morgantown
|
492
|
+
harrisonburg lubbock carbondale florencesc imperial wenatchee semo savannah prescott lacrosse longisland
|
493
|
+
huntsville santabarbara janesville mankato santafe pullman louisville lexington brunswick duluth columbus
|
494
|
+
hudsonvalley pittsburgh wheeling westky waco shreveport eastoregon corvallis winstonsalem denver
|
495
|
+
tippecanoe newhaven shoals wv greenville lansing detroit athensohio easttexas sanantonio raleigh phoenix
|
496
|
+
honolulu inlandempire pueblo chattanooga lawton worcester twinfalls roseburg roanoke fredericksburg
|
497
|
+
annapolis asheville seattle scranton quadcities oregoncoast stlouis newyork mobile atlanta visalia
|
498
|
+
clarksville providence kansascity galveston madison bham harrisburg muncie bloomington anchorage ventura
|
499
|
+
up tricities rockies elpaso slo indianapolis fayar columbusga bellingham abilene wichitafalls boston
|
500
|
+
mcallen bn sierravista lasvegas sanmarcos nwct farmington mansfield jacksontn bgky altoona eugene
|
501
|
+
lafayette boone odessa spokane norfolk hickory burlington nashville lawrence hiltonhead elmira westernmass
|
502
|
+
southjersey myrtlebeach dothan goldcountry lincoln martinsburg dubuque brownsville washingtondc tucson
|
503
|
+
columbiamo jxn yakima sheboygan olympic humboldt newjersey cosprings springfield beaumont macon eauclaire
|
504
|
+
batonrouge buffalo mohave wilmington rochester sfbay northmiss bakersfield neworleans catskills wausau
|
505
|
+
akroncanton cnj merced chambana flint capecod nh yuma tulsa charlottesville easternshore desmoines
|
506
|
+
athensga austin newlondon outerbanks fortwayne dayton wyoming watertown provo medford texarkana cleveland
|
507
|
+
memphis amarillo limaohio augusta flagstaff jackson plattsburgh peoria skagit saltlakecity saginaw
|
508
|
+
portland syracuse swmi baltimore monroe littlerock boise laredo boulder philadelphia sandusky salem rmn
|
509
|
+
montgomery blacksburg centralmich logan albuquerque losangeles poconos westslope southbend siouxcity reno
|
510
|
+
porthuron greensboro orangecounty fargo ogden charlotte allentown joplin chautauqua lakecharles omaha
|
511
|
+
springfieldil roswell montana killeen milwaukee nd williamsport columbia racine southcoast ames huntington
|
512
512
|
cincinnati auburn miami
|
513
|
-
).collect{|p| "#{p}.craigslist.org"},
|
513
|
+
).collect{|p| "#{p}.craigslist.org"}.sort,
|
514
514
|
CraigScrape::GeoListings.find_sites(
|
515
515
|
["us","- us/fl", "+ us/fl/miami", ' -jacksonville.craigslist.org'], hier_dir
|
516
|
-
)
|
516
|
+
).sort
|
517
517
|
)
|
518
|
-
|
518
|
+
|
519
519
|
end
|
520
520
|
|
521
521
|
end
|