olek-libcraigscrape 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +12 -6
- data/COPYING.LESSER +1 -1
- data/README +10 -10
- data/Rakefile +5 -54
- data/bin/craig_report_schema.yml +3 -3
- data/bin/craigwatch +32 -44
- data/bin/report_mailer/report.html.erb +17 -0
- data/bin/report_mailer/{craigslist_report.plain.erb → report.text.erb} +6 -6
- data/lib/geo_listings.rb +24 -24
- data/lib/libcraigscrape.rb +6 -11
- data/lib/listings.rb +62 -45
- data/lib/posting.rb +153 -106
- data/lib/scraper.rb +37 -94
- data/test/libcraigscrape_test_helpers.rb +10 -10
- data/test/test_craigslist_geolisting.rb +53 -53
- data/test/test_craigslist_listing.rb +26 -26
- data/test/test_craigslist_posting.rb +39 -38
- metadata +38 -114
- data/bin/report_mailer/craigslist_report.html.erb +0 -17
data/lib/scraper.rb
CHANGED
@@ -5,14 +5,14 @@
 # - Basic http and connection handling methods
 # - html utility methods used by objects
 # - Common Errors
-# You should never need to include this file directly, as all of libcraigscrape's objects and methods
+# You should never need to include this file directly, as all of libcraigscrape's objects and methods
 # are loaded when you use <tt>require 'libcraigscrape'</tt> in your code.
 #
 
-# Scraper is a general-pupose base class for all libcraigscrape Objects. Scraper facilitates all http-related
+# Scraper is a general-pupose base class for all libcraigscrape Objects. Scraper facilitates all http-related
 # functionality, and adds some useful helpers for dealing with eager-loading of http-objects and general html
 # methods. It also contains the http-related cattr_accessors:
-#
+#
 # <b>logger</b> - a Logger object to debug http notices too. Defaults to nil
 #
 # <b>retries_on_fetch_fail</b> - The number of times to retry a failed uri download. Defaults to 8
@@ -23,31 +23,22 @@
 #
 # <b>sleep_between_404_retries</b> - The amount of seconds to sleep, between successive attempts in the case of a Resource Not Found error. Defaults to 3.
 #
+
 class CraigScrape::Scraper
   cattr_accessor :logger
-  cattr_accessor :sleep_between_fetch_retries
-  cattr_accessor :retries_on_fetch_fail
-  cattr_accessor :retries_on_404_fail
-  cattr_accessor :sleep_between_404_retries
-  cattr_accessor :maximum_redirects_per_request
 
   URL_PARTS = /^(?:([^\:]+)\:\/\/([^\/]*))?(.*)$/
   HTML_TAG = /<\/?[^>]*>/
-  # We have to specify this to nokogiri. Sometimes it tries to figure out encoding on its own, and craigslist users post crazy bytes sometimes
+  # We have to specify this to nokogiri. Sometimes it tries to figure out encoding on its own, and craigslist users post crazy bytes sometimes
   HTML_ENCODING = "UTF-8"
 
+  HTTP_HEADERS = { "Cache-Control" => "no-cache", "Pragma" => "no-cache",
+    "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19"}
+
   # Returns the full url that corresponds to this resource
   attr_reader :url
 
-  # Set some defaults:
-  self.retries_on_fetch_fail = 8
-  self.sleep_between_fetch_retries = 30
-
-  self.retries_on_404_fail = 3
-  self.sleep_between_404_retries = 3
-
-  self.maximum_redirects_per_request = 20
-
   class BadConstructionError < StandardError #:nodoc:
   end
 
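Note what disappears from the class surface in this hunk: every retry and redirect tuning knob (retries_on_fetch_fail, sleep_between_fetch_retries, retries_on_404_fail, sleep_between_404_retries, maximum_redirects_per_request) is removed together with its default, leaving logger as the only cattr_accessor, while the new HTTP_HEADERS constant hard-codes the request headers. A minimal sketch of the caller-visible impact, assuming code that tuned these knobs under 1.0.3:

    require 'logger'
    require 'libcraigscrape'

    # Worked against 1.0.3; raises NoMethodError against 1.1.0, because the
    # accessor was removed along with the hand-rolled retry loop it tuned:
    # CraigScrape::Scraper.retries_on_fetch_fail = 4

    # Still supported in 1.1.0 -- logging is the one knob that survives:
    CraigScrape::Scraper.logger = Logger.new($stderr)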
@@ -57,15 +48,9 @@ class CraigScrape::Scraper
   class BadUrlError < StandardError #:nodoc:
   end
 
-  class MaxRedirectError < StandardError #:nodoc:
-  end
-
   class FetchError < StandardError #:nodoc:
   end
-
-  class ResourceNotFoundError < StandardError #:nodoc:
-  end
-
+
   # Scraper Objects can be created from either a full URL (string), or a Hash.
   # Currently, this initializer isn't intended to be called from libcraigslist API users, though
   # if you know what you're doing - feel free to try this out.
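With MaxRedirectError and ResourceNotFoundError deleted (redirect limits and 404 retries are no longer handled in this class), rescue clauses written against 1.0.3 need updating. A hedged sketch of the migration; the Posting usage here is illustrative only:

    require 'libcraigscrape'

    begin
      CraigScrape::Posting.new('http://example.craigslist.org/abc/123.html').contents
    rescue CraigScrape::Scraper::FetchError => e
      # 1.0.3 callers may also have rescued ResourceNotFoundError or
      # MaxRedirectError here; in 1.1.0 those constants no longer exist and
      # referencing them raises NameError, so FetchError and BadUrlError are
      # what remain to rescue.
      warn "fetch failed: #{e.message}"
    end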
@@ -88,7 +73,7 @@ class CraigScrape::Scraper
       raise BadConstructionError, ("Unrecognized parameter passed to %s.new %s}" % [self.class.to_s, init_via.class.inspect])
     end
   end
-
+
   # Indicates whether the resource has yet been retrieved from its associated url.
   # This is useful to distinguish whether the instance was instantiated for the purpose of an eager-load,
   # but hasn't yet been fetched.
@@ -101,21 +86,27 @@ class CraigScrape::Scraper
   end
 
   private
-
+
   # Returns text with all html tags removed.
   def strip_html(str)
-    str.gsub HTML_TAG, "" if str
+    he_decode(str).gsub HTML_TAG, "" if str
   end
-
+
   # Easy way to fail noisily:
-  def parse_error
-
+  def parse_error!(fields = nil)
+    raise ParseError, "Error while parsing %s:\n %s%s" % [
+      self.class.to_s, html,
+      (fields) ? ("\nRequired fields missing: %s" % fields.join(', ')) : '']
+  end
+
   # Returns text with all html entities converted to respective ascii character.
   def he_decode(text); self.class.he_decode text; end
 
   # Returns text with all html entities converted to respective ascii character.
-  def self.he_decode(text)
-
+  def self.he_decode(text)
+    HTMLEntities.new.decode text
+  end
+
   # Derives a full url, using the current object's url and the provided href
   def url_from_href(href) #:nodoc:
     scheme, host, path = $1, $2, $3 if URL_PARTS.match href
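Two behavioral changes land in this hunk: strip_html now runs its input through he_decode before stripping tags, and the renamed parse_error! takes an optional array of missing field names that is appended to the ParseError message. A standalone before/after sketch of the strip_html change (private in the gem; reproduced here with its own HTMLEntities setup, exactly as self.he_decode uses it):

    require 'htmlentities'

    HTML_TAG = /<\/?[^>]*>/

    def strip_html_103(str)   # 1.0.3: tags stripped, entities left encoded
      str.gsub HTML_TAG, "" if str
    end

    def strip_html_110(str)   # 1.1.0: entities decoded first, then tags stripped
      HTMLEntities.new.decode(str).gsub(HTML_TAG, "") if str
    end

    strip_html_103 "Tools &amp; Dies - <b>cheap</b>"  # => "Tools &amp; Dies - cheap"
    strip_html_110 "Tools &amp; Dies - <b>cheap</b>"  # => "Tools & Dies - cheap"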
@@ -132,81 +123,33 @@ class CraigScrape::Scraper
 
     '%s://%s%s' % [scheme, host, path]
   end
+
+  def fetch_uri(uri)
+    logger.info "Requesting: %s" % [@url.inspect] if logger
 
-
-    logger.info "Requesting (%d): %s" % [redirect_count, @url.inspect] if logger
-
-    raise MaxRedirectError, "Max redirects (#{redirect_count}) reached for URL: #{@url}" if redirect_count > self.maximum_redirects_per_request-1
-
-    case uri.scheme
+    (case uri.scheme
       when 'file'
         # If this is a directory, we'll try to approximate http a bit by loading a '/index.html'
-        File.read( File.directory?(uri.path) ?
+        File.read( File.directory?(uri.path) ?
+          "#{uri.path}/index.html" : uri.path , :encoding => 'BINARY')
       when /^http[s]?/
-
+        resp = Typhoeus.get uri.to_s, :followlocation => true,
+          :headers => HTTP_HEADERS
+        resp.response_body
      else
        raise BadUrlError, "Unknown URI scheme for the url: #{@url}"
-    end
-  end
-
-  def fetch_http(uri, redirect_count = 0)
-    fetch_attempts = 0
-    resource_not_found_attempts = 0
-
-    begin
-      # This handles the redirects for us
-      resp, data = Net::HTTP.new( uri.host, uri.port).get uri.request_uri
-
-      if resp.response.code == "200"
-        # Check for gzip, and decode:
-        data = Zlib::GzipReader.new(StringIO.new(data)).read if resp.response.header['Content-Encoding'] == 'gzip'
-
-        data
-      elsif resp.response['Location']
-        redirect_to = resp.response['Location']
-
-        fetch_uri URI.parse(url_from_href(redirect_to)), redirect_count+1
-      else
-        # Sometimes Craigslist seems to return 404's for no good reason, and a subsequent fetch will give you what you want
-        raise ResourceNotFoundError, 'Unable to fetch "%s" (%s)' % [ @url, resp.response.code ]
-      end
-    rescue ResourceNotFoundError => err
-      logger.info err.message if logger
-
-      resource_not_found_attempts += 1
-
-      if resource_not_found_attempts <= self.retries_on_404_fail
-        sleep self.sleep_between_404_retries if self.sleep_between_404_retries
-        logger.info 'Retrying ....' if logger
-        retry
-      else
-        raise err
-      end
-    rescue FetchError,Timeout::Error,Errno::ECONNRESET,EOFError => err
-      logger.info 'Timeout error while requesting "%s"' % @url if logger and err.class == Timeout::Error
-      logger.info 'Connection reset while requesting "%s"' % @url if logger and err.class == Errno::ECONNRESET
-
-      fetch_attempts += 1
-
-      if fetch_attempts <= self.retries_on_fetch_fail
-        sleep self.sleep_between_fetch_retries if self.sleep_between_fetch_retries
-        logger.info 'Retrying fetch ....' if logger
-        retry
-      else
-        raise err
-      end
-    end
+    end).force_encoding("ISO-8859-1").encode("UTF-8")
   end
-
+
   # Returns a string, of the current URI's source code
   def html_source
     @html_source ||= fetch_uri uri if uri
     @html_source
   end
-
+
   # Returns an Nokogiri parse, of the current URI
   def html
     @html ||= Nokogiri::HTML html_source, nil, HTML_ENCODING if html_source
     @html
   end
-end
+end
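This hunk is the heart of the release: roughly eighty lines of hand-rolled Net::HTTP fetching (manual redirect counting, gzip decoding, separate retry loops for 404s and timeouts) collapse into a single Typhoeus.get call, with redirect-following (:followlocation => true) and connection handling delegated to libcurl, and every response body, file or http, normalized through force_encoding("ISO-8859-1").encode("UTF-8"). A self-contained sketch of the new fetch path, outside the Scraper class:

    require 'typhoeus'

    HTTP_HEADERS = { "Cache-Control" => "no-cache", "Pragma" => "no-cache" }

    def fetch(url)
      resp = Typhoeus.get url, :followlocation => true, :headers => HTTP_HEADERS
      # Any byte string is valid ISO-8859-1, so this transcode never raises;
      # it guarantees valid UTF-8 output at the price of garbling pages that
      # were genuinely multi-byte UTF-8 to begin with.
      resp.response_body.force_encoding("ISO-8859-1").encode("UTF-8")
    end

    puts fetch("http://miami.craigslist.org/")[0, 80]

The encoding step is worth dwelling on: it trades fidelity on well-formed UTF-8 pages for immunity to the "crazy bytes" the class comments mention, a defensible choice for a scraper that must never crash mid-run.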
data/test/libcraigscrape_test_helpers.rb
CHANGED
@@ -2,36 +2,36 @@ module LibcraigscrapeTestHelpers
   def relative_uri_for(filename)
     'file://%s/%s' % [File.dirname(File.expand_path(__FILE__)), filename]
   end
-
+
   def pp_assertions(obj, obj_name)
     probable_accessors = (obj.methods-obj.class.superclass.methods)
 
     puts
     probable_accessors.sort.each do |m|
       val = obj.send(m.to_sym)
-
+
       # There's a good number of transformations worth doing here, I'll just start like this for now:
       if val.kind_of? Time
         # I've decided this is the the easiest way to understand and test a time
         val = val.to_a
         m = "#{m}.to_a"
       end
-
-      if val.kind_of? Hash and val.length > 5
+
+      if val.kind_of? Hash and val.length > 5
         puts "assert_equal %s, %s.%s.length" % [val.length.inspect,obj_name,m]
-
-        val.keys.sort{|a,b| a <=> b }.each do |k|
+
+        val.keys.sort{|a,b| a <=> b }.each do |k|
          puts "assert_equal %s, %s.%s[%s]" % [val[k].inspect,obj_name,m,k.inspect]
        end
      # elsif val.kind_of? Array
      #   puts "assert_equal %s, %s.%s.length" % [val.length.inspect,obj_name,m]
-      #
-      #   val.each_index do |i|
+      #
+      #   val.each_index do |i|
      #     pp_assertions val[i], "%s.%s[%s]" % [obj_name,m,i.inspect]
      #   end
      else
        puts "assert_equal %s, %s.%s" % [val.inspect,obj_name,m]
      end
-    end
+    end
   end
-end
+end
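For context, pp_assertions (touched only for whitespace in this release) is test scaffolding rather than a test: it introspects an object's likely accessors and prints ready-to-paste assert_equal lines, with special-casing for Time values and large hashes. A hypothetical session against a parsed posting; the accessor names below are illustrative, not guaranteed gem API:

    pp_assertions posting, 'posting'
    # Prints lines such as:
    #   assert_equal "Brooklyn", posting.location
    #   assert_equal "tools & dies", posting.label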
data/test/test_craigslist_geolisting.rb
CHANGED
@@ -6,13 +6,13 @@ require File.dirname(__FILE__)+'/libcraigscrape_test_helpers'
 
 class CraigslistGeolistingTest < Test::Unit::TestCase
   include LibcraigscrapeTestHelpers
-
+
   def test_pukes
     assert_raise(CraigScrape::Scraper::ParseError) do
       CraigScrape::GeoListings.new( relative_uri_for('google.html') ).sites
     end
   end
-
+
   def test_geo_listings
     geo_listing_us070209 = CraigScrape::GeoListings.new relative_uri_for(
       'geolisting_samples/geo_listing_us070209.html'
@@ -345,10 +345,10 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
     assert_equal "youngstown.craigslist.org", geo_listing_us070209.sites["youngstown"]
     assert_equal "yubasutter.craigslist.org", geo_listing_us070209.sites["yuba-sutter"]
     assert_equal "yuma.craigslist.org", geo_listing_us070209.sites["yuma"]
-
+
     geo_listing_cn070209 = CraigScrape::GeoListings.new relative_uri_for(
       'geolisting_samples/geo_listing_cn070209.html'
-    )
+    )
     assert_equal "china", geo_listing_cn070209.location
     assert_equal 6, geo_listing_cn070209.sites.length
     assert_equal "beijing.craigslist.com.cn", geo_listing_cn070209.sites["beijing"]
@@ -357,10 +357,10 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
     assert_equal "hongkong.craigslist.org", geo_listing_cn070209.sites["hong kong"]
     assert_equal "shanghai.craigslist.com.cn", geo_listing_cn070209.sites["shanghai"]
     assert_equal "shenzhen.craigslist.org", geo_listing_cn070209.sites["shenzhen"]
-
+
     geo_listing_ca070209 = CraigScrape::GeoListings.new relative_uri_for(
       'geolisting_samples/geo_listing_ca070209.html'
-    )
+    )
     assert_equal "canada", geo_listing_ca070209.location
     assert_equal 47, geo_listing_ca070209.sites.length
     assert_equal "barrie.craigslist.ca", geo_listing_ca070209.sites["barrie"]
@@ -410,28 +410,28 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
     assert_equal "whistler.craigslist.ca", geo_listing_ca070209.sites["whistler, BC"]
     assert_equal "windsor.craigslist.ca", geo_listing_ca070209.sites["windsor"]
     assert_equal "winnipeg.craigslist.ca", geo_listing_ca070209.sites["winnipeg"]
-
+
     geo_listing_ca_sk07020 = CraigScrape::GeoListings.new relative_uri_for(
       'geolisting_samples/geo_listing_ca_sk070209.html'
-    )
+    )
     assert_equal "canada", geo_listing_ca_sk07020.location
-    assert_equal(
-      { "saskatoon" => "saskatoon.craigslist.ca", "regina" => "regina.craigslist.ca" },
+    assert_equal(
+      { "saskatoon" => "saskatoon.craigslist.ca", "regina" => "regina.craigslist.ca" },
       geo_listing_ca_sk07020.sites
     )
   end
-
+
   def test_sites_in_path
     # This was really tough to test, and in the end, I don't know just how useful this really is...
     hier_dir = relative_uri_for 'geolisting_samples/hierarchy_test071009/'
-
+
     %w(
-      us/fl/miami /us/fl/miami/ us/fl/miami/ /us/fl/miami us/fl/miami/nonsense
+      us/fl/miami /us/fl/miami/ us/fl/miami/ /us/fl/miami us/fl/miami/nonsense
      us/fl/miami/nonsense/more-nonsense us/fl/miami/south\ florida
    ).each do |path|
      assert_equal ["miami.craigslist.org"], CraigScrape::GeoListings.sites_in_path( path, hier_dir )
    end
-
+
    %w( us/fl /us/fl us/fl/ /us/fl/ ).each do |path|
      assert_equal(
        %w(
@@ -441,20 +441,20 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
         CraigScrape::GeoListings.sites_in_path( path, hier_dir )
       )
     end
-
+
     # This tests those escaped funky paths. I *think* this file-based test is actually indicative
     # that the http-retrieval version works as well;
     us_fl_mia_ftmeyers = CraigScrape::GeoListings.sites_in_path(
       "us/fl/ft myers \\/ SW florida", hier_dir
     )
     assert_equal ["fortmyers.craigslist.org"], us_fl_mia_ftmeyers
-
+
     # make sure we puke on obvious bad-stuff. I *think* this file-based test is actually indicative
     # that the http-retrieval version works as well:
     assert_raise(CraigScrape::GeoListings::BadGeoListingPath) do
       CraigScrape::GeoListings.sites_in_path "us/fl/nonexist", hier_dir
     end
-
+
     assert_raise(CraigScrape::GeoListings::BadGeoListingPath) do
       # You'll notice that we could actually guess a decent match, but we wont :
       CraigScrape::GeoListings.sites_in_path "us/fl/miami/nonexist", hier_dir
@@ -465,57 +465,57 @@ class CraigslistGeolistingTest < Test::Unit::TestCase
     hier_dir = relative_uri_for 'geolisting_samples/hierarchy_test071009/'
 
     assert_equal(
-      %w(miami.craigslist.org),
-      CraigScrape::GeoListings.find_sites(
-        ["us/fl/south florida","+ us/fl/south florida", "-newyork.craigslist.org"],
+      %w(miami.craigslist.org),
+      CraigScrape::GeoListings.find_sites(
+        ["us/fl/south florida","+ us/fl/south florida", "-newyork.craigslist.org"],
         hier_dir
       )
     )
-
+
     assert_equal(
       %w(
-        jacksonville panamacity orlando fortmyers keys tallahassee ocala gainesville tampa
+        jacksonville panamacity orlando fortmyers keys tallahassee ocala gainesville tampa
         pensacola daytona treasure sarasota staugustine spacecoast lakeland newyork
-      ).collect{|p| "#{p}.craigslist.org"},
-      CraigScrape::GeoListings.find_sites( ["us/fl","-us/fl/miami", "+ newyork.craigslist.org"], hier_dir)
+      ).collect{|p| "#{p}.craigslist.org"}.sort,
+      CraigScrape::GeoListings.find_sites( ["us/fl","-us/fl/miami", "+ newyork.craigslist.org"], hier_dir).sort
     )
 
     assert_equal(
       %w(
-        westmd fortcollins charleston fayetteville dallas mendocino wichita valdosta terrahaute rockford erie
-        decatur cedarrapids stillwater collegestation charlestonwv albany sacramento houston kalamazoo fortsmith
+        westmd fortcollins charleston fayetteville dallas mendocino wichita valdosta terrahaute rockford erie
+        decatur cedarrapids stillwater collegestation charlestonwv albany sacramento houston kalamazoo fortsmith
         maine minneapolis stockton pennstate bend grandisland palmsprings nmi waterloo topeka eastnc greenbay york
-        utica stgeorge oklahomacity grandrapids eastidaho lancaster gulfport sandiego reading kpr fresno iowacity
-        chicago tuscaloosa smd monterey yubasutter victoriatx sd knoxville gadsden jonesboro ksu youngstown toledo
-        lascruces annarbor danville delaware parkersburg appleton stcloud richmond muskegon jerseyshore redding
-        ithaca hartford evansville corpuschristi binghamton chico modesto lynchburg hattiesburg morgantown
-        harrisonburg lubbock carbondale florencesc imperial wenatchee semo savannah prescott lacrosse longisland
-        huntsville santabarbara janesville mankato santafe pullman louisville lexington brunswick duluth columbus
-        hudsonvalley pittsburgh wheeling westky waco shreveport eastoregon corvallis winstonsalem denver
-        tippecanoe newhaven shoals wv greenville lansing detroit athensohio easttexas sanantonio raleigh phoenix
-        honolulu inlandempire pueblo chattanooga lawton worcester twinfalls roseburg roanoke fredericksburg
-        annapolis asheville seattle scranton quadcities oregoncoast stlouis newyork mobile atlanta visalia
-        clarksville providence kansascity galveston madison bham harrisburg muncie bloomington anchorage ventura
-        up tricities rockies elpaso slo indianapolis fayar columbusga bellingham abilene wichitafalls boston
-        mcallen bn sierravista lasvegas sanmarcos nwct farmington mansfield jacksontn bgky altoona eugene
-        lafayette boone odessa spokane norfolk hickory burlington nashville lawrence hiltonhead elmira westernmass
-        southjersey myrtlebeach dothan goldcountry lincoln martinsburg dubuque brownsville washingtondc tucson
-        columbiamo jxn yakima sheboygan olympic humboldt newjersey cosprings springfield beaumont macon eauclaire
-        batonrouge buffalo mohave wilmington rochester sfbay northmiss bakersfield neworleans catskills wausau
-        akroncanton cnj merced chambana flint capecod nh yuma tulsa charlottesville easternshore desmoines
-        athensga austin newlondon outerbanks fortwayne dayton wyoming watertown provo medford texarkana cleveland
-        memphis amarillo limaohio augusta flagstaff jackson plattsburgh peoria skagit saltlakecity saginaw
-        portland syracuse swmi baltimore monroe littlerock boise laredo boulder philadelphia sandusky salem rmn
-        montgomery blacksburg centralmich logan albuquerque losangeles poconos westslope southbend siouxcity reno
-        porthuron greensboro orangecounty fargo ogden charlotte allentown joplin chautauqua lakecharles omaha
-        springfieldil roswell montana killeen milwaukee nd williamsport columbia racine southcoast ames huntington
+        utica stgeorge oklahomacity grandrapids eastidaho lancaster gulfport sandiego reading kpr fresno iowacity
+        chicago tuscaloosa smd monterey yubasutter victoriatx sd knoxville gadsden jonesboro ksu youngstown toledo
+        lascruces annarbor danville delaware parkersburg appleton stcloud richmond muskegon jerseyshore redding
+        ithaca hartford evansville corpuschristi binghamton chico modesto lynchburg hattiesburg morgantown
+        harrisonburg lubbock carbondale florencesc imperial wenatchee semo savannah prescott lacrosse longisland
+        huntsville santabarbara janesville mankato santafe pullman louisville lexington brunswick duluth columbus
+        hudsonvalley pittsburgh wheeling westky waco shreveport eastoregon corvallis winstonsalem denver
+        tippecanoe newhaven shoals wv greenville lansing detroit athensohio easttexas sanantonio raleigh phoenix
+        honolulu inlandempire pueblo chattanooga lawton worcester twinfalls roseburg roanoke fredericksburg
+        annapolis asheville seattle scranton quadcities oregoncoast stlouis newyork mobile atlanta visalia
+        clarksville providence kansascity galveston madison bham harrisburg muncie bloomington anchorage ventura
+        up tricities rockies elpaso slo indianapolis fayar columbusga bellingham abilene wichitafalls boston
+        mcallen bn sierravista lasvegas sanmarcos nwct farmington mansfield jacksontn bgky altoona eugene
+        lafayette boone odessa spokane norfolk hickory burlington nashville lawrence hiltonhead elmira westernmass
+        southjersey myrtlebeach dothan goldcountry lincoln martinsburg dubuque brownsville washingtondc tucson
+        columbiamo jxn yakima sheboygan olympic humboldt newjersey cosprings springfield beaumont macon eauclaire
+        batonrouge buffalo mohave wilmington rochester sfbay northmiss bakersfield neworleans catskills wausau
+        akroncanton cnj merced chambana flint capecod nh yuma tulsa charlottesville easternshore desmoines
+        athensga austin newlondon outerbanks fortwayne dayton wyoming watertown provo medford texarkana cleveland
+        memphis amarillo limaohio augusta flagstaff jackson plattsburgh peoria skagit saltlakecity saginaw
+        portland syracuse swmi baltimore monroe littlerock boise laredo boulder philadelphia sandusky salem rmn
+        montgomery blacksburg centralmich logan albuquerque losangeles poconos westslope southbend siouxcity reno
+        porthuron greensboro orangecounty fargo ogden charlotte allentown joplin chautauqua lakecharles omaha
+        springfieldil roswell montana killeen milwaukee nd williamsport columbia racine southcoast ames huntington
         cincinnati auburn miami
-      ).collect{|p| "#{p}.craigslist.org"},
+      ).collect{|p| "#{p}.craigslist.org"}.sort,
       CraigScrape::GeoListings.find_sites(
         ["us","- us/fl", "+ us/fl/miami", ' -jacksonville.craigslist.org'], hier_dir
-      )
+      ).sort
     )
-
+
   end
 
 end
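The one substantive change in this file is the .sort appended to both sides of the find_sites assertions above: site lists are now compared as sorted arrays, so the tests no longer depend on the order in which find_sites happens to return results. A minimal illustration of the pattern, in the same Test::Unit style the suite uses:

    require 'test/unit'

    class SortedComparisonExample < Test::Unit::TestCase
      def test_order_insensitive
        expected = %w(miami newyork).collect { |p| "#{p}.craigslist.org" }
        actual   = ["newyork.craigslist.org", "miami.craigslist.org"]  # any order

        # assert_equal on unsorted arrays is order-sensitive and would fail
        # here; sorting both sides asserts on contents alone.
        assert_equal expected.sort, actual.sort
      end
    end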