libcraigscrape 0.6 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +8 -0
- data/Rakefile +6 -35
- data/lib/libcraigscrape.rb +90 -56
- data/test/listing_samples/empty_listings.html +128 -0
- data/test/post_samples/1207457727.html +92 -0
- data/test/post_samples/this_post_has_been_deleted_by_its_author.html +37 -0
- data/test/test_craigslist_listing.rb +67 -12
- metadata +5 -2
data/CHANGELOG
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
== Change Log
|
2
2
|
|
3
|
+
=== Release 0.6.5 (Jun 8, 2009)
|
4
|
+
- Added PostFull::deleted_by_author? , added test case for said condition
|
5
|
+
- Fixed a bug that caused the library to die in weird ways if there wasn't a title tag on a parsed page
|
6
|
+
- Apparently Craigslist started gzip-encoding *some* listings. Added gzip decoding support
|
7
|
+
- Found a bug when parsing the location field on some full_posts in the apa sections
|
8
|
+
- Added support for file:// URIs in the scrape_* functions, and revised the tests to use these URIs
|
9
|
+
- Fixed a bug that caused errors to be raised with legitimately empty listing pages
|
10
|
+
|
3
11
|
=== Release 0.6.0 (May 21, 2009)
|
4
12
|
- Added PostFull::flagged_for_removal?
|
5
13
|
- Fixed a couple small parse bugs found in production
|
data/Rakefile
CHANGED
@@ -11,7 +11,7 @@ include FileUtils
|
|
11
11
|
RbConfig = Config unless defined? RbConfig
|
12
12
|
|
13
13
|
NAME = "libcraigscrape"
|
14
|
-
VERS = ENV['VERSION'] || "0.6"
|
14
|
+
VERS = ENV['VERSION'] || "0.6.5"
|
15
15
|
PKG = "#{NAME}-#{VERS}"
|
16
16
|
|
17
17
|
RDOC_OPTS = ['--quiet', '--title', 'The libcraigscrape Reference', '--main', 'README', '--inline-source']
|
@@ -57,8 +57,11 @@ Rake::RDocTask.new do |rdoc|
|
|
57
57
|
end
|
58
58
|
|
59
59
|
Rake::GemPackageTask.new(SPEC) do |p|
|
60
|
-
|
61
|
-
|
60
|
+
p.need_tar = false
|
61
|
+
p.need_tar_gz = true
|
62
|
+
p.need_tar_bz2 = true
|
63
|
+
p.need_zip = true
|
64
|
+
p.gem_spec = SPEC
|
62
65
|
end
|
63
66
|
|
64
67
|
task "lib" do
|
@@ -74,35 +77,3 @@ task :uninstall => [:clean] do
|
|
74
77
|
sh %{sudo gem uninstall #{NAME}}
|
75
78
|
end
|
76
79
|
|
77
|
-
task :pkg_archives do
|
78
|
-
base_dir = File.dirname __FILE__
|
79
|
-
package_name = '%s-%s' % [NAME,VERS]
|
80
|
-
packages_base = "#{base_dir}/pkg"
|
81
|
-
packaging_dir = '%s/%s' % [ packages_base,package_name ]
|
82
|
-
|
83
|
-
begin
|
84
|
-
# First we create a proper package-X.X directory:
|
85
|
-
PKG_FILES.each do |p_f|
|
86
|
-
base_file = '%s/%s' % [base_dir, p_f]
|
87
|
-
packaged_file = '%s/%s' % [packaging_dir, p_f]
|
88
|
-
packaged_file_dirname = File.dirname packaged_file
|
89
|
-
|
90
|
-
# We really don't care to do anything about these - we'll recreate it when/if its needed
|
91
|
-
next if File.directory? base_file
|
92
|
-
|
93
|
-
FileUtils.mkdir_p packaged_file_dirname unless File.directory? packaged_file_dirname
|
94
|
-
|
95
|
-
FileUtils.cp base_file, packaged_file unless File.exists? packaged_file
|
96
|
-
end
|
97
|
-
|
98
|
-
# Remove any old archives we'd be replacing:
|
99
|
-
%w(zip tar.bz2).each{ |ext| FileUtils.rm "#{packaging_dir}.#{ext}" if File.exist? "#{packaging_dir}.#{ext}" }
|
100
|
-
|
101
|
-
# Now let's create some archives:
|
102
|
-
sh %{cd #{packages_base} && tar -cjvf #{package_name}.tar.bz2 #{package_name}}
|
103
|
-
sh %{cd #{packages_base} && zip -r #{package_name}.zip #{package_name}}
|
104
|
-
ensure
|
105
|
-
# Delete that temp directory we created at the start here
|
106
|
-
FileUtils.rmtree packaging_dir
|
107
|
-
end
|
108
|
-
end
|
data/lib/libcraigscrape.rb
CHANGED
@@ -2,10 +2,11 @@
|
|
2
2
|
#
|
3
3
|
# All of libcraigscrape's objects and methods are loaded when you use <tt>require 'libcraigscrape'</tt> in your code.
|
4
4
|
#
|
5
|
+
require 'net/http'
|
6
|
+
require 'zlib'
|
5
7
|
|
6
8
|
require 'rubygems'
|
7
9
|
require 'hpricot'
|
8
|
-
require 'net/http'
|
9
10
|
require 'htmlentities'
|
10
11
|
require 'activesupport'
|
11
12
|
|
@@ -31,10 +32,14 @@ class CraigScrape
|
|
31
32
|
end
|
32
33
|
|
33
34
|
module ParseObjectHelper #:nodoc:
|
35
|
+
private
|
34
36
|
def he_decode(text)
|
35
37
|
HTMLEntities.new.decode text
|
36
38
|
end
|
37
39
|
end
|
40
|
+
|
41
|
+
class BadUrlError < StandardError #:nodoc:
|
42
|
+
end
|
38
43
|
|
39
44
|
class ParseError < StandardError #:nodoc:
|
40
45
|
end
|
@@ -92,7 +97,7 @@ class CraigScrape
|
|
92
97
|
|
93
98
|
title = page.at('title')
|
94
99
|
@title = he_decode title.inner_html if title
|
95
|
-
@title = nil if @title.length ==0
|
100
|
+
@title = nil if @title and @title.length ==0
|
96
101
|
|
97
102
|
@full_section = []
|
98
103
|
(page/"div[@class='bchead']//a").each do |a|
|
@@ -125,18 +130,56 @@ class CraigScrape
|
|
125
130
|
# This will make it easier for the next guy to work with if he wants to parse out the information we're discarding...
|
126
131
|
parse_craig_body Hpricot.parse(craigbody_as_s) if craigbody_as_s
|
127
132
|
|
133
|
+
# We'll first set these edge cases to false, unless the block below decides otherwise
|
134
|
+
@flagged_for_removal = false
|
135
|
+
@deleted_by_author = false
|
136
|
+
|
137
|
+
# Time to check for errors and edge cases
|
138
|
+
if [@contents,@posting_id,@post_time,@title].all?{|f| f.nil?}
|
139
|
+
case @header.gsub(HTML_TAG, "")
|
140
|
+
when "This posting has been flagged for removal"
|
141
|
+
@flagged_for_removal = true
|
142
|
+
when "This posting has been deleted by its author."
|
143
|
+
@deleted_by_author = true
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
128
147
|
# Validate that required fields are present:
|
129
|
-
raise ParseError, "Unable to parse PostFull: %s" % page.to_html if !flagged_for_removal? and [
|
148
|
+
raise ParseError, "Unable to parse PostFull: %s" % page.to_html if !flagged_for_removal? and !deleted_by_author? and [
|
130
149
|
@contents,@posting_id,@post_time,@header,@title,@full_section
|
131
150
|
].any?{|f| f.nil? or (f.respond_to? :length and f.length == 0)}
|
132
151
|
end
|
152
|
+
|
153
|
+
# Returns true if this Post was parsed, and merely a 'Flagged for Removal' page
|
154
|
+
def flagged_for_removal?; @flagged_for_removal; end
|
155
|
+
|
156
|
+
# Returns true if this Post was parsed, and represents a 'This posting has been deleted by its author.' notice
|
157
|
+
def deleted_by_author?; @deleted_by_author; end
|
158
|
+
|
159
|
+
# Returns the price (as float) of the item, as best ascertained by the post header
|
160
|
+
def price
|
161
|
+
$1.to_f if @title and @header and PRICE.match(@header.gsub(/#{@title}/, ''))
|
162
|
+
end
|
163
|
+
|
164
|
+
# Returns the post contents with all html tags removed
|
165
|
+
def contents_as_plain
|
166
|
+
@contents.gsub HTML_TAG, "" if @contents
|
167
|
+
end
|
168
|
+
|
169
|
+
private
|
133
170
|
|
134
171
|
# I left this here as a stub, since someone may want to parse more than what I'm currently scraping from this part of the page
|
135
172
|
def parse_craig_body(craigbody_els) #:nodoc:
|
136
173
|
# Location (when explicitly defined):
|
137
|
-
cursor = craigbody_els.at 'ul'
|
138
|
-
|
139
|
-
|
174
|
+
cursor = craigbody_els.at 'ul' unless @location
|
175
|
+
|
176
|
+
# Apa section includes other things in the li's (cats/dogs ok fields)
|
177
|
+
cursor.children.each do |li|
|
178
|
+
if LOCATION.match li.inner_html
|
179
|
+
@location = he_decode($1) and break
|
180
|
+
break
|
181
|
+
end
|
182
|
+
end if cursor
|
140
183
|
|
141
184
|
# Real estate listings can work a little different for location:
|
142
185
|
unless @location
|
@@ -151,24 +194,6 @@ class CraigScrape
|
|
151
194
|
|
152
195
|
@images = (img_table / 'img').collect{|i| i[:src]} if img_table
|
153
196
|
end
|
154
|
-
|
155
|
-
# Returns true if this Post was parsed, and merely a 'Flagged for Removal' page
|
156
|
-
def flagged_for_removal?
|
157
|
-
(
|
158
|
-
[@contents,@posting_id,@post_time,@title].all?{|f| f.nil?} and
|
159
|
-
@header.gsub(HTML_TAG, "") == "This posting has been flagged for removal"
|
160
|
-
)
|
161
|
-
end
|
162
|
-
|
163
|
-
# Returns the price (as float) of the item, as best ascertained by the post header
|
164
|
-
def price
|
165
|
-
$1.to_f if @title and @header and PRICE.match(@header.gsub(/#{@title}/, ''))
|
166
|
-
end
|
167
|
-
|
168
|
-
# Returns the post contents with all html tags removed
|
169
|
-
def contents_as_plain
|
170
|
-
@contents.gsub HTML_TAG, "" if @contents
|
171
|
-
end
|
172
197
|
end
|
173
198
|
|
174
199
|
# Listings represents a parsed Craigslist listing page and is generally returned by CraigScrape.scrape_listing
|
@@ -207,7 +232,7 @@ class CraigScrape
|
|
207
232
|
@next_page_href = next_link[:href] if next_link
|
208
233
|
|
209
234
|
# Validate that required fields are present:
|
210
|
-
raise ParseError, "Unable to parse Listings: %s" % page.to_html
|
235
|
+
raise ParseError, "Unable to parse Listings: %s" % page.to_html if tags_worth_parsing.length > 0 and @posts.length == 0
|
211
236
|
end
|
212
237
|
|
213
238
|
end
|
@@ -304,7 +329,7 @@ class CraigScrape
|
|
304
329
|
|
305
330
|
# Requests and returns the PostFull object that corresponds with this summary's full_url
|
306
331
|
def full_post
|
307
|
-
@full_post
|
332
|
+
@full_post ||= CraigScrape.scrape_full_post full_url if full_url
|
308
333
|
|
309
334
|
@full_post
|
310
335
|
end
|
@@ -366,42 +391,51 @@ class CraigScrape
|
|
366
391
|
def self.scrape_posts_since(listing_url, newer_then)
|
367
392
|
self.scrape_until(listing_url) {|post| post.date <= newer_then}
|
368
393
|
end
|
369
|
-
|
394
|
+
|
370
395
|
def self.fetch_url(uri) #:nodoc:
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
# This handles the redirects for us
|
375
|
-
uri_dest = ( uri.class == String ) ? URI.parse(uri) : uri
|
376
|
-
|
377
|
-
logger.info "Requesting: %s" % uri_dest.to_s if logger
|
378
|
-
|
379
|
-
resp, data = Net::HTTP.new( uri_dest.host, uri_dest.port).get uri_dest.request_uri, nil
|
380
|
-
|
381
|
-
if resp.response.code == "200"
|
382
|
-
data
|
383
|
-
elsif resp.response['Location']
|
384
|
-
redirect_to = resp.response['Location']
|
385
|
-
self.fetch_url(redirect_to)
|
386
|
-
else
|
387
|
-
# Sometimes Craigslist seems to return 404's for no good reason, and a subsequent fetch will give you what you want
|
388
|
-
error_description = 'Unable to fetch "%s" (%s)' % [ uri_dest.to_s, resp.response.code ]
|
396
|
+
uri_dest = ( uri.class == String ) ? URI.parse(uri) : uri
|
397
|
+
|
398
|
+
logger.info "Requesting: %s" % uri_dest.to_s if logger
|
389
399
|
|
390
|
-
|
400
|
+
case uri_dest.scheme
|
401
|
+
when 'file'
|
402
|
+
File.read uri_dest.path
|
403
|
+
when /^http[s]?/
|
404
|
+
fetch_attempts = 0
|
391
405
|
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
fetch_attempts += 1
|
406
|
+
begin
|
407
|
+
# This handles the redirects for us
|
408
|
+
resp, data = Net::HTTP.new( uri_dest.host, uri_dest.port).get uri_dest.request_uri, nil
|
396
409
|
|
397
|
-
|
398
|
-
|
399
|
-
|
410
|
+
if resp.response.code == "200"
|
411
|
+
# Check for gzip, and decode:
|
412
|
+
data = Zlib::GzipReader.new(StringIO.new(data)).read if resp.response.header['Content-Encoding'] == 'gzip'
|
413
|
+
|
414
|
+
data
|
415
|
+
elsif resp.response['Location']
|
416
|
+
redirect_to = resp.response['Location']
|
417
|
+
self.fetch_url(redirect_to)
|
418
|
+
else
|
419
|
+
# Sometimes Craigslist seems to return 404's for no good reason, and a subsequent fetch will give you what you want
|
420
|
+
error_description = 'Unable to fetch "%s" (%s)' % [ uri_dest.to_s, resp.response.code ]
|
421
|
+
|
422
|
+
logger.info error_description if logger
|
423
|
+
|
424
|
+
raise FetchError, error_description
|
425
|
+
end
|
426
|
+
rescue FetchError => err
|
427
|
+
fetch_attempts += 1
|
428
|
+
|
429
|
+
if retries_on_fetch_fail <= CraigScrape.retries_on_fetch_fail
|
430
|
+
sleep CraigScrape.sleep_between_fetch_retries if CraigScrape.sleep_between_fetch_retries
|
431
|
+
retry
|
432
|
+
else
|
433
|
+
raise err
|
434
|
+
end
|
435
|
+
end
|
400
436
|
else
|
401
|
-
raise
|
402
|
-
end
|
437
|
+
raise BadUrlError, "Unknown URI scheme for the url: #{uri_dest.to_s}"
|
403
438
|
end
|
404
|
-
|
405
439
|
end
|
406
440
|
|
407
441
|
def self.uri_from_href(base_uri, href) #:nodoc:
|
@@ -0,0 +1,128 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
2
|
+
<html><head>
|
3
|
+
<title>treasure coast arts/crafts for sale classifieds - craigslist</title>
|
4
|
+
|
5
|
+
<meta name="description" content="craigslist arts/crafts for sale classifieds for treasure coast ">
|
6
|
+
<meta name="keywords" content="treasure coast arts/crafts for sale craigslist, classifieds, want ads ">
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
<link rel=alternate type="application/rss+xml" href="index.rss" title="RSS feed for craigslist | arts/crafts for sale in treasure coast ">
|
11
|
+
<link rel="stylesheet" title="craigslist" href="http://www.craigslist.org/styles/craigslist.css" type="text/css" media="all">
|
12
|
+
</head>
|
13
|
+
|
14
|
+
<body class="toc">
|
15
|
+
|
16
|
+
<a name="top"></a>
|
17
|
+
|
18
|
+
<div class="bchead"><span id="ef">
|
19
|
+
|
20
|
+
[ <a href="http://www.craigslist.org/about/help/">help</a> ]
|
21
|
+
[ <a href="https://post.craigslist.org/psl/S">post</a> ]</span>
|
22
|
+
|
23
|
+
<a href="/"> treasure coast craigslist</a> > <a href="/art/">arts/crafts for sale</a></div>
|
24
|
+
|
25
|
+
<blockquote>
|
26
|
+
<form action="/search/art" method="get" onsubmit="ckCAbb();">
|
27
|
+
|
28
|
+
<script type="text/javascript"><!--
|
29
|
+
function ckCAbb() {
|
30
|
+
t = document.getElementById("cAbb");
|
31
|
+
if (t.value == "art") { t.disabled = true; }
|
32
|
+
}
|
33
|
+
-->
|
34
|
+
</script>
|
35
|
+
|
36
|
+
<table width="95%" cellpadding="2" style="white-space: nowrap; background:#eee; border:1px solid gray;" summary="">
|
37
|
+
<tr>
|
38
|
+
<td align="right" width="1">search for:</td>
|
39
|
+
<td width="30%"><input id="query" name="query" size="30" value=""> in:
|
40
|
+
<select id="cAbb" name="catAbbreviation">
|
41
|
+
<option value="ccc">all community<option value="eee">all event<option value="sss">all for sale / wanted<option disabled value="">--<option value="art" selected> art & crafts
|
42
|
+
<option value="pts"> auto parts
|
43
|
+
<option value="bab"> baby & kid stuff
|
44
|
+
<option value="bar"> barter
|
45
|
+
<option value="bik"> bicycles
|
46
|
+
<option value="boa"> boats
|
47
|
+
<option value="bks"> books
|
48
|
+
<option value="bfs"> business
|
49
|
+
<option value="cta"> cars & trucks - all
|
50
|
+
<option value="ctd"> cars & trucks - by dealer
|
51
|
+
<option value="cto"> cars & trucks - by owner
|
52
|
+
<option value="emd"> cds / dvds / vhs
|
53
|
+
<option value="clo"> clothing
|
54
|
+
<option value="clt"> collectibles
|
55
|
+
<option value="sys"> computers & tech
|
56
|
+
<option value="ele"> electronics
|
57
|
+
<option value="grd"> farm & garden
|
58
|
+
<option value="zip"> free stuff
|
59
|
+
<option value="fua"> furniture - all
|
60
|
+
<option value="fud"> furniture - by dealer
|
61
|
+
<option value="fuo"> furniture - by owner
|
62
|
+
<option value="tag"> games & toys
|
63
|
+
<option value="gms"> garage sales
|
64
|
+
<option value="for"> general
|
65
|
+
<option value="hsh"> household
|
66
|
+
<option value="wan"> items wanted
|
67
|
+
<option value="jwl"> jewelry
|
68
|
+
<option value="mat"> materials
|
69
|
+
<option value="mcy"> motorcycles/scooters
|
70
|
+
<option value="msg"> musical instruments
|
71
|
+
<option value="pho"> photo/video
|
72
|
+
<option value="rvs"> recreational vehicles
|
73
|
+
<option value="spo"> sporting goods
|
74
|
+
<option value="tix"> tickets
|
75
|
+
<option value="tls"> tools
|
76
|
+
<option disabled value="">--<option value="ggg">all gigs<option value="hhh">all housing<option value="jjj">all jobs<option value="ppp">all personals<option value="res">all resume<option value="bbb">all services offered</select>
|
77
|
+
<input type="submit" value="Search">
|
78
|
+
</td><td>
|
79
|
+
<label><input type="checkbox" name="srchType" value="T"
|
80
|
+
title="check this box to search only posting titles"> only search titles</label>
|
81
|
+
</td>
|
82
|
+
</tr>
|
83
|
+
|
84
|
+
<tr>
|
85
|
+
<td align="right" width="1">price:</td>
|
86
|
+
<td><input name="minAsk" size="6" value="min" onfocus="value=''"> <input name="maxAsk" size="6" value="max" onfocus="value=''"> </td>
|
87
|
+
<td align="left"><label><input type="checkbox" name="hasPic" value="1"> has image</label></td>
|
88
|
+
</tr></table></form></blockquote><span id="showPics"></span><span id="hidePics"></span>
|
89
|
+
|
90
|
+
<blockquote>
|
91
|
+
<table width="95%" summary="">
|
92
|
+
<tr>
|
93
|
+
<td valign="top">[ Mon, 08 Jun 17:37:29 ]</td>
|
94
|
+
<td valign="top" id="messages"><span class="hl"> [ <a href="http://www.recalls.gov/">avoid recalled items</a> ] </span> <span class="hl"> [ <a href="/about/prohibited.items">partial list of prohibited items</a> ] </span> <span class="hl"> [<a href="/cgi-bin/success.stories.cgi">success story?</a>]</span> <span class="hl"> [ <b><a href="/about/scams">AVOIDING SCAMS & FRAUD</a></b> ] </span> <span class="hl"> [ <b><a href="/about/safety">PERSONAL SAFETY TIPS</a></b> ] </span> </td>
|
95
|
+
</tr>
|
96
|
+
</table>
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
<div id="footer">
|
103
|
+
<hr>
|
104
|
+
<span id="copy">
|
105
|
+
Copyright © 2009 craigslist, inc.<br>
|
106
|
+
<a href="#top">Back to top of page</a>
|
107
|
+
</span>
|
108
|
+
<span class="rss">
|
109
|
+
<a class="l" href="http://treasure.craigslist.org/art/index.rss">RSS</a>
|
110
|
+
<a href="http://www.craigslist.org/about/rss">(?)</a><br>
|
111
|
+
<a class="y" href="http://add.my.yahoo.com/rss?url=http://treasure.craigslist.org/art/index.rss">add to My Yahoo!</a>
|
112
|
+
</span>
|
113
|
+
</div>
|
114
|
+
<br><br>
|
115
|
+
|
116
|
+
<div id="floater"> </div>
|
117
|
+
|
118
|
+
</blockquote>
|
119
|
+
<script type="text/javascript" src="http://www.craigslist.org/js/jquery.js"></script><script type="text/javascript" src="http://www.craigslist.org/js/tocs.js"></script>
|
120
|
+
<script type="text/javascript">
|
121
|
+
<!--
|
122
|
+
initImgs();
|
123
|
+
-->
|
124
|
+
</script>
|
125
|
+
|
126
|
+
|
127
|
+
</body>
|
128
|
+
</html>
|
@@ -0,0 +1,92 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title>2bth for no deposit req</title>
|
5
|
+
<meta name="robots" content="NOARCHIVE,NOFOLLOW">
|
6
|
+
<link rel="stylesheet" title="craigslist" href="http://www.craigslist.org/styles/craigslist.css" type="text/css" media="all">
|
7
|
+
</head>
|
8
|
+
|
9
|
+
<body onload="initFlag(1207457727)" class="posting">
|
10
|
+
|
11
|
+
<div class="bchead">
|
12
|
+
<a id="ef" href="/email.friend?postingID=1207457727">email this posting to a friend</a>
|
13
|
+
<a href="http://miami.craigslist.org">south florida craigslist</a>
|
14
|
+
> <a href="/brw/">broward county</a> > <a href="/brw/apa/">apts/housing for rent</a>
|
15
|
+
</div>
|
16
|
+
|
17
|
+
<div id="flags">
|
18
|
+
<div id="flagMsg">
|
19
|
+
please <a href="http://www.craigslist.org/about/help/flags_and_community_moderation">flag</a> with care:
|
20
|
+
</div>
|
21
|
+
<div id="flagChooser">
|
22
|
+
<br>
|
23
|
+
<a class="fl" id="flag16" href="/flag/?flagCode=16&postingID=1207457727"
|
24
|
+
title="Wrong category, wrong site, discusses another post, or otherwise misplaced">
|
25
|
+
miscategorized</a>
|
26
|
+
<br>
|
27
|
+
|
28
|
+
<a class="fl" id="flag28" href="/flag/?flagCode=28&postingID=1207457727"
|
29
|
+
title="Violates craigslist Terms Of Use or other posted guidelines">
|
30
|
+
prohibited</a>
|
31
|
+
<br>
|
32
|
+
|
33
|
+
<a class="fl" id="flag15" href="/flag/?flagCode=15&postingID=1207457727"
|
34
|
+
title="Posted too frequently, in multiple cities/categories, or is too commercial">
|
35
|
+
spam/overpost</a>
|
36
|
+
<br>
|
37
|
+
|
38
|
+
<a class="fl" id="flag9" href="/flag/?flagCode=9&postingID=1207457727"
|
39
|
+
title="Should be considered for inclusion in the Best-Of-Craigslist">
|
40
|
+
best of craigslist</a>
|
41
|
+
<br>
|
42
|
+
</div>
|
43
|
+
</div>
|
44
|
+
|
45
|
+
<div id="tsb">
|
46
|
+
<a href="http://www.craigslist.org/about/FHA.html">Stating a discriminatory preference in a housing post is illegal - please flag discriminatory posts as prohibited</a></div> <div id="tsb"> <em>Avoid scams and fraud by dealing locally!</em> Beware any arrangement involving Western Union, Moneygram, wire transfer, or a landlord/owner who is out of the country or cannot meet you in person. <a href="http://www.craigslist.org/about/scams.html">More info</a></div>
|
47
|
+
|
48
|
+
|
49
|
+
<h2>$1350 / 3br - 2bth for no deposit req (Coral Springs)</h2>
|
50
|
+
<hr>
|
51
|
+
Reply to: <a href="mailto:hous-ccpap-1207457727@craigslist.org?subject=%241350%20%2F%203br%20-%202bth%20for%20no%20deposit%20req%20(Coral%20Springs)">hous-ccpap-1207457727@craigslist.org</a> <sup>[<a href="http://www.craigslist.org/about/help/replying_to_posts" target="_blank">Errors when replying to ads?</a>]</sup><br>
|
52
|
+
Date: 2009-06-05, 6:56PM EDT<br>
|
53
|
+
<br>
|
54
|
+
<br>
|
55
|
+
<div id="userbody">
|
56
|
+
<p><br>Call!! asking for a new owner.<br> no deposit required rent to own properties. <br> <br> Defaulting payment records are not a problem, <br> we will help you protect the previous owners credit history! 202-567-6371 <br><br></p>
|
57
|
+
|
58
|
+
|
59
|
+
<br><br><ul>
|
60
|
+
<li>cats are OK - purrr
|
61
|
+
<li>dogs are OK - wooof
|
62
|
+
<li> Location: Coral Springs
|
63
|
+
<li>it's NOT ok to contact this poster with services or other commercial interests</ul>
|
64
|
+
|
65
|
+
<table summary="craigslist hosted images">
|
66
|
+
<tr>
|
67
|
+
<td align="center"><img src="http://images.craigslist.org/3k43pe3o8ZZZZZZZZZ9655022102a3ea51624.jpg" alt="image 1207457727-0"></td>
|
68
|
+
<td align="center"><img src="http://images.craigslist.org/3n13m53p6ZZZZZZZZZ96596515e51237a179c.jpg" alt="image 1207457727-1"></td>
|
69
|
+
</tr>
|
70
|
+
<tr>
|
71
|
+
<td align="center"><img src="http://images.craigslist.org/3od3p33leZZZZZZZZZ9656d614da8e3a51dd9.jpg" alt="image 1207457727-2"></td>
|
72
|
+
<td align="center"><img src="http://images.craigslist.org/3pb3oa3leZZZZZZZZZ965eb60e4d2344019fb.jpg" alt="image 1207457727-3"></td>
|
73
|
+
</tr>
|
74
|
+
</table>
|
75
|
+
|
76
|
+
</div>
|
77
|
+
PostingID: 1207457727<br>
|
78
|
+
|
79
|
+
<br>
|
80
|
+
|
81
|
+
<hr>
|
82
|
+
<ul class="clfooter">
|
83
|
+
<li>Copyright © 2009 craigslist, inc.</li>
|
84
|
+
<li><a href="http://www.craigslist.org/about/terms.of.use.html">terms of use</a></li>
|
85
|
+
<li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
|
86
|
+
<li><a href="/forums/?forumID=8">feedback forum</a></li>
|
87
|
+
</ul>
|
88
|
+
<script type="text/javascript" src="http://www.craigslist.org/js/jquery.js"></script>
|
89
|
+
<script type="text/javascript" src="http://www.craigslist.org/js/postings.js"></script>
|
90
|
+
</body>
|
91
|
+
</html>
|
92
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title></title>
|
5
|
+
<meta name="robots" content="NOARCHIVE,NOFOLLOW" />
|
6
|
+
<link href="http://www.craigslist.org/styles/craigslist.css" title="craigslist" rel="stylesheet" media="all" type="text/css" />
|
7
|
+
</head>
|
8
|
+
|
9
|
+
<body class="posting" onload="initFlag(1187861811)">
|
10
|
+
|
11
|
+
<div class="bchead">
|
12
|
+
|
13
|
+
<a href="http://miami.craigslist.org">south florida craigslist</a>
|
14
|
+
> <a href="/brw/">broward county</a> > <a href="/brw/cto/">cars & trucks - by owner</a>
|
15
|
+
</div>
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
<hr />
|
20
|
+
<br />
|
21
|
+
<br />
|
22
|
+
<h2>This posting has been deleted by its author.</h2>
|
23
|
+
<h5>(The title on the listings page will be removed in just a few minutes.)</h5>
|
24
|
+
|
25
|
+
<br /><br />
|
26
|
+
|
27
|
+
<hr />
|
28
|
+
<ul class="clfooter">
|
29
|
+
<li>Copyright © 2009 craigslist, inc.</li>
|
30
|
+
<li><a href="http://www.craigslist.org/about/terms.of.use.html">terms of use</a></li>
|
31
|
+
<li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
|
32
|
+
<li><a href="/forums/?forumID=8">feedback forum</a></li>
|
33
|
+
</ul>
|
34
|
+
<script src="http://www.craigslist.org/js/jquery.js" type="text/javascript"></script>
|
35
|
+
<script src="http://www.craigslist.org/js/postings.js" type="text/javascript"></script>
|
36
|
+
</body>
|
37
|
+
</html>
|
@@ -119,8 +119,8 @@ EOD
|
|
119
119
|
assert_equal 25.0, six.price
|
120
120
|
end
|
121
121
|
|
122
|
-
def test_listings_parse
|
123
|
-
category = CraigScrape
|
122
|
+
def test_listings_parse
|
123
|
+
category = CraigScrape.scrape_listing relative_uri_for('listing_samples/category_output.html')
|
124
124
|
assert_equal 'index100.html', category.next_page_href
|
125
125
|
assert_equal 100, category.posts.length
|
126
126
|
category.posts[0..80].each do |l|
|
@@ -128,19 +128,19 @@ EOD
|
|
128
128
|
assert_equal 18, l.date.day
|
129
129
|
end
|
130
130
|
|
131
|
-
category2 = CraigScrape
|
131
|
+
category2 = CraigScrape.scrape_listing relative_uri_for('listing_samples/category_output_2.html')
|
132
132
|
assert_equal 'index900.html', category2.next_page_href
|
133
133
|
assert_equal 100, category2.posts.length
|
134
134
|
|
135
|
-
long_search = CraigScrape
|
135
|
+
long_search = CraigScrape.scrape_listing relative_uri_for('listing_samples/long_search_output.html')
|
136
136
|
assert_equal '/search/rea?query=house&minAsk=min&maxAsk=max&bedrooms=&s=800', long_search.next_page_href
|
137
137
|
assert_equal 100, long_search.posts.length
|
138
138
|
|
139
|
-
short_search = CraigScrape
|
139
|
+
short_search = CraigScrape.scrape_listing relative_uri_for('listing_samples/short_search_output.html')
|
140
140
|
assert_equal nil, short_search.next_page_href
|
141
141
|
assert_equal 93, short_search.posts.length
|
142
142
|
|
143
|
-
mia_fua_index8900_052109 = CraigScrape
|
143
|
+
mia_fua_index8900_052109 = CraigScrape.scrape_listing relative_uri_for('listing_samples/mia_fua_index8900.5.21.09.html')
|
144
144
|
assert_equal 'index9000.html', mia_fua_index8900_052109.next_page_href
|
145
145
|
assert_equal 100, mia_fua_index8900_052109.posts.length
|
146
146
|
mia_fua_index8900_052109.posts[0..13].each do |l|
|
@@ -151,10 +151,14 @@ EOD
|
|
151
151
|
assert_equal 5, l.date.month
|
152
152
|
assert_equal 14, l.date.day
|
153
153
|
end
|
154
|
+
|
155
|
+
empty_listings = CraigScrape.scrape_listing relative_uri_for('listing_samples/empty_listings.html')
|
156
|
+
assert_equal nil, empty_listings.next_page_href
|
157
|
+
assert_equal [], empty_listings.posts
|
154
158
|
end
|
155
159
|
|
156
160
|
def test_posting_parse
|
157
|
-
posting0 = CraigScrape
|
161
|
+
posting0 = CraigScrape.scrape_full_post relative_uri_for('post_samples/posting0.html')
|
158
162
|
assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color", posting0.contents
|
159
163
|
assert_equal ["south florida craigslist", "miami / dade", "furniture - by owner"], posting0.full_section
|
160
164
|
assert_equal "tv cart on wheels - $35 (NMB)", posting0.header
|
@@ -167,7 +171,7 @@ EOD
|
|
167
171
|
assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color",posting0.contents_as_plain
|
168
172
|
assert_equal 35.0, posting0.price
|
169
173
|
|
170
|
-
posting1 = CraigScrape
|
174
|
+
posting1 = CraigScrape.scrape_full_post relative_uri_for('post_samples/posting1.html')
|
171
175
|
assert_equal "Residential income property\227Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r<br />\n\r<br />\nJe parle le Fran\347ais\r<br />\n\r<br />\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r<br />\n\r<br />", posting1.contents
|
172
176
|
assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], posting1.full_section
|
173
177
|
assert_equal "$189900 / 4br - Investment Property--Duplex in Fort Lauderdale", posting1.header
|
@@ -180,7 +184,7 @@ EOD
|
|
180
184
|
assert_equal "Residential income property\227Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r\n\r\nJe parle le Fran\347ais\r\n\r\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r\n\r", posting1.contents_as_plain
|
181
185
|
assert_equal 189900.0, posting1.price
|
182
186
|
|
183
|
-
posting2 = CraigScrape
|
187
|
+
posting2 = CraigScrape.scrape_full_post relative_uri_for('post_samples/posting2.html')
|
184
188
|
assert_equal 15775, posting2.contents.length # This is easy, and probably fine enough
|
185
189
|
assert_equal ["south florida craigslist", "broward county", "cars & trucks - by dealer"], posting2.full_section
|
186
190
|
assert_equal "PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEE - $23975 (Fort Lauderdale)", posting2.header
|
@@ -193,7 +197,7 @@ EOD
|
|
193
197
|
assert_equal "\302\240 Sheehan Buick Pontiac GMC \302\240 Pompano Beach, FL(754) 224-3257 \302\240PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!2002 Chevrolet Corvette Z06 Florida Driven AutoCheck Certified 5.7L V8 6sp2 Door Coupe.\302\240Price: \302\240 $23,975Exterior:Electron Blue MetallicInterior:BlackStock#:P5110AVIN:1G1YY12S625129021FREE AutoCheck Vehicle ReportMileage:63,560Transmission:6 Speed ManualEngine:V8 5.7L OHVWarranty:Limited WarrantyTitle:Clear\302\273\302\240View All 58 Photos\302\273\302\240View Full Vehicle Details\302\273\302\240Ask the Seller a Question\302\273\302\240E-mail this to a Friend\302\240 DescriptionPRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!\r\n\r\nLOADED WITH BLACK LEATHER BUCKET SEATS, POWER DRIVERS SEAT, DUAL ZONE CLIMATE CONTROL, 4 WHEEL ABS BRAKES, POWER STEERING AND BRAKES, REAR LIMITED SLIP DIFFERENTIAL, STABILITY CONTROL, CRUISE CONTROL, TLT STEERING WHEEL, POWER WINDOWS AND LOCKS, AUTOMATIC ON/OFF HEADLAMPS, FOG LIGHTS, DUAL AIR BAG SAFETY, AM/FM STEREO CD PLAYER, INTERMITTENT WINDSHIELD WIPERS AND SO MUCH MORE - THIS CAR IS TOTALLY HOT WITH GREAT LOW MILES!\r\n\r\nPlease call us to make your deal now at 1-888-453-5244. Please visit our Website at www.sheehanautoplex.com ***View 50+ Pictures of this vehicle - a complete description including standard features and all added options & a FREE AUTO CHECK REPORT at www.sheehanautoplex.com. ***Financing for Everyone - Good credit - bad credit - divorce - charge off's - NO PROBLEM. 
To complete a secure credit application, please visit our website at www.sheehanautoplex.com ***The largest Dealer in the State of Florida - We export all over the world - For details please visit www.sheehanautoplex.com ***Sheehan Autoplex takes great pride in our outstanding customer service and has been recognized by the following associations - BBB (Better Business Bureau) - NIADA - and the FIADA. Call us to get your best deal. CALL NOW. 1-888-453-5244\302\240 Contact Sheehan Buick Pontiac GMCPhone:(754) 224-3257Fax:(954) 781-9050Phone:(754) 224-3257E-mail:sales@proauto.comBusiness HoursWeekdays:9:00 AM to 9:00 PMSat:9:00 AM to 6:00 PMSun:",posting2.contents_as_plain
|
194
198
|
assert_equal 23975.0, posting2.price
|
195
199
|
|
196
|
-
posting3 = CraigScrape
|
200
|
+
posting3 = CraigScrape.scrape_full_post relative_uri_for('post_samples/posting3.html')
|
197
201
|
assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r<br />\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny", posting3.contents
|
198
202
|
assert_equal ["south florida craigslist", "miami / dade", "cars & trucks - by owner"], posting3.full_section
|
199
203
|
assert_equal "300ZX Nissan Twin Turbo 1992 - $5800 (N.Miami/ Hialeah)", posting3.header
|
@@ -207,7 +211,7 @@ EOD
|
|
207
211
|
assert_equal 5800.0, posting3.price
|
208
212
|
|
209
213
|
# This one ended up being quite a curveball since the user uploaded HTML was such junk:
|
210
|
-
posting4 = CraigScrape
|
214
|
+
posting4 = CraigScrape.scrape_full_post relative_uri_for('post_samples/posting4.html')
|
211
215
|
assert_equal 20640, posting4.contents.length
|
212
216
|
assert_equal ["south florida craigslist", "broward county", "real estate - by broker"], posting4.full_section
|
213
217
|
assert_equal "$225000 / 3br - Palm Aire Golf Corner Unit!", posting4.header
|
@@ -220,7 +224,7 @@ EOD
|
|
220
224
|
assert_equal 6399,posting4.contents_as_plain.length
|
221
225
|
assert_equal 225000.0, posting4.price
|
222
226
|
|
223
|
-
posting5 = CraigScrape
|
227
|
+
posting5 = CraigScrape.scrape_full_post relative_uri_for('post_samples/posting5.html')
|
224
228
|
assert_equal true, posting5.flagged_for_removal?
|
225
229
|
assert_equal nil, posting5.contents
|
226
230
|
assert_equal ["south florida craigslist", "palm beach co", "apts/housing for rent"], posting5.full_section
|
@@ -233,6 +237,35 @@ EOD
|
|
233
237
|
assert_equal [], posting5.images
|
234
238
|
assert_equal nil, posting5.contents_as_plain
|
235
239
|
assert_equal nil, posting5.price
|
240
|
+
|
241
|
+
posting_deleted = CraigScrape.scrape_full_post relative_uri_for('post_samples/this_post_has_been_deleted_by_its_author.html')
|
242
|
+
assert_equal true, posting_deleted.deleted_by_author?
|
243
|
+
assert_equal nil, posting_deleted.contents
|
244
|
+
assert_equal ["south florida craigslist", "broward county", "cars & trucks - by owner"], posting_deleted.full_section
|
245
|
+
assert_equal "This posting has been deleted by its author.", posting_deleted.header
|
246
|
+
assert_equal nil, posting_deleted.title
|
247
|
+
assert_equal nil, posting_deleted.location
|
248
|
+
assert_equal nil, posting_deleted.posting_id
|
249
|
+
assert_equal nil, posting_deleted.reply_to
|
250
|
+
assert_equal nil, posting_deleted.post_time
|
251
|
+
assert_equal [], posting_deleted.images
|
252
|
+
assert_equal nil, posting_deleted.contents_as_plain
|
253
|
+
assert_equal nil, posting_deleted.price
|
254
|
+
|
255
|
+
posting6 = CraigScrape.scrape_full_post relative_uri_for('post_samples/1207457727.html')
|
256
|
+
assert_equal "<p><br />Call!! asking for a new owner.<br /> no deposit required rent to own properties. <br /> <br /> Defaulting payment records are not a problem, <br /> we will help you protect the previous owners credit history! 202-567-6371 <br /><br /></p>",posting6.contents
|
257
|
+
assert_equal "Call!! asking for a new owner. no deposit required rent to own properties. Defaulting payment records are not a problem, we will help you protect the previous owners credit history! 202-567-6371 ",posting6.contents_as_plain
|
258
|
+
assert_equal false,posting6.deleted_by_author?
|
259
|
+
assert_equal false,posting6.flagged_for_removal?
|
260
|
+
assert_equal ["south florida craigslist", "broward county", "apts/housing for rent"],posting6.full_section
|
261
|
+
assert_equal "$1350 / 3br - 2bth for no deposit req (Coral Springs)",posting6.header
|
262
|
+
assert_equal ["http://images.craigslist.org/3k43pe3o8ZZZZZZZZZ9655022102a3ea51624.jpg", "http://images.craigslist.org/3n13m53p6ZZZZZZZZZ96596515e51237a179c.jpg", "http://images.craigslist.org/3od3p33leZZZZZZZZZ9656d614da8e3a51dd9.jpg", "http://images.craigslist.org/3pb3oa3leZZZZZZZZZ965eb60e4d2344019fb.jpg"],posting6.images
|
263
|
+
assert_equal 'Coral Springs',posting6.location
|
264
|
+
assert_equal [0, 56, 18, 5, 6, 2009, 5, 156, true, "EDT"],posting6.post_time.to_a
|
265
|
+
assert_equal 1207457727,posting6.posting_id
|
266
|
+
assert_equal 1350.0,posting6.price
|
267
|
+
assert_equal "hous-ccpap-1207457727@craigslist.org",posting6.reply_to
|
268
|
+
assert_equal "2bth for no deposit req",posting6.title
|
236
269
|
end
|
237
270
|
|
238
271
|
private
|
@@ -242,4 +275,26 @@ EOD
|
|
242
275
|
File.open('%s/%s' % [File.dirname(__FILE__), test_file]).read
|
243
276
|
)
|
244
277
|
end
|
278
|
+
|
279
|
+
def relative_uri_for(filename)
|
280
|
+
'file://%s/%s' % [File.dirname(File.expand_path(__FILE__)), filename]
|
281
|
+
end
|
282
|
+
|
283
|
+
def pp_assertions(obj, obj_name)
|
284
|
+
probable_accessors = (obj.methods-obj.class.superclass.methods)
|
285
|
+
|
286
|
+
puts
|
287
|
+
probable_accessors.sort.each do |m|
|
288
|
+
val = obj.send(m.to_sym)
|
289
|
+
|
290
|
+
# There's a good number of transformations worth doing here, I'll just start like this for now:
|
291
|
+
if val.kind_of? Time
|
292
|
+
# I've decided this is the easiest way to understand and test a time
|
293
|
+
val = val.to_a
|
294
|
+
m = "#{m}.to_a"
|
295
|
+
end
|
296
|
+
|
297
|
+
puts "assert_equal %s, %s.%s" % [val.inspect,obj_name,m]
|
298
|
+
end
|
299
|
+
end
|
245
300
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libcraigscrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.6.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris DeRose, DeRose Technologies, Inc.
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-06-08 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -68,6 +68,7 @@ files:
|
|
68
68
|
- test/listing_samples
|
69
69
|
- test/listing_samples/category_output.html
|
70
70
|
- test/listing_samples/short_search_output.html
|
71
|
+
- test/listing_samples/empty_listings.html
|
71
72
|
- test/listing_samples/mia_fua_index8900.5.21.09.html
|
72
73
|
- test/listing_samples/category_output_2.html
|
73
74
|
- test/listing_samples/long_search_output.html
|
@@ -78,6 +79,8 @@ files:
|
|
78
79
|
- test/post_samples/posting0.html
|
79
80
|
- test/post_samples/posting5.html
|
80
81
|
- test/post_samples/posting3.html
|
82
|
+
- test/post_samples/this_post_has_been_deleted_by_its_author.html
|
83
|
+
- test/post_samples/1207457727.html
|
81
84
|
- test/post_samples/posting2.html
|
82
85
|
- test/google.html
|
83
86
|
- lib/libcraigscrape.rb
|