olek-libcraigscrape 1.1.0.4 → 1.1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/craigwatch +10 -12
- data/lib/libcraigscrape.rb +7 -4
- data/lib/posting.rb +8 -3
- data/lib/scraper.rb +3 -8
- data/spec/listings_spec.rb +5 -3
- data/spec/postings_spec.rb +0 -6
- data/test/test_craigslist_posting.rb +15 -15
- metadata +2 -3
- data/spec/assets/posting_page_not_found_120512.html +0 -160
data/bin/craigwatch
CHANGED
@@ -171,6 +171,7 @@ require 'action_mailer'
|
|
171
171
|
require 'kwalify/util/hashlike'
|
172
172
|
require 'libcraigscrape'
|
173
173
|
require "socket"
|
174
|
+
require 'active_support/all'
|
174
175
|
|
175
176
|
class String #:nodoc:
|
176
177
|
RE = /^\/(.*)\/([ixm]*)$/
|
@@ -252,9 +253,9 @@ class CraigReportDefinition #:nodoc:
|
|
252
253
|
def price_required?; @price_required; end
|
253
254
|
|
254
255
|
def starting_at
|
255
|
-
(@starting) ?
|
256
|
-
Time.strptime(@starting, "%m/%d/%Y") :
|
257
|
-
Time.now.yesterday.
|
256
|
+
((@starting) ?
|
257
|
+
Time.strptime([@starting, 'UTC'].join(' '), "%m/%d/%Y %Z") :
|
258
|
+
Time.zone.now.yesterday).to_date
|
258
259
|
end
|
259
260
|
|
260
261
|
def passes_filter?(post)
|
@@ -339,7 +340,6 @@ class TrackedListing < ActiveRecord::Base #:nodoc:
|
|
339
340
|
end
|
340
341
|
|
341
342
|
def delete_posts_older_than(cutoff_date)
|
342
|
-
# TODO: can't I use posts.delete 'created_at < ?' and keep it cleaner?
|
343
343
|
TrackedPost.delete_all [ 'tracked_listing_id = ? AND created_at < ?', self.id, cutoff_date ]
|
344
344
|
end
|
345
345
|
end
|
@@ -360,12 +360,10 @@ class TrackedPost < ActiveRecord::Base #:nodoc:
|
|
360
360
|
end
|
361
361
|
|
362
362
|
class ReportMailer < ActionMailer::Base #:nodoc:
|
363
|
-
# default :template_path => File.dirname(__FILE__)
|
364
|
-
|
365
363
|
def report(to, sender, subject_template, report_tmpl)
|
366
|
-
subject = Time.now.strftime subject_template
|
367
364
|
@summaries = report_tmpl[:summaries]
|
368
|
-
|
365
|
+
|
366
|
+
mail :to => to, :subject => Time.zone.now.strftime(subject_template), :from => sender
|
369
367
|
end
|
370
368
|
end
|
371
369
|
|
@@ -472,11 +470,11 @@ report_summaries = craig_report.searches.collect do |search|
|
|
472
470
|
already_tracked_urls = tracked_listing.posts.collect{|tp| tp.url}
|
473
471
|
|
474
472
|
# We'll use this in the loop to decide what posts to track:
|
475
|
-
newest_post_date = last_tracked_at
|
473
|
+
newest_post_date = last_tracked_at.to_date
|
476
474
|
|
477
475
|
# We keep track of post.post_date here, b/c in some circumstances, you can be in the below loop
|
478
476
|
# but have no post.post_date since the posting was removed and it parsed to nil
|
479
|
-
most_recent_posting_date = Time.now
|
477
|
+
most_recent_posting_date = Time.zone.now.to_date
|
480
478
|
|
481
479
|
# OK - Now let's go!
|
482
480
|
catch :list_break do
|
@@ -487,7 +485,7 @@ report_summaries = craig_report.searches.collect do |search|
|
|
487
485
|
|
488
486
|
# Are we at a point in the scrape, past which we don't need to proceed?
|
489
487
|
throw :list_break if (
|
490
|
-
most_recent_posting_date < last_tracked_at or
|
488
|
+
most_recent_posting_date.to_time < last_tracked_at or
|
491
489
|
already_tracked_urls.include? post.url
|
492
490
|
)
|
493
491
|
|
@@ -496,7 +494,7 @@ report_summaries = craig_report.searches.collect do |search|
|
|
496
494
|
!new_summaries.has_key? post.url and
|
497
495
|
search.passes_filter? post
|
498
496
|
)
|
499
|
-
rescue CraigScrape::Scraper::ResourceNotFoundError
|
497
|
+
rescue CraigScrape::Scraper::ResourceNotFoundError => e
|
500
498
|
# Sometimes we do end up with 404's that will never load, and we dont want to
|
501
499
|
# abort a run simply b/c we found some anomaly due to the craigslist index.
|
502
500
|
# being out of date. This ResourceNotFoundError can occur due to
|
data/lib/libcraigscrape.rb
CHANGED
@@ -7,10 +7,13 @@ require 'time'
|
|
7
7
|
require 'uri'
|
8
8
|
require 'htmlentities'
|
9
9
|
require 'active_support/core_ext/class/attribute_accessors'
|
10
|
+
require 'active_support/core_ext/time/calculations'
|
10
11
|
require 'htmlentities'
|
11
12
|
require 'nokogiri'
|
12
13
|
require 'typhoeus'
|
13
14
|
|
15
|
+
Time.zone = 'UTC'
|
16
|
+
|
14
17
|
# A base class encapsulating the various libcraigscrape objects, and providing most of the
|
15
18
|
# craigslist interaction methods. Currently, we're supporting the old Class methods
|
16
19
|
# in a legacy-compatibility mode, but these methods are marked for deprecation. Instead,
|
@@ -101,7 +104,7 @@ class CraigScrape
|
|
101
104
|
ret = []
|
102
105
|
fragments.each do |frag|
|
103
106
|
each_post(frag) do |p|
|
104
|
-
break if p.
|
107
|
+
break if p.post_time <= newer_then
|
105
108
|
ret << p
|
106
109
|
end
|
107
110
|
end
|
@@ -196,11 +199,11 @@ class CraigScrape
|
|
196
199
|
|
197
200
|
# Returns the most recentlt expired time for the provided month and day
|
198
201
|
def self.most_recently_expired_time(month, day) #:nodoc:
|
199
|
-
now = (time_now) ? time_now : Time.now
|
202
|
+
now = (time_now) ? time_now : Time.zone.now
|
200
203
|
|
201
204
|
# This ensures we always generate a time in the past, by guessing the year and subtracting one if we guessed wrong
|
202
|
-
ret = Time.local now.year, month, day
|
203
|
-
ret = Time.local now.year-1, month, day if ret > now
|
205
|
+
ret = Time.zone.local now.year, month, day
|
206
|
+
ret = Time.zone.local now.year-1, month, day if ret > now
|
204
207
|
|
205
208
|
ret
|
206
209
|
end
|
data/lib/posting.rb
CHANGED
@@ -17,7 +17,12 @@ class CraigScrape::Posting < CraigScrape::Scraper
|
|
17
17
|
HEADER_LOCATION = /^.+[ ]*\-[ ]*[\$]?[\d]+[ ]*\((.+)\)$/
|
18
18
|
POSTING_ID = /PostingID\:[ ]*([\d]+)/
|
19
19
|
REPLY_TO = /(.+)/
|
20
|
+
<<<<<<< HEAD
|
20
21
|
PRICE = /((?:^\$[\d]+(?:\.[\d]{2})?)|(?:\$[\d]+(?:\.[\d]{2})?$))/
|
22
|
+
|
23
|
+
=======
|
24
|
+
PRICE = /((?:^\$[\d]+(?:\.[\d]{2})?)|(?:\$[\d]+(?:\.[\d]{2})?))/
|
25
|
+
>>>>>>> bded128... Removed end of line ($) in price regex, and switched price method to use header. Fixes Real Estate listings.
|
21
26
|
# NOTE: we implement the (?:) to first check the 'old' style format, and then the 'new style'
|
22
27
|
# (As of 12/03's parse changes)
|
23
28
|
USERBODY_PARTS = /^(.+)\<div id\=\"userbody\">(.+)\<br[ ]*[\/]?\>\<br[ ]*[\/]?\>(.+)\<\/div\>(.+)$/m
|
@@ -105,7 +110,7 @@ class CraigScrape::Posting < CraigScrape::Scraper
|
|
105
110
|
unless @post_time
|
106
111
|
cursor = html_head.at 'hr' if html_head
|
107
112
|
cursor = cursor.next until cursor.nil? or POST_DATE.match cursor.to_s
|
108
|
-
@post_time = Time.parse $1 if $1
|
113
|
+
@post_time = Time.zone.parse $1 if $1
|
109
114
|
end
|
110
115
|
|
111
116
|
@post_time
|
@@ -239,7 +244,7 @@ class CraigScrape::Posting < CraigScrape::Scraper
|
|
239
244
|
# Reflects only the date portion of the posting. Does not include hours/minutes. This is useful when reflecting the listing scrapes, and can be safely
|
240
245
|
# used if you wish conserve bandwidth by not pulling an entire post from a listing scrape.
|
241
246
|
def post_date
|
242
|
-
@post_date =
|
247
|
+
@post_date = post_time.to_date unless @post_date or post_time.nil?
|
243
248
|
|
244
249
|
@post_date
|
245
250
|
end
|
@@ -297,7 +302,7 @@ class CraigScrape::Posting < CraigScrape::Scraper
|
|
297
302
|
# Returns the best-guess of a price, judging by the label's contents. Price is available when pulled from the listing summary
|
298
303
|
# and can be safely used if you wish conserve bandwidth by not pulling an entire post from a listing scrape.
|
299
304
|
def price
|
300
|
-
$1.tr('$','').to_f if
|
305
|
+
$1.tr('$','').to_f if header and PRICE.match header
|
301
306
|
end
|
302
307
|
|
303
308
|
# Returns the post contents with all html tags removed
|
data/lib/scraper.rb
CHANGED
@@ -15,14 +15,6 @@
|
|
15
15
|
#
|
16
16
|
# <b>logger</b> - a Logger object to debug http notices too. Defaults to nil
|
17
17
|
#
|
18
|
-
# <b>retries_on_fetch_fail</b> - The number of times to retry a failed uri download. Defaults to 8
|
19
|
-
#
|
20
|
-
# <b>sleep_between_fetch_retries</b> - The amount of seconds to sleep, between successive attempts in the case of a failed download. Defaults to 30.
|
21
|
-
#
|
22
|
-
# <b>retries_on_404_fail</b> - The number of times to retry a Resource Not Found error (http Response code 404). Defaults to 3.
|
23
|
-
#
|
24
|
-
# <b>sleep_between_404_retries</b> - The amount of seconds to sleep, between successive attempts in the case of a Resource Not Found error. Defaults to 3.
|
25
|
-
#
|
26
18
|
|
27
19
|
class CraigScrape::Scraper
|
28
20
|
cattr_accessor :logger
|
@@ -50,6 +42,9 @@ class CraigScrape::Scraper
|
|
50
42
|
|
51
43
|
class FetchError < StandardError #:nodoc:
|
52
44
|
end
|
45
|
+
|
46
|
+
class ResourceNotFoundError < StandardError #:nodoc:
|
47
|
+
end
|
53
48
|
|
54
49
|
# Scraper Objects can be created from either a full URL (string), or a Hash.
|
55
50
|
# Currently, this initializer isn't intended to be called from libcraigslist API users, though
|
data/spec/listings_spec.rb
CHANGED
@@ -5,16 +5,18 @@ describe CraigScrape::Listings do
|
|
5
5
|
context "listing_cta_ftl_112612.html" do
|
6
6
|
subject { described_class.new( uri_for('listing_cta_ftl_112612.html') ) }
|
7
7
|
specify{ subject.posts.should have(100).items }
|
8
|
-
specify{ subject.posts.collect(&:post_date).uniq.should eq([Time.parse('2012-11-26 00:00:00
|
8
|
+
specify{ subject.posts.collect(&:post_date).uniq.should eq([Time.zone.parse('2012-11-26 00:00:00')]) }
|
9
9
|
specify{ subject.next_page_href.should eq('index100.html') }
|
10
|
+
|
11
|
+
|
10
12
|
end
|
11
13
|
|
12
14
|
context 'listing_search_ppa_nyc_121212.html' do
|
13
15
|
subject { described_class.new( uri_for('listing_search_ppa_nyc_121212.html') ) }
|
14
16
|
|
15
17
|
specify{ subject.posts.should have(100).items }
|
16
|
-
specify{ subject.posts.collect(&:post_date).uniq.should eq(['2012-12-12 00:00:00
|
17
|
-
'2012-12-11 00:00:00
|
18
|
+
specify{ subject.posts.collect(&:post_date).uniq.should eq(['2012-12-12 00:00:00',
|
19
|
+
'2012-12-11 00:00:00', '2012-12-10 00:00:00'].collect{|t| Time.zone.parse(t) }) }
|
18
20
|
specify{ subject.next_page_href.should eq('http://newyork.craigslist.org/search/ppa?query=kenmore&srchType=A&s=100') }
|
19
21
|
end
|
20
22
|
end
|
data/spec/postings_spec.rb
CHANGED
@@ -8,12 +8,6 @@ describe CraigScrape::Posting do
|
|
8
8
|
its(:posting_has_expired?){ should be_true }
|
9
9
|
end
|
10
10
|
|
11
|
-
context "posting_page_not_found_120512.html" do
|
12
|
-
subject{ described_class.new uri_for('posting_page_not_found_120512.html') }
|
13
|
-
|
14
|
-
its(:system_post?){ should be_true }
|
15
|
-
end
|
16
|
-
|
17
11
|
context "posting_sya_121012.html" do
|
18
12
|
# This example was picked since it has pics
|
19
13
|
subject{ described_class.new uri_for('posting_sya_121012.html') }
|
@@ -146,7 +146,7 @@ EOD
|
|
146
146
|
assert_equal "NMB", posting0.location
|
147
147
|
assert_equal 1131363612, posting0.posting_id
|
148
148
|
assert_equal "sale-ktf9w-1131363612@craigslist.org", posting0.reply_to
|
149
|
-
assert_equal [0, 21, 13, 20, 4, 2009, 1, 110, true, "
|
149
|
+
assert_equal [0, 21, 13, 20, 4, 2009, 1, 110, true, "UTC"], posting0.post_time.to_a
|
150
150
|
assert_equal [], posting0.pics
|
151
151
|
assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color",posting0.contents_as_plain
|
152
152
|
assert_equal 35.0, posting0.price
|
@@ -162,7 +162,7 @@ EOD
|
|
162
162
|
assert_equal '1000 NE 14th Pl', posting1.location
|
163
163
|
assert_equal 1131242195, posting1.posting_id
|
164
164
|
assert_equal "hous-5nzhq-1131242195@craigslist.org", posting1.reply_to
|
165
|
-
assert_equal [0, 33, 13, 20, 4, 2009, 1, 110, true, "
|
165
|
+
assert_equal [0, 33, 13, 20, 4, 2009, 1, 110, true, "UTC"], posting1.post_time.to_a
|
166
166
|
assert_equal %w(http://images.craigslist.org/3n83o33l5ZZZZZZZZZ94k913ac1582d4b1fa4.jpg http://images.craigslist.org/3n93p63obZZZZZZZZZ94k19d5e32eb3b610c2.jpg http://images.craigslist.org/3n93m03l6ZZZZZZZZZ94k6e9785e37a1b1f3f.jpg http://images.craigslist.org/3ma3oc3l4ZZZZZZZZZ94kbfecbcd2fb2e19cc.jpg), posting1.pics
|
167
167
|
assert_equal "Residential income property\u0097Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r\n\r\nJe parle le Français\r\n\r\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r\n\r", posting1.contents_as_plain
|
168
168
|
assert_equal 189900.0, posting1.price
|
@@ -179,7 +179,7 @@ EOD
|
|
179
179
|
assert_equal 'Fort Lauderdale', posting2.location
|
180
180
|
assert_equal 1127037648, posting2.posting_id
|
181
181
|
assert_equal nil, posting2.reply_to
|
182
|
-
assert_equal [0, 16, 14, 17, 4, 2009, 5, 107, true, "
|
182
|
+
assert_equal [0, 16, 14, 17, 4, 2009, 5, 107, true, "UTC"], posting2.post_time.to_a
|
183
183
|
assert_equal [], posting2.pics
|
184
184
|
assert_equal "\302\240 Sheehan Buick Pontiac GMC \302\240 Pompano Beach, FL(754) 224-3257 \302\240PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!2002 Chevrolet Corvette Z06 Florida Driven AutoCheck Certified 5.7L V8 6sp2 Door Coupe.\302\240Price: \302\240 $23,975Exterior:Electron Blue MetallicInterior:BlackStock#:P5110AVIN:1G1YY12S625129021FREE AutoCheck Vehicle ReportMileage:63,560Transmission:6 Speed ManualEngine:V8 5.7L OHVWarranty:Limited WarrantyTitle:Clear\302\273\302\240View All 58 Photos\302\273\302\240View Full Vehicle Details\302\273\302\240Ask the Seller a Question\302\273\302\240E-mail this to a Friend\302\240 DescriptionPRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!\r\n\r\nLOADED WITH BLACK LEATHER BUCKET SEATS, POWER DRIVERS SEAT, DUAL ZONE CLIMATE CONTROL, 4 WHEEL ABS BRAKES, POWER STEERING AND BRAKES, REAR LIMITED SLIP DIFFERENTIAL, STABILITY CONTROL, CRUISE CONTROL, TLT STEERING WHEEL, POWER WINDOWS AND LOCKS, AUTOMATIC ON/OFF HEADLAMPS, FOG LIGHTS, DUAL AIR BAG SAFETY, AM/FM STEREO CD PLAYER, INTERMITTENT WINDSHIELD WIPERS AND SO MUCH MORE - THIS CAR IS TOTALLY HOT WITH GREAT LOW MILES!\r\n\r\nPlease call us to make your deal now at 1-888-453-5244. Please visit our Website at www.sheehanautoplex.com ***View 50+ Pictures of this vehicle - a complete description including standard features and all added options & a FREE AUTO CHECK REPORT at www.sheehanautoplex.com. ***Financing for Everyone - Good credit - bad credit - divorce - charge off's - NO PROBLEM. 
To complete a secure credit application, please visit our website at www.sheehanautoplex.com ***The largest Dealer in the State of Florida - We export all over the world - For details please visit www.sheehanautoplex.com ***Sheehan Autoplex takes great pride in our outstanding customer service and has been recognized by the following associations - BBB (Better Business Bureau) - NIADA - and the FIADA. Call us to get your best deal. CALL NOW. 1-888-453-5244\302\240 Contact Sheehan Buick Pontiac GMCPhone:(754) 224-3257Fax:(954) 781-9050Phone:(754) 224-3257E-mail:sales@proauto.comBusiness HoursWeekdays:9:00 AM to 9:00 PMSat:9:00 AM to 6:00 PMSun:",posting2.contents_as_plain
|
185
185
|
assert_equal 23975.0, posting2.price
|
@@ -195,7 +195,7 @@ EOD
|
|
195
195
|
assert_equal "N.Miami/ Hialeah", posting3.location
|
196
196
|
assert_equal 1130212403, posting3.posting_id
|
197
197
|
assert_equal "sale-c9bpa-1130212403@craigslist.org", posting3.reply_to
|
198
|
-
assert_equal [0, 21, 18, 19, 4, 2009, 0, 109, true, "
|
198
|
+
assert_equal [0, 21, 18, 19, 4, 2009, 0, 109, true, "UTC"], posting3.post_time.to_a
|
199
199
|
assert_equal %w(http://images.craigslist.org/3n23kf3lfZZZZZZZZZ94j1160e7d7b0601934.jpg http://images.craigslist.org/3nc3kf3p2ZZZZZZZZZ94j04fbc71e0a551ace.jpg http://images.craigslist.org/3nc3k33l7ZZZZZZZZZ94k13d8d7b1024e1e0e.jpg http://images.craigslist.org/3n23k63mfZZZZZZZZZ94k7838ae5d48d91eb8.jpg), posting3.pics
|
200
200
|
assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny",posting3.contents_as_plain
|
201
201
|
assert_equal 5800.0, posting3.price
|
@@ -213,7 +213,7 @@ EOD
|
|
213
213
|
assert_equal nil, posting4.location
|
214
214
|
assert_equal 1139303170, posting4.posting_id
|
215
215
|
assert_equal "hous-sk9f2-1139303170@craigslist.org", posting4.reply_to
|
216
|
-
assert_equal [0, 8, 9, 25, 4, 2009, 6, 115, true, "
|
216
|
+
assert_equal [0, 8, 9, 25, 4, 2009, 6, 115, true, "UTC"], posting4.post_time.to_a
|
217
217
|
assert_equal [], posting4.pics
|
218
218
|
assert_equal 6321,posting4.contents_as_plain.length
|
219
219
|
assert_equal 225000.0, posting4.price
|
@@ -264,7 +264,7 @@ EOD
|
|
264
264
|
assert_equal "$1350 / 3br - 2bth for no deposit req",posting6.label
|
265
265
|
assert_equal ["http://images.craigslist.org/3k43pe3o8ZZZZZZZZZ9655022102a3ea51624.jpg", "http://images.craigslist.org/3n13m53p6ZZZZZZZZZ96596515e51237a179c.jpg", "http://images.craigslist.org/3od3p33leZZZZZZZZZ9656d614da8e3a51dd9.jpg", "http://images.craigslist.org/3pb3oa3leZZZZZZZZZ965eb60e4d2344019fb.jpg"],posting6.pics
|
266
266
|
assert_equal 'Coral Springs',posting6.location
|
267
|
-
assert_equal [0, 56, 18, 5, 6, 2009, 5, 156, true, "
|
267
|
+
assert_equal [0, 56, 18, 5, 6, 2009, 5, 156, true, "UTC"],posting6.post_time.to_a
|
268
268
|
assert_equal 1207457727,posting6.posting_id
|
269
269
|
assert_equal 1350.0,posting6.price
|
270
270
|
assert_equal "hous-ccpap-1207457727@craigslist.org",posting6.reply_to
|
@@ -283,7 +283,7 @@ EOD
|
|
283
283
|
assert_equal "$189999 / 3br - Nice 3 Bedroom/ 2 Bathroom House with Garage in Sunrise (Sunrise) (map)", brw_reb_1224008903.header_as_plain
|
284
284
|
assert_equal ["http://images.craigslist.org/3ma3o93laZZZZZZZZZ96g5ee7cc528f1818a8.jpg", "http://images.craigslist.org/3nf3m03oeZZZZZZZZZ96gb267b7db57d91f60.jpg", "http://images.craigslist.org/3m63oc3p1ZZZZZZZZZ96g521443416aea1cac.jpg", "http://images.craigslist.org/3nc3p53l5ZZZZZZZZZ96g8706fce2c0bb17e9.jpg"], brw_reb_1224008903.pics
|
285
285
|
assert_equal "Sunrise", brw_reb_1224008903.location
|
286
|
-
assert_equal [0, 43, 18, 16, 6, 2009, 2, 167, true, "
|
286
|
+
assert_equal [0, 43, 18, 16, 6, 2009, 2, 167, true, "UTC"], brw_reb_1224008903.post_time.to_a
|
287
287
|
assert_equal 1224008903, brw_reb_1224008903.posting_id
|
288
288
|
assert_equal 189999.0, brw_reb_1224008903.price
|
289
289
|
assert_equal "1971CJS@Bellsouth.net", brw_reb_1224008903.reply_to
|
@@ -305,8 +305,8 @@ EOD
|
|
305
305
|
assert_equal ["http://images.craigslist.org/3kf3o93laZZZZZZZZZ96fbc594a6ceb1f1025.jpg"], sfbay_art_1223614914.pics
|
306
306
|
assert_equal "Bombay Company Art Painting - $650", sfbay_art_1223614914.label
|
307
307
|
assert_equal 'saratoga', sfbay_art_1223614914.location
|
308
|
-
assert_equal
|
309
|
-
assert_equal [0, 38, 22, 15, 6, 2009, 1, 166, true, "
|
308
|
+
assert_equal Date.new(2009, 6, 15), sfbay_art_1223614914.post_date
|
309
|
+
assert_equal [0, 38, 22, 15, 6, 2009, 1, 166, true, "UTC"], sfbay_art_1223614914.post_time.to_a
|
310
310
|
assert_equal 1223614914, sfbay_art_1223614914.posting_id
|
311
311
|
assert_equal 650.0, sfbay_art_1223614914.price
|
312
312
|
assert_equal "sale-trzm8-1223614914@craigslist.org", sfbay_art_1223614914.reply_to
|
@@ -338,8 +338,8 @@ EOD
|
|
338
338
|
assert_equal "*****SOFTWARE****", posting_061710.label
|
339
339
|
assert_equal "Dade/Broward", posting_061710.location
|
340
340
|
assert_equal [], posting_061710.pics
|
341
|
-
assert_equal
|
342
|
-
assert_equal [0, 22, 13, 17, 6, 2010, 4, 168, true, "
|
341
|
+
assert_equal Date.new(2010, 6, 17), posting_061710.post_date
|
342
|
+
assert_equal [0, 22, 13, 17, 6, 2010, 4, 168, true, "UTC"], posting_061710.post_time.to_a
|
343
343
|
assert_equal 1796890756, posting_061710.posting_id
|
344
344
|
assert_equal nil, posting_061710.price
|
345
345
|
assert_equal nil, posting_061710.reply_to
|
@@ -363,8 +363,8 @@ EOD
|
|
363
363
|
assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More", posting1808219423.label
|
364
364
|
assert_equal "Dade/Broward", posting1808219423.location
|
365
365
|
assert_equal [], posting1808219423.pics
|
366
|
-
assert_equal
|
367
|
-
assert_equal [0, 35, 7, 24, 6, 2010, 4, 175, true, "
|
366
|
+
assert_equal Date.new(2010, 6, 24), posting1808219423.post_date
|
367
|
+
assert_equal [0, 35, 7, 24, 6, 2010, 4, 175, true, "UTC"], posting1808219423.post_time.to_a
|
368
368
|
assert_equal 1808219423, posting1808219423.posting_id
|
369
369
|
assert_equal nil, posting1808219423.price
|
370
370
|
assert_equal nil, posting1808219423.reply_to
|
@@ -393,8 +393,8 @@ EOD
|
|
393
393
|
assert_equal "2008 GMC Sierra 2500HD - $14800", posting_090610.label
|
394
394
|
assert_equal "boston", posting_090610.location
|
395
395
|
assert_equal [], posting_090610.pics
|
396
|
-
assert_equal
|
397
|
-
assert_equal [0, 29, 18, 5, 9, 2010, 0, 248, true, "
|
396
|
+
assert_equal Date.new(2010, 9, 5), posting_090610.post_date
|
397
|
+
assert_equal [0, 29, 18, 5, 9, 2010, 0, 248, true, "UTC"], posting_090610.post_time.to_a
|
398
398
|
assert_equal 1938291834, posting_090610.posting_id
|
399
399
|
assert_equal 14800.0, posting_090610.price
|
400
400
|
assert_equal nil, posting_090610.reply_to
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: olek-libcraigscrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0.4
|
4
|
+
version: 1.1.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: htmlentities
|
@@ -271,7 +271,6 @@ files:
|
|
271
271
|
- spec/assets/posting_daytona_art_120512.html
|
272
272
|
- spec/assets/posting_mdc_cto_ftl_112612.html
|
273
273
|
- spec/assets/posting_mdc_reb_120612.html
|
274
|
-
- spec/assets/posting_page_not_found_120512.html
|
275
274
|
- spec/assets/posting_sya_121012-2.html
|
276
275
|
- spec/assets/posting_sya_121012.html
|
277
276
|
- spec/assets/this_post_has_expired_old.html
|
@@ -1,160 +0,0 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<title>Premier Bouquet Wrap</title>
|
5
|
-
<meta name="robots" content="NOARCHIVE,NOFOLLOW">
|
6
|
-
<meta name="viewport" content="user-scalable=1;">
|
7
|
-
<link type="text/css" rel="stylesheet" media="all" href="http://www.craigslist.org/styles/craigslist.css?v=9380f84aa49cda76299b65a80eaa7d6a">
|
8
|
-
|
9
|
-
<!--[if lt IE 9]>
|
10
|
-
<script type="text/javascript" src="http://www.craigslist.org/js/html5shiv.js?v=ed7af45dcbda983c8455631037ebcdda"></script>
|
11
|
-
<![endif]-->
|
12
|
-
</head>
|
13
|
-
|
14
|
-
<body class="posting">
|
15
|
-
|
16
|
-
<article class="container">
|
17
|
-
<header class="bchead">
|
18
|
-
<a id="ef" href="https://accounts.craigslist.org/eaf?postingID=3448282416&token=U2FsdGVkX184MzQzODM0MwIvhB_fTy4UswRiXezIFImfh9DCIm84sQtZ0KdXRyOJOGBoogMegaCtStglpUkrs-Fpix7tUinx">email this posting to a friend</a> <a href="http://daytona.craigslist.org/">daytona beach craigslist</a> > <a href="http://daytona.craigslist.org/sss/">for sale / wanted</a> > <a href="http://daytona.craigslist.org/art/">arts & crafts - by owner</a>
|
19
|
-
</header>
|
20
|
-
|
21
|
-
<section class="body">
|
22
|
-
<div id="flags">
|
23
|
-
<div id="flagMsg">
|
24
|
-
please flag with care:
|
25
|
-
<a href="http://www.craigslist.org/about/help/flags_and_community_moderation">[?]</a>
|
26
|
-
</div>
|
27
|
-
<div id="flagChooser">
|
28
|
-
<a class="fl" id="flag16" href="/flag/?flagCode=16&postingID=3448282416"
|
29
|
-
title="Wrong category, wrong site, discusses another post, or otherwise misplaced">
|
30
|
-
miscategorized</a>
|
31
|
-
<a class="fl" id="flag28" href="/flag/?flagCode=28&postingID=3448282416"
|
32
|
-
title="Violates craigslist Terms Of Use or other posted guidelines">
|
33
|
-
prohibited</a>
|
34
|
-
<a class="fl" id="flag15" href="/flag/?flagCode=15&postingID=3448282416"
|
35
|
-
title="Posted too frequently, in multiple cities/categories, or is too commercial">
|
36
|
-
spam/overpost</a>
|
37
|
-
<a class="fl" id="flag9" href="/flag/?flagCode=9&postingID=3448282416"
|
38
|
-
title="Should be considered for inclusion in the Best-Of-Craigslist">
|
39
|
-
best of craigslist</a>
|
40
|
-
</div>
|
41
|
-
</div>
|
42
|
-
|
43
|
-
<div class="tsb">
|
44
|
-
<em>Avoid scams and fraud by dealing locally!</em>
|
45
|
-
Beware any deal involving Western Union, Moneygram, wire transfer, cashier check, money order, shipping, escrow, or any promise of transaction protection/certification/guarantee
|
46
|
-
<a href="http://www.craigslist.org/about/scams">More info</a>
|
47
|
-
</div>
|
48
|
-
|
49
|
-
<h2 class="postingtitle">Premier Bouquet Wrap - $2 (PALM COAST)</h2>
|
50
|
-
|
51
|
-
<section class="dateReplyBar">
|
52
|
-
<hr>
|
53
|
-
<div class="postingdate">Date: <time>2012-12-01, 3:02PM EST</time></div>
|
54
|
-
|
55
|
-
<script type="text/javascript"><!--
|
56
|
-
var isPreview = "";
|
57
|
-
var postingTitle = "Premier Bouquet Wrap - $2 (PALM COAST)";
|
58
|
-
var bestOf = "";
|
59
|
-
var postingURL = "http%3A%2F%2Fdaytona.craigslist.org%2Fart%2F3448282416.html";
|
60
|
-
var displayEmail = "nqmhm-3448282416@sale.craigslist.org";
|
61
|
-
|
62
|
-
--></script>
|
63
|
-
|
64
|
-
|
65
|
-
<button id="reply_button">Reply to this post</button>
|
66
|
-
|
67
|
-
<span id="replytext">Reply to:</span> <small><a href="mailto:nqmhm-3448282416@sale.craigslist.org?subject=Premier%20Bouquet%20Wrap%20-%20%242%20(PALM%20COAST)&body=%0A%0Ahttp%3A%2F%2Fdaytona.craigslist.org%2Fart%2F3448282416.html%0A">nqmhm-3448282416@sale.craigslist.org</a></small><sup>[<a href="http://www.craigslist.org/about/help/replying_to_posts" target="_blank">?</a>]</sup>
|
68
|
-
|
69
|
-
<div id="returnemail"> </div>
|
70
|
-
|
71
|
-
|
72
|
-
<hr>
|
73
|
-
</section>
|
74
|
-
|
75
|
-
|
76
|
-
<section id="userbody">
|
77
|
-
THESE ARE USED IN FLOWER / CRAFT SHOPS . ALL ARE NEW, BOXED AND AND VERY WELL MADE. I HAVE A CASE OF THESE I WILL SELL FOR ONE PRICE, OR WILL SELL BY THE PIECE. CASE PRICE IS FOR ABOUT 144 PIECES $75.00. <br>
|
78
|
-
<br>
|
79
|
-
Premier Bouquet Wrap<br>
|
80
|
-
<br>
|
81
|
-
Flower Bridal Bouquet Wrap White/Satin <br>
|
82
|
-
<br>
|
83
|
-
New White Satin <br>
|
84
|
-
<br>
|
85
|
-
The wraps are approximately 6 1/2" Long <br>
|
86
|
-
<br>
|
87
|
-
The bridal bouquet wrap is a creative alternative to tying a ribbon around your flowers. Just slide the wrap around flower stems<br>
|
88
|
-
<br>
|
89
|
-
This wrap is perfect for covering/decorating the stems on "Wedding", Quinceañera" or "Prom" bouquets. They can also be used with "Wedding bouquet" holder handles. These wraps are made with quality Satin material, easy to install and feels soft and smooth on the Bride's or Bridesmaid's hands. These wraps put the finishing touches on any Floral Wedding Bouquet.<br>
|
90
|
-
<br>
|
91
|
-
PLEASE CALL . . .<br>
|
92
|
-
<br>
|
93
|
-
AJ-518-858-2002<br>
|
94
|
-
<br>
|
95
|
-
<br>
|
96
|
-
|
97
|
-
<script type="text/javascript">
|
98
|
-
<!--
|
99
|
-
imgList = ["http://images.craigslist.org/3I93pe3Hf5G75J55M2cc13e19b59314771029.jpg"];
|
100
|
-
// -->
|
101
|
-
</script>
|
102
|
-
<div class="iw">
|
103
|
-
<div id="ci">
|
104
|
-
<span><!-- --></span>
|
105
|
-
<img id="iwi" src="http://images.craigslist.org/3I93pe3Hf5G75J55M2cc13e19b59314771029.jpg" alt="">
|
106
|
-
</div>
|
107
|
-
<div id="iwt">
|
108
|
-
|
109
|
-
<div class="tn">
|
110
|
-
<a href="http://images.craigslist.org/3I93pe3Hf5G75J55M2cc13e19b59314771029.jpg" title="1">
|
111
|
-
<img src="http://images.craigslist.org/thumb/3I93pe3Hf5G75J55M2cc13e19b59314771029.jpg" alt="image 1">
|
112
|
-
</a>
|
113
|
-
</div>
|
114
|
-
|
115
|
-
</div>
|
116
|
-
</div>
|
117
|
-
|
118
|
-
<!-- START CLTAGS -->
|
119
|
-
<br>
|
120
|
-
<ul class="blurbs">
|
121
|
-
<li> <!-- CLTAG GeographicArea=PALM COAST -->Location: PALM COAST</li>
|
122
|
-
<li>it's NOT ok to contact this poster with services or other commercial interests</li></ul>
|
123
|
-
<!-- END CLTAGS -->
|
124
|
-
</section>
|
125
|
-
|
126
|
-
<p class="postingidtext">PostingID:3448282416</p>
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
<br class="c">
|
131
|
-
</section>
|
132
|
-
<footer>
|
133
|
-
<ul class="clfooter">
|
134
|
-
<li>Copyright © 2012 craigslist, inc.</li>
|
135
|
-
<li><a href="http://www.craigslist.org/about/terms.of.use">terms of use</a></li>
|
136
|
-
<li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
|
137
|
-
<li><a href="/forums/?forumID=8">feedback forum</a></li>
|
138
|
-
</ul>
|
139
|
-
</footer>
|
140
|
-
|
141
|
-
</article>
|
142
|
-
|
143
|
-
|
144
|
-
<script type="text/javascript"><!--
|
145
|
-
var pagetype = "posting";
|
146
|
-
var pID = "3448282416";
|
147
|
-
var wwwurl = "http://www.craigslist.org";
|
148
|
-
|
149
|
-
--></script>
|
150
|
-
|
151
|
-
<script type="text/javascript" src="http://www.craigslist.org/js/jquery-1.7.2.js?v=89700834f1601ac3ebc3e5fb3302c040"></script>
|
152
|
-
<script type="text/javascript" src="http://www.craigslist.org/js/postings.js?v=d32f9ab28089cba9f207317b8c264ebb"></script>
|
153
|
-
<script type="text/javascript" src="http://www.craigslist.org/js/formats.js?v=3e34df20530f6579488bbac70a1e2e1d"></script>
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
</body>
|
159
|
-
</html>
|
160
|
-
|