olek-libcraigscrape 1.1.0.4 → 1.1.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/bin/craigwatch CHANGED
@@ -171,6 +171,7 @@ require 'action_mailer'
171
171
  require 'kwalify/util/hashlike'
172
172
  require 'libcraigscrape'
173
173
  require "socket"
174
+ require 'active_support/all'
174
175
 
175
176
  class String #:nodoc:
176
177
  RE = /^\/(.*)\/([ixm]*)$/
@@ -252,9 +253,9 @@ class CraigReportDefinition #:nodoc:
252
253
  def price_required?; @price_required; end
253
254
 
254
255
  def starting_at
255
- (@starting) ?
256
- Time.strptime(@starting, "%m/%d/%Y") :
257
- Time.now.yesterday.beginning_of_day
256
+ ((@starting) ?
257
+ Time.strptime([@starting, 'UTC'].join(' '), "%m/%d/%Y %Z") :
258
+ Time.zone.now.yesterday).to_date
258
259
  end
259
260
 
260
261
  def passes_filter?(post)
@@ -339,7 +340,6 @@ class TrackedListing < ActiveRecord::Base #:nodoc:
339
340
  end
340
341
 
341
342
  def delete_posts_older_than(cutoff_date)
342
- # TODO: can't I use posts.delete 'created_at < ?' and keep it cleaner?
343
343
  TrackedPost.delete_all [ 'tracked_listing_id = ? AND created_at < ?', self.id, cutoff_date ]
344
344
  end
345
345
  end
@@ -360,12 +360,10 @@ class TrackedPost < ActiveRecord::Base #:nodoc:
360
360
  end
361
361
 
362
362
  class ReportMailer < ActionMailer::Base #:nodoc:
363
- # default :template_path => File.dirname(__FILE__)
364
-
365
363
  def report(to, sender, subject_template, report_tmpl)
366
- subject = Time.now.strftime subject_template
367
364
  @summaries = report_tmpl[:summaries]
368
- mail :to => to, :subject => subject, :from => sender
365
+
366
+ mail :to => to, :subject => Time.zone.now.strftime(subject_template), :from => sender
369
367
  end
370
368
  end
371
369
 
@@ -472,11 +470,11 @@ report_summaries = craig_report.searches.collect do |search|
472
470
  already_tracked_urls = tracked_listing.posts.collect{|tp| tp.url}
473
471
 
474
472
  # We'll use this in the loop to decide what posts to track:
475
- newest_post_date = last_tracked_at
473
+ newest_post_date = last_tracked_at.to_date
476
474
 
477
475
  # We keep track of post.post_date here, b/c in some circumstances, you can be in the below loop
478
476
  # but have no post.post_date since the posting was removed and it parsed to nil
479
- most_recent_posting_date = Time.now
477
+ most_recent_posting_date = Time.zone.now.to_date
480
478
 
481
479
  # OK - Now let's go!
482
480
  catch :list_break do
@@ -487,7 +485,7 @@ report_summaries = craig_report.searches.collect do |search|
487
485
 
488
486
  # Are we at a point in the scrape, past which we don't need to proceed?
489
487
  throw :list_break if (
490
- most_recent_posting_date < last_tracked_at or
488
+ most_recent_posting_date.to_time < last_tracked_at or
491
489
  already_tracked_urls.include? post.url
492
490
  )
493
491
 
@@ -496,7 +494,7 @@ report_summaries = craig_report.searches.collect do |search|
496
494
  !new_summaries.has_key? post.url and
497
495
  search.passes_filter? post
498
496
  )
499
- rescue CraigScrape::Scraper::ResourceNotFoundError,CraigScrape::Scraper::MaxRedirectError => e
497
+ rescue CraigScrape::Scraper::ResourceNotFoundError => e
500
498
  # Sometimes we do end up with 404's that will never load, and we don't want to
501
499
  # abort a run simply b/c we found some anomaly due to the craigslist index
502
500
  # being out of date. This ResourceNotFoundError can occur due to
@@ -7,10 +7,13 @@ require 'time'
7
7
  require 'uri'
8
8
  require 'htmlentities'
9
9
  require 'active_support/core_ext/class/attribute_accessors'
10
+ require 'active_support/core_ext/time/calculations'
10
11
  require 'htmlentities'
11
12
  require 'nokogiri'
12
13
  require 'typhoeus'
13
14
 
15
+ Time.zone = 'UTC'
16
+
14
17
  # A base class encapsulating the various libcraigscrape objects, and providing most of the
15
18
  # craigslist interaction methods. Currently, we're supporting the old Class methods
16
19
  # in a legacy-compatibility mode, but these methods are marked for deprecation. Instead,
@@ -101,7 +104,7 @@ class CraigScrape
101
104
  ret = []
102
105
  fragments.each do |frag|
103
106
  each_post(frag) do |p|
104
- break if p.post_date <= newer_then
107
+ break if p.post_time <= newer_then
105
108
  ret << p
106
109
  end
107
110
  end
@@ -196,11 +199,11 @@ class CraigScrape
196
199
 
197
200
  # Returns the most recently expired time for the provided month and day
198
201
  def self.most_recently_expired_time(month, day) #:nodoc:
199
- now = (time_now) ? time_now : Time.now
202
+ now = (time_now) ? time_now : Time.zone.now
200
203
 
201
204
  # This ensures we always generate a time in the past, by guessing the year and subtracting one if we guessed wrong
202
- ret = Time.local now.year, month, day
203
- ret = Time.local now.year-1, month, day if ret > now
205
+ ret = Time.zone.local now.year, month, day
206
+ ret = Time.zone.local now.year-1, month, day if ret > now
204
207
 
205
208
  ret
206
209
  end
data/lib/posting.rb CHANGED
@@ -17,7 +17,12 @@ class CraigScrape::Posting < CraigScrape::Scraper
17
17
  HEADER_LOCATION = /^.+[ ]*\-[ ]*[\$]?[\d]+[ ]*\((.+)\)$/
18
18
  POSTING_ID = /PostingID\:[ ]*([\d]+)/
19
19
  REPLY_TO = /(.+)/
20
+ <<<<<<< HEAD
20
21
  PRICE = /((?:^\$[\d]+(?:\.[\d]{2})?)|(?:\$[\d]+(?:\.[\d]{2})?$))/
22
+
23
+ =======
24
+ PRICE = /((?:^\$[\d]+(?:\.[\d]{2})?)|(?:\$[\d]+(?:\.[\d]{2})?))/
25
+ >>>>>>> bded128... Removed end of line ($) in price regex, and switched price method to use header. Fixes Real Estate listings.
21
26
  # NOTE: we implement the (?:) to first check the 'old' style format, and then the 'new style'
22
27
  # (As of 12/03's parse changes)
23
28
  USERBODY_PARTS = /^(.+)\<div id\=\"userbody\">(.+)\<br[ ]*[\/]?\>\<br[ ]*[\/]?\>(.+)\<\/div\>(.+)$/m
@@ -105,7 +110,7 @@ class CraigScrape::Posting < CraigScrape::Scraper
105
110
  unless @post_time
106
111
  cursor = html_head.at 'hr' if html_head
107
112
  cursor = cursor.next until cursor.nil? or POST_DATE.match cursor.to_s
108
- @post_time = Time.parse $1 if $1
113
+ @post_time = Time.zone.parse $1 if $1
109
114
  end
110
115
 
111
116
  @post_time
@@ -239,7 +244,7 @@ class CraigScrape::Posting < CraigScrape::Scraper
239
244
  # Reflects only the date portion of the posting. Does not include hours/minutes. This is useful when reflecting the listing scrapes, and can be safely
240
245
  # used if you wish to conserve bandwidth by not pulling an entire post from a listing scrape.
241
246
  def post_date
242
- @post_date = Time.local(*[0]*3+post_time.to_a[3...10]) unless @post_date or post_time.nil?
247
+ @post_date = post_time.to_date unless @post_date or post_time.nil?
243
248
 
244
249
  @post_date
245
250
  end
@@ -297,7 +302,7 @@ class CraigScrape::Posting < CraigScrape::Scraper
297
302
  # Returns the best-guess of a price, judging by the label's contents. Price is available when pulled from the listing summary
298
303
  # and can be safely used if you wish to conserve bandwidth by not pulling an entire post from a listing scrape.
299
304
  def price
300
- $1.tr('$','').to_f if label and PRICE.match label
305
+ $1.tr('$','').to_f if header and PRICE.match header
301
306
  end
302
307
 
303
308
  # Returns the post contents with all html tags removed
data/lib/scraper.rb CHANGED
@@ -15,14 +15,6 @@
15
15
  #
16
16
  # <b>logger</b> - a Logger object to debug http notices to. Defaults to nil
17
17
  #
18
- # <b>retries_on_fetch_fail</b> - The number of times to retry a failed uri download. Defaults to 8
19
- #
20
- # <b>sleep_between_fetch_retries</b> - The amount of seconds to sleep, between successive attempts in the case of a failed download. Defaults to 30.
21
- #
22
- # <b>retries_on_404_fail</b> - The number of times to retry a Resource Not Found error (http Response code 404). Defaults to 3.
23
- #
24
- # <b>sleep_between_404_retries</b> - The amount of seconds to sleep, between successive attempts in the case of a Resource Not Found error. Defaults to 3.
25
- #
26
18
 
27
19
  class CraigScrape::Scraper
28
20
  cattr_accessor :logger
@@ -50,6 +42,9 @@ class CraigScrape::Scraper
50
42
 
51
43
  class FetchError < StandardError #:nodoc:
52
44
  end
45
+
46
+ class ResourceNotFoundError < StandardError #:nodoc:
47
+ end
53
48
 
54
49
  # Scraper Objects can be created from either a full URL (string), or a Hash.
55
50
  # Currently, this initializer isn't intended to be called from libcraigslist API users, though
@@ -5,16 +5,18 @@ describe CraigScrape::Listings do
5
5
  context "listing_cta_ftl_112612.html" do
6
6
  subject { described_class.new( uri_for('listing_cta_ftl_112612.html') ) }
7
7
  specify{ subject.posts.should have(100).items }
8
- specify{ subject.posts.collect(&:post_date).uniq.should eq([Time.parse('2012-11-26 00:00:00 -0500')]) }
8
+ specify{ subject.posts.collect(&:post_date).uniq.should eq([Time.zone.parse('2012-11-26 00:00:00')]) }
9
9
  specify{ subject.next_page_href.should eq('index100.html') }
10
+
11
+
10
12
  end
11
13
 
12
14
  context 'listing_search_ppa_nyc_121212.html' do
13
15
  subject { described_class.new( uri_for('listing_search_ppa_nyc_121212.html') ) }
14
16
 
15
17
  specify{ subject.posts.should have(100).items }
16
- specify{ subject.posts.collect(&:post_date).uniq.should eq(['2012-12-12 00:00:00 -0500',
17
- '2012-12-11 00:00:00 -0500', '2012-12-10 00:00:00 -0500'].collect{|t| Time.parse(t) }) }
18
+ specify{ subject.posts.collect(&:post_date).uniq.should eq(['2012-12-12 00:00:00',
19
+ '2012-12-11 00:00:00', '2012-12-10 00:00:00'].collect{|t| Time.zone.parse(t) }) }
18
20
  specify{ subject.next_page_href.should eq('http://newyork.craigslist.org/search/ppa?query=kenmore&srchType=A&s=100') }
19
21
  end
20
22
  end
@@ -8,12 +8,6 @@ describe CraigScrape::Posting do
8
8
  its(:posting_has_expired?){ should be_true }
9
9
  end
10
10
 
11
- context "posting_page_not_found_120512.html" do
12
- subject{ described_class.new uri_for('posting_page_not_found_120512.html') }
13
-
14
- its(:system_post?){ should be_true }
15
- end
16
-
17
11
  context "posting_sya_121012.html" do
18
12
  # This example was picked since it has pics
19
13
  subject{ described_class.new uri_for('posting_sya_121012.html') }
@@ -146,7 +146,7 @@ EOD
146
146
  assert_equal "NMB", posting0.location
147
147
  assert_equal 1131363612, posting0.posting_id
148
148
  assert_equal "sale-ktf9w-1131363612@craigslist.org", posting0.reply_to
149
- assert_equal [0, 21, 13, 20, 4, 2009, 1, 110, true, "EDT"], posting0.post_time.to_a
149
+ assert_equal [0, 21, 13, 20, 4, 2009, 1, 110, true, "UTC"], posting0.post_time.to_a
150
150
  assert_equal [], posting0.pics
151
151
  assert_equal "Has storage for videos/dvds. About 2 ft high by 21/2 ft widw. Almond/light beige color",posting0.contents_as_plain
152
152
  assert_equal 35.0, posting0.price
@@ -162,7 +162,7 @@ EOD
162
162
  assert_equal '1000 NE 14th Pl', posting1.location
163
163
  assert_equal 1131242195, posting1.posting_id
164
164
  assert_equal "hous-5nzhq-1131242195@craigslist.org", posting1.reply_to
165
- assert_equal [0, 33, 13, 20, 4, 2009, 1, 110, true, "EDT"], posting1.post_time.to_a
165
+ assert_equal [0, 33, 13, 20, 4, 2009, 1, 110, true, "UTC"], posting1.post_time.to_a
166
166
  assert_equal %w(http://images.craigslist.org/3n83o33l5ZZZZZZZZZ94k913ac1582d4b1fa4.jpg http://images.craigslist.org/3n93p63obZZZZZZZZZ94k19d5e32eb3b610c2.jpg http://images.craigslist.org/3n93m03l6ZZZZZZZZZ94k6e9785e37a1b1f3f.jpg http://images.craigslist.org/3ma3oc3l4ZZZZZZZZZ94kbfecbcd2fb2e19cc.jpg), posting1.pics
167
167
  assert_equal "Residential income property\u0097Investors this property is for you! This duplex has a 2bedroom/1bath unit on each side. It features updated kitchens and baths (new tubs, toilet, sink, vanities), ceramic tile flooring throughout, separate water and electric meters and on site laundry facilities. It is also closed to the Galleria, beaches and downtown Fort Lauderdale! \r\n\r\nJe parle le Français\r\n\r\nThis property is being offered by Blaunch Perrier, Broker Associate, Atlantic Properties International. Blaunch can be reached at 954-593-0077. For additional property information you may also visit www.garylanham.com\r\n\r", posting1.contents_as_plain
168
168
  assert_equal 189900.0, posting1.price
@@ -179,7 +179,7 @@ EOD
179
179
  assert_equal 'Fort Lauderdale', posting2.location
180
180
  assert_equal 1127037648, posting2.posting_id
181
181
  assert_equal nil, posting2.reply_to
182
- assert_equal [0, 16, 14, 17, 4, 2009, 5, 107, true, "EDT"], posting2.post_time.to_a
182
+ assert_equal [0, 16, 14, 17, 4, 2009, 5, 107, true, "UTC"], posting2.post_time.to_a
183
183
  assert_equal [], posting2.pics
184
184
  assert_equal "\302\240 Sheehan Buick Pontiac GMC \302\240 Pompano Beach, FL(754) 224-3257 \302\240PRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!2002 Chevrolet Corvette Z06 Florida Driven AutoCheck Certified 5.7L V8 6sp2 Door Coupe.\302\240Price: \302\240 $23,975Exterior:Electron Blue MetallicInterior:BlackStock#:P5110AVIN:1G1YY12S625129021FREE AutoCheck Vehicle ReportMileage:63,560Transmission:6 Speed ManualEngine:V8 5.7L OHVWarranty:Limited WarrantyTitle:Clear\302\273\302\240View All 58 Photos\302\273\302\240View Full Vehicle Details\302\273\302\240Ask the Seller a Question\302\273\302\240E-mail this to a Friend\302\240 DescriptionPRESENTING A ELECTRON BLUE METALLIC 2002 CHEVROLET CORVETTE Z06 6 SPEED FLORIDA DRIVEN SMOKIN' SPORTS CAR!\r\n\r\nLOADED WITH BLACK LEATHER BUCKET SEATS, POWER DRIVERS SEAT, DUAL ZONE CLIMATE CONTROL, 4 WHEEL ABS BRAKES, POWER STEERING AND BRAKES, REAR LIMITED SLIP DIFFERENTIAL, STABILITY CONTROL, CRUISE CONTROL, TLT STEERING WHEEL, POWER WINDOWS AND LOCKS, AUTOMATIC ON/OFF HEADLAMPS, FOG LIGHTS, DUAL AIR BAG SAFETY, AM/FM STEREO CD PLAYER, INTERMITTENT WINDSHIELD WIPERS AND SO MUCH MORE - THIS CAR IS TOTALLY HOT WITH GREAT LOW MILES!\r\n\r\nPlease call us to make your deal now at 1-888-453-5244. Please visit our Website at www.sheehanautoplex.com ***View 50+ Pictures of this vehicle - a complete description including standard features and all added options & a FREE AUTO CHECK REPORT at www.sheehanautoplex.com. ***Financing for Everyone - Good credit - bad credit - divorce - charge off's - NO PROBLEM. 
To complete a secure credit application, please visit our website at www.sheehanautoplex.com ***The largest Dealer in the State of Florida - We export all over the world - For details please visit www.sheehanautoplex.com ***Sheehan Autoplex takes great pride in our outstanding customer service and has been recognized by the following associations - BBB (Better Business Bureau) - NIADA - and the FIADA. Call us to get your best deal. CALL NOW. 1-888-453-5244\302\240 Contact Sheehan Buick Pontiac GMCPhone:(754) 224-3257Fax:(954) 781-9050Phone:(754) 224-3257E-mail:sales@proauto.comBusiness HoursWeekdays:9:00 AM to 9:00 PMSat:9:00 AM to 6:00 PMSun:",posting2.contents_as_plain
185
185
  assert_equal 23975.0, posting2.price
@@ -195,7 +195,7 @@ EOD
195
195
  assert_equal "N.Miami/ Hialeah", posting3.location
196
196
  assert_equal 1130212403, posting3.posting_id
197
197
  assert_equal "sale-c9bpa-1130212403@craigslist.org", posting3.reply_to
198
- assert_equal [0, 21, 18, 19, 4, 2009, 0, 109, true, "EDT"], posting3.post_time.to_a
198
+ assert_equal [0, 21, 18, 19, 4, 2009, 0, 109, true, "UTC"], posting3.post_time.to_a
199
199
  assert_equal %w(http://images.craigslist.org/3n23kf3lfZZZZZZZZZ94j1160e7d7b0601934.jpg http://images.craigslist.org/3nc3kf3p2ZZZZZZZZZ94j04fbc71e0a551ace.jpg http://images.craigslist.org/3nc3k33l7ZZZZZZZZZ94k13d8d7b1024e1e0e.jpg http://images.craigslist.org/3n23k63mfZZZZZZZZZ94k7838ae5d48d91eb8.jpg), posting3.pics
200
200
  assert_equal "1992 Twin Turbo 300ZX. This car is pearl white outside and Camel leather interior with suede accents. Motor was re-done from the ground up two years ago. 23,000 on new motor rebuild! New Leather seats and center arm rest done also two years ago. Has Alpine Am/Fm Cd with Ipod cable, Viper pager alarm New! JL Audio Amp & JLAudio sub box custom made. Mtx mids& highs component speakers sparate tweeter. Car runs strong & straight. Just detailed the interior. Exterior should be painted. This car once painted will sell for over $10,000. \r\nCome get a great deal now! offers and trades will be considered. 786-303-6550 Manny",posting3.contents_as_plain
201
201
  assert_equal 5800.0, posting3.price
@@ -213,7 +213,7 @@ EOD
213
213
  assert_equal nil, posting4.location
214
214
  assert_equal 1139303170, posting4.posting_id
215
215
  assert_equal "hous-sk9f2-1139303170@craigslist.org", posting4.reply_to
216
- assert_equal [0, 8, 9, 25, 4, 2009, 6, 115, true, "EDT"], posting4.post_time.to_a
216
+ assert_equal [0, 8, 9, 25, 4, 2009, 6, 115, true, "UTC"], posting4.post_time.to_a
217
217
  assert_equal [], posting4.pics
218
218
  assert_equal 6321,posting4.contents_as_plain.length
219
219
  assert_equal 225000.0, posting4.price
@@ -264,7 +264,7 @@ EOD
264
264
  assert_equal "$1350 / 3br - 2bth for no deposit req",posting6.label
265
265
  assert_equal ["http://images.craigslist.org/3k43pe3o8ZZZZZZZZZ9655022102a3ea51624.jpg", "http://images.craigslist.org/3n13m53p6ZZZZZZZZZ96596515e51237a179c.jpg", "http://images.craigslist.org/3od3p33leZZZZZZZZZ9656d614da8e3a51dd9.jpg", "http://images.craigslist.org/3pb3oa3leZZZZZZZZZ965eb60e4d2344019fb.jpg"],posting6.pics
266
266
  assert_equal 'Coral Springs',posting6.location
267
- assert_equal [0, 56, 18, 5, 6, 2009, 5, 156, true, "EDT"],posting6.post_time.to_a
267
+ assert_equal [0, 56, 18, 5, 6, 2009, 5, 156, true, "UTC"],posting6.post_time.to_a
268
268
  assert_equal 1207457727,posting6.posting_id
269
269
  assert_equal 1350.0,posting6.price
270
270
  assert_equal "hous-ccpap-1207457727@craigslist.org",posting6.reply_to
@@ -283,7 +283,7 @@ EOD
283
283
  assert_equal "$189999 / 3br - Nice 3 Bedroom/ 2 Bathroom House with Garage in Sunrise (Sunrise) (map)", brw_reb_1224008903.header_as_plain
284
284
  assert_equal ["http://images.craigslist.org/3ma3o93laZZZZZZZZZ96g5ee7cc528f1818a8.jpg", "http://images.craigslist.org/3nf3m03oeZZZZZZZZZ96gb267b7db57d91f60.jpg", "http://images.craigslist.org/3m63oc3p1ZZZZZZZZZ96g521443416aea1cac.jpg", "http://images.craigslist.org/3nc3p53l5ZZZZZZZZZ96g8706fce2c0bb17e9.jpg"], brw_reb_1224008903.pics
285
285
  assert_equal "Sunrise", brw_reb_1224008903.location
286
- assert_equal [0, 43, 18, 16, 6, 2009, 2, 167, true, "EDT"], brw_reb_1224008903.post_time.to_a
286
+ assert_equal [0, 43, 18, 16, 6, 2009, 2, 167, true, "UTC"], brw_reb_1224008903.post_time.to_a
287
287
  assert_equal 1224008903, brw_reb_1224008903.posting_id
288
288
  assert_equal 189999.0, brw_reb_1224008903.price
289
289
  assert_equal "1971CJS@Bellsouth.net", brw_reb_1224008903.reply_to
@@ -305,8 +305,8 @@ EOD
305
305
  assert_equal ["http://images.craigslist.org/3kf3o93laZZZZZZZZZ96fbc594a6ceb1f1025.jpg"], sfbay_art_1223614914.pics
306
306
  assert_equal "Bombay Company Art Painting - $650", sfbay_art_1223614914.label
307
307
  assert_equal 'saratoga', sfbay_art_1223614914.location
308
- assert_equal [0, 0, 0, 15, 6, 2009, 1, 166, true, "EDT"], sfbay_art_1223614914.post_date.to_a
309
- assert_equal [0, 38, 22, 15, 6, 2009, 1, 166, true, "EDT"], sfbay_art_1223614914.post_time.to_a
308
+ assert_equal Date.new(2009, 6, 15), sfbay_art_1223614914.post_date
309
+ assert_equal [0, 38, 22, 15, 6, 2009, 1, 166, true, "UTC"], sfbay_art_1223614914.post_time.to_a
310
310
  assert_equal 1223614914, sfbay_art_1223614914.posting_id
311
311
  assert_equal 650.0, sfbay_art_1223614914.price
312
312
  assert_equal "sale-trzm8-1223614914@craigslist.org", sfbay_art_1223614914.reply_to
@@ -338,8 +338,8 @@ EOD
338
338
  assert_equal "*****SOFTWARE****", posting_061710.label
339
339
  assert_equal "Dade/Broward", posting_061710.location
340
340
  assert_equal [], posting_061710.pics
341
- assert_equal [0, 0, 0, 17, 6, 2010, 4, 168, true, "EDT"], posting_061710.post_date.to_a
342
- assert_equal [0, 22, 13, 17, 6, 2010, 4, 168, true, "EDT"], posting_061710.post_time.to_a
341
+ assert_equal Date.new(2010, 6, 17), posting_061710.post_date
342
+ assert_equal [0, 22, 13, 17, 6, 2010, 4, 168, true, "UTC"], posting_061710.post_time.to_a
343
343
  assert_equal 1796890756, posting_061710.posting_id
344
344
  assert_equal nil, posting_061710.price
345
345
  assert_equal nil, posting_061710.reply_to
@@ -363,8 +363,8 @@ EOD
363
363
  assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More", posting1808219423.label
364
364
  assert_equal "Dade/Broward", posting1808219423.location
365
365
  assert_equal [], posting1808219423.pics
366
- assert_equal [0, 0, 0, 24, 6, 2010, 4, 175, true, "EDT"], posting1808219423.post_date.to_a
367
- assert_equal [0, 35, 7, 24, 6, 2010, 4, 175, true, "EDT"], posting1808219423.post_time.to_a
366
+ assert_equal Date.new(2010, 6, 24), posting1808219423.post_date
367
+ assert_equal [0, 35, 7, 24, 6, 2010, 4, 175, true, "UTC"], posting1808219423.post_time.to_a
368
368
  assert_equal 1808219423, posting1808219423.posting_id
369
369
  assert_equal nil, posting1808219423.price
370
370
  assert_equal nil, posting1808219423.reply_to
@@ -393,8 +393,8 @@ EOD
393
393
  assert_equal "2008 GMC Sierra 2500HD - $14800", posting_090610.label
394
394
  assert_equal "boston", posting_090610.location
395
395
  assert_equal [], posting_090610.pics
396
- assert_equal [0, 0, 0, 5, 9, 2010, 0, 248, true, "EDT"], posting_090610.post_date.to_a
397
- assert_equal [0, 29, 18, 5, 9, 2010, 0, 248, true, "EDT"], posting_090610.post_time.to_a
396
+ assert_equal Date.new(2010, 9, 5), posting_090610.post_date
397
+ assert_equal [0, 29, 18, 5, 9, 2010, 0, 248, true, "UTC"], posting_090610.post_time.to_a
398
398
  assert_equal 1938291834, posting_090610.posting_id
399
399
  assert_equal 14800.0, posting_090610.price
400
400
  assert_equal nil, posting_090610.reply_to
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: olek-libcraigscrape
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0.4
4
+ version: 1.1.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-17 00:00:00.000000000 Z
12
+ date: 2012-12-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: htmlentities
@@ -271,7 +271,6 @@ files:
271
271
  - spec/assets/posting_daytona_art_120512.html
272
272
  - spec/assets/posting_mdc_cto_ftl_112612.html
273
273
  - spec/assets/posting_mdc_reb_120612.html
274
- - spec/assets/posting_page_not_found_120512.html
275
274
  - spec/assets/posting_sya_121012-2.html
276
275
  - spec/assets/posting_sya_121012.html
277
276
  - spec/assets/this_post_has_expired_old.html
@@ -1,160 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <title>Premier Bouquet Wrap</title>
5
- <meta name="robots" content="NOARCHIVE,NOFOLLOW">
6
- <meta name="viewport" content="user-scalable=1;">
7
- <link type="text/css" rel="stylesheet" media="all" href="http://www.craigslist.org/styles/craigslist.css?v=9380f84aa49cda76299b65a80eaa7d6a">
8
-
9
- <!--[if lt IE 9]>
10
- <script type="text/javascript" src="http://www.craigslist.org/js/html5shiv.js?v=ed7af45dcbda983c8455631037ebcdda"></script>
11
- <![endif]-->
12
- </head>
13
-
14
- <body class="posting">
15
-
16
- <article class="container">
17
- <header class="bchead">
18
- <a id="ef" href="https://accounts.craigslist.org/eaf?postingID=3448282416&amp;token=U2FsdGVkX184MzQzODM0MwIvhB_fTy4UswRiXezIFImfh9DCIm84sQtZ0KdXRyOJOGBoogMegaCtStglpUkrs-Fpix7tUinx">email this posting to a friend</a> <a href="http://daytona.craigslist.org/">daytona beach craigslist</a> &gt; <a href="http://daytona.craigslist.org/sss/">for sale / wanted</a> &gt; <a href="http://daytona.craigslist.org/art/">arts &amp; crafts - by owner</a>
19
- </header>
20
-
21
- <section class="body">
22
- <div id="flags">
23
- <div id="flagMsg">
24
- please flag with care:
25
- <a href="http://www.craigslist.org/about/help/flags_and_community_moderation">[?]</a>
26
- </div>
27
- <div id="flagChooser">
28
- <a class="fl" id="flag16" href="/flag/?flagCode=16&amp;postingID=3448282416"
29
- title="Wrong category, wrong site, discusses another post, or otherwise misplaced">
30
- miscategorized</a>
31
- <a class="fl" id="flag28" href="/flag/?flagCode=28&amp;postingID=3448282416"
32
- title="Violates craigslist Terms Of Use or other posted guidelines">
33
- prohibited</a>
34
- <a class="fl" id="flag15" href="/flag/?flagCode=15&amp;postingID=3448282416"
35
- title="Posted too frequently, in multiple cities/categories, or is too commercial">
36
- spam/overpost</a>
37
- <a class="fl" id="flag9" href="/flag/?flagCode=9&amp;postingID=3448282416"
38
- title="Should be considered for inclusion in the Best-Of-Craigslist">
39
- best of craigslist</a>
40
- </div>
41
- </div>
42
-
43
- <div class="tsb">
44
- <em>Avoid scams and fraud by dealing locally!</em>
45
- Beware any deal involving Western Union, Moneygram, wire transfer, cashier check, money order, shipping, escrow, or any promise of transaction protection/certification/guarantee
46
- <a href="http://www.craigslist.org/about/scams">More info</a>
47
- </div>
48
-
49
- <h2 class="postingtitle">Premier Bouquet Wrap - $2 (PALM COAST)</h2>
50
-
51
- <section class="dateReplyBar">
52
- <hr>
53
- <div class="postingdate">Date: <time>2012-12-01, 3:02PM EST</time></div>
54
-
55
- <script type="text/javascript"><!--
56
- var isPreview = "";
57
- var postingTitle = "Premier Bouquet Wrap - $2 (PALM COAST)";
58
- var bestOf = "";
59
- var postingURL = "http%3A%2F%2Fdaytona.craigslist.org%2Fart%2F3448282416.html";
60
- var displayEmail = "nqmhm-3448282416@sale.craigslist.org";
61
-
62
- --></script>
63
-
64
-
65
- <button id="reply_button">Reply to this post</button>
66
-
67
- <span id="replytext">Reply to:</span> <small><a href="mailto:nqmhm-3448282416@sale.craigslist.org?subject=Premier%20Bouquet%20Wrap%20-%20%242%20(PALM%20COAST)&amp;body=%0A%0Ahttp%3A%2F%2Fdaytona.craigslist.org%2Fart%2F3448282416.html%0A">nqmhm-3448282416@sale.craigslist.org</a></small><sup>[<a href="http://www.craigslist.org/about/help/replying_to_posts" target="_blank">?</a>]</sup>
68
-
69
- <div id="returnemail"> </div>
70
-
71
-
72
- <hr>
73
- </section>
74
-
75
-
76
- <section id="userbody">
77
- THESE ARE USED IN FLOWER / CRAFT SHOPS . ALL ARE NEW, BOXED AND AND VERY WELL MADE. I HAVE A CASE OF THESE I WILL SELL FOR ONE PRICE, OR WILL SELL BY THE PIECE. CASE PRICE IS FOR ABOUT 144 PIECES $75.00. <br>
78
- <br>
79
- Premier Bouquet Wrap<br>
80
- <br>
81
- Flower Bridal Bouquet Wrap White/Satin <br>
82
- <br>
83
- New White Satin <br>
84
- <br>
85
- The wraps are approximately 6 1/2" Long <br>
86
- <br>
87
- The bridal bouquet wrap is a creative alternative to tying a ribbon around your flowers. Just slide the wrap around flower stems<br>
88
- <br>
89
- This wrap is perfect for covering/decorating the stems on "Wedding", Quinceañera" or "Prom" bouquets. They can also be used with "Wedding bouquet" holder handles. These wraps are made with quality Satin material, easy to install and feels soft and smooth on the Bride's or Bridesmaid's hands. These wraps put the finishing touches on any Floral Wedding Bouquet.<br>
90
- <br>
91
- PLEASE CALL . . .<br>
92
- <br>
93
- AJ-518-858-2002<br>
94
- <br>
95
- <br>
96
-
97
- <script type="text/javascript">
98
- <!--
99
- imgList = ["http://images.craigslist.org/3I93pe3Hf5G75J55M2cc13e19b59314771029.jpg"];
100
- // -->
101
- </script>
102
- <div class="iw">
103
- <div id="ci">
104
- <span><!-- --></span>
105
- <img id="iwi" src="http://images.craigslist.org/3I93pe3Hf5G75J55M2cc13e19b59314771029.jpg" alt="">
106
- </div>
107
- <div id="iwt">
108
-
109
- <div class="tn">
110
- <a href="http://images.craigslist.org/3I93pe3Hf5G75J55M2cc13e19b59314771029.jpg" title="1">
111
- <img src="http://images.craigslist.org/thumb/3I93pe3Hf5G75J55M2cc13e19b59314771029.jpg" alt="image 1">
112
- </a>
113
- </div>
114
-
115
- </div>
116
- </div>
117
-
118
- <!-- START CLTAGS -->
119
- <br>
120
- <ul class="blurbs">
121
- <li> <!-- CLTAG GeographicArea=PALM COAST -->Location: PALM COAST</li>
122
- <li>it's NOT ok to contact this poster with services or other commercial interests</li></ul>
123
- <!-- END CLTAGS -->
124
- </section>
125
-
126
- <p class="postingidtext">PostingID:3448282416</p>
127
-
128
-
129
-
130
- <br class="c">
131
- </section>
132
- <footer>
133
- <ul class="clfooter">
134
- <li>Copyright &copy; 2012 craigslist, inc.</li>
135
- <li><a href="http://www.craigslist.org/about/terms.of.use">terms of use</a></li>
136
- <li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
137
- <li><a href="/forums/?forumID=8">feedback forum</a></li>
138
- </ul>
139
- </footer>
140
-
141
- </article>
142
-
143
-
144
- <script type="text/javascript"><!--
145
- var pagetype = "posting";
146
- var pID = "3448282416";
147
- var wwwurl = "http://www.craigslist.org";
148
-
149
- --></script>
150
-
151
- <script type="text/javascript" src="http://www.craigslist.org/js/jquery-1.7.2.js?v=89700834f1601ac3ebc3e5fb3302c040"></script>
152
- <script type="text/javascript" src="http://www.craigslist.org/js/postings.js?v=d32f9ab28089cba9f207317b8c264ebb"></script>
153
- <script type="text/javascript" src="http://www.craigslist.org/js/formats.js?v=3e34df20530f6579488bbac70a1e2e1d"></script>
154
-
155
-
156
-
157
-
158
- </body>
159
- </html>
160
-