RubyGems - scrapin-a-livin - Versions diffs - 0.1.3 → 0.1.4 - Mend

scrapin-a-livin 0.1.3 → 0.1.4

Files changed (5) hide show

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.1.3
1	+ 0.1.4

data/lib/generic/listing.rb CHANGED

@@ -90,7 +90,7 @@ class JobListing
      # @return [String] the new path with the date appended
      def add_date(basepath)
-          date_path = "#{basepath}/" + date.tr(" ", "_").tr(",", "")
+          date_path = "#{basepath}/" + date.tr(" -", "_").tr(",", "")
           # Create the directories
           Dir.mkdir(date_path) if !File.exists?(date_path)

data/lib/scrapin-a-livin.rb CHANGED

@@ -3,4 +3,79 @@ require 'generic/listing.rb'
 require 'dice/dice.rb'
 require 'yahoo/hotjobs.rb'
-# Helper file to include the available libraries
+# Scrapes job listings from the supplied search URLs and writes each one to disk
+class Scraper
+     def initialize(urls)
+          @urls = urls
+     end
+     # Fetch and write the listings for every url given to the constructor
+     #
+     # Takes no arguments; iterates over the @urls stored by initialize
+     def fetch
+          @urls.each { |u|
+               # Write each listing returned for this url
+               get_listings(u).each { |l|
+                    write_listing(l)
+               }
+          }
+     end
+     # Get the job listings for the url, dispatching on the url prefix
+     #
+     # @param url [String, #read] the url to query
+     # @return the listings from DiceSearch or HotjobsSearch, sorted by their date
+     # @raise [ArgumentError] if the url starts with neither supported prefix
+     def get_listings(url)
+          # Check the url for Dice
+          if url.index("http://seeker.dice.com/") == 0
+               DiceSearch.get_listings(url).sort_by{ |l| l.date }
+          # Check the url for Hotjobs
+          elsif url.index("http://hotjobs.yahoo.com/") == 0
+               HotjobsSearch.get_listings(url).sort_by{ |l| l.date }
+          # Else Error
+          else
+               raise ArgumentError.new "Url invalid or not supported"
+          end
+     end
+     # Write the listing to "<name>.html", where name comes from the add_* chain below
+     #
+     # The chain starts at JOBS_DIR and passes each result to the next add_* call
+     # @param listing [JobListing] the job listing
+     def write_listing(listing)
+          date = listing.add_date(JOBS_DIR)
+          state = listing.add_state(date)
+          city = listing.add_city(state)
+          company = listing.add_company(city)
+          name = listing.add_name(company)
+          puts name
+          # Write the curl output for the listing link, then the listing wrapped in an HTML comment
+          File.open("#{name}.html", "w"){ |file|
+               file << `curl #{listing.link}` # NOTE(review): link is interpolated into the shell command unescaped
+               file << "<!--\n"
+               file << listing
+               file << "-->"
+          }
+     end
+end
+# Check to see if this file was executed directly
+# (i.e. $0 equals this file's path) rather than loaded by another file
+if $0 == (__FILE__)
+     # Fetch the listings for the urls in $ARGS. NOTE(review): Ruby's predefined argument globals are ARGV and $*; confirm $ARGS is assigned somewhere, otherwise it is nil here
+     Scraper.new($ARGS).fetch
+end

data/test/listing_test.rb CHANGED

@@ -118,4 +118,34 @@ class TC_JobListing < Test::Unit::TestCase
           end
      end
+     context "Check other formats" do
+          setup do
+               # Create a job listing whose text fields are full of punctuation and whose date is "Dec-1"
+               @listing = JobListing.new(
+                    "Begin !@\#$%^&*()'~`;:'\"<>,./?&amp;&#039; End",
+                    "http://www.google.com",
+                    "Begin !@\#$%^&*()'~`;:'\"<>,./?&amp;&#039; End",
+                    "http://careers.google.com",
+                    "Begin !@\#$%^&*()'~`;:'\"<>./?&amp;&#039; End, CA",
+                    "Dec-1",
+                    nil)
+               # Create the temp directory that add_date will write into
+               Dir.mkdir TMP_DIR
+          end
+          teardown do
+               # Delete the temp directory and everything created under it
+               FileUtils.rm_r TMP_DIR
+          end
+          should "replace invalid date characters" do
+               path = "./tmp/Dec_1"
+               assert_equal(path, @listing.add_date(TMP_DIR))
+               assert(true, File.exists?(path)) # NOTE(review): the tested value is the literal true, so this cannot fail; the File.exists? result is only passed as the second argument
+          end
+     end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: scrapin-a-livin
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - Kevin S Kirkup
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-12-01 00:00:00 -05:00
+date: 2009-12-02 00:00:00 -05:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency