scrapin-a-livin 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.4
@@ -90,7 +90,7 @@ class JobListing
90
90
  # @return [String] the new path with the date appended
91
91
  def add_date(basepath)
92
92
 
93
- date_path = "#{basepath}/" + date.tr(" ", "_").tr(",", "")
93
+ date_path = "#{basepath}/" + date.tr(" -", "_").tr(",", "")
94
94
 
95
95
  # Create the directories
96
96
  Dir.mkdir(date_path) if !File.exists?(date_path)
@@ -3,4 +3,79 @@ require 'generic/listing.rb'
3
3
  require 'dice/dice.rb'
4
4
  require 'yahoo/hotjobs.rb'
5
5
 
6
- # Helper file to include the available libraries
6
+ # Scrape the listings for the provided URLs
7
+ class Scraper
8
+
9
+ def initialize(urls)
10
+
11
+ @urls = urls
12
+ end
13
+
14
+ # Fetch the data for the provided urls
15
+ #
16
+ # @param urls [Array, #read] array of query urls
17
+ def fetch
18
+
19
+ @urls.each { |u|
20
+
21
+ # Write the listings
22
+ get_listings(u).each { |l|
23
+ write_listing(l)
24
+ }
25
+ }
26
+ end
27
+
28
+ # Get the job listings for the url
29
+ #
30
+ # @param url [String, #read] the url to query
31
+ # @return the job listings
32
+ # @raise ArgumentError
33
+ def get_listings(url)
34
+
35
+ # Check the url for Dice
36
+ if url.index("http://seeker.dice.com/") == 0
37
+ DiceSearch.get_listings(url).sort_by{ |l| l.date }
38
+
39
+ # Check the url for Hotjobs
40
+ elsif url.index("http://hotjobs.yahoo.com/") == 0
41
+ HotjobsSearch.get_listings(url).sort_by{ |l| l.date }
42
+
43
+ # Else Error
44
+ else
45
+ raise ArgumentError.new "Url invalid or not supported"
46
+ end
47
+ end
48
+
49
+ # Write the listing at the specified path
50
+ #
51
+ # @param path [String, #read] the base path for the listing
52
+ # @param listing [JobListing] the job listing
53
+ def write_listing(listing)
54
+
55
+ date = listing.add_date(JOBS_DIR)
56
+ state = listing.add_state(date)
57
+ city = listing.add_city(state)
58
+ company = listing.add_company(city)
59
+ name = listing.add_name(company)
60
+
61
+ puts name
62
+
63
+ # Write the listing to the directory
64
+ File.open("#{name}.html", "w"){ |file|
65
+ file << `curl #{listing.link}`
66
+ file << "<!--\n"
67
+ file << listing
68
+ file << "-->"
69
+ }
70
+ end
71
+ end
72
+
73
+ # Check to see if this file was executed
74
+ # If this was run as an individual script
75
+ if $0 == (__FILE__)
76
+
77
+ # Fetch the listings for the provided urls
78
+ Scraper.new($ARGS).fetch
79
+
80
+ end
81
+
@@ -118,4 +118,34 @@ class TC_JobListing < Test::Unit::TestCase
118
118
  end
119
119
 
120
120
  end
121
+
122
+ context "Check other formats" do
123
+
124
+ setup do
125
+
126
+ # Create the job listing
127
+ @listing = JobListing.new(
128
+ "Begin !@\#$%^&*()'~`;:'\"<>,./?&amp;&#039; End",
129
+ "http://www.google.com",
130
+ "Begin !@\#$%^&*()'~`;:'\"<>,./?&amp;&#039; End",
131
+ "http://careers.google.com",
132
+ "Begin !@\#$%^&*()'~`;:'\"<>./?&amp;&#039; End, CA",
133
+ "Dec-1",
134
+ nil)
135
+
136
+ # Create the temp directory
137
+ Dir.mkdir TMP_DIR
138
+ end
139
+
140
+ teardown do
141
+ # Delete the temp directory
142
+ FileUtils.rm_r TMP_DIR
143
+ end
144
+
145
+ should "replace invalid date characters" do
146
+ path = "./tmp/Dec_1"
147
+ assert_equal(path, @listing.add_date(TMP_DIR))
148
+ assert(true, File.exists?(path))
149
+ end
150
+ end
121
151
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrapin-a-livin
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S Kirkup
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-01 00:00:00 -05:00
12
+ date: 2009-12-02 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency