scrapin-a-livin 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.1.3
data/lib/dice/dice.rb CHANGED
@@ -63,7 +63,7 @@ class DiceSearch
63
63
  end
64
64
 
65
65
  # Create the job listing
66
- listings << JobListing.new(name, link, company, company_link, location, date)
66
+ listings << JobListing.new(name, link, company, company_link, location, date, nil)
67
67
 
68
68
  end
69
69
 
@@ -8,10 +8,8 @@ class JobListing
8
8
  attr_reader :company
9
9
  attr_reader :company_link
10
10
  attr_reader :location
11
- attr_reader :date
12
- attr_reader :repost
13
11
 
14
- def initialize(title, link, company, company_link, location, date)
12
+ def initialize(title, link, company, company_link, location, date, repost)
15
13
 
16
14
  @title = title
17
15
  @link = link
@@ -19,6 +17,19 @@ class JobListing
19
17
  @company_link = company_link
20
18
  @location = location
21
19
  @date = date
20
+ @repost = repost
21
+ end
22
+
23
+ # Retrieve the date of the job listing
24
+ def date
25
+
26
+ # Return the reposted date if it has been specified
27
+ if @repost != nil
28
+ @repost
29
+ else
30
+ @date
31
+ end
32
+
22
33
  end
23
34
 
24
35
  # The state where the job is listed
@@ -38,7 +49,8 @@ class JobListing
38
49
  "Company: #@company\n" +
39
50
  "Company Link: #@company_link\n" +
40
51
  "Location: #@location\n" +
41
- "Date: #@date\n"
52
+ "Date: #@date\n" +
53
+ "Repost: #@repost\n"
42
54
  end
43
55
 
44
56
  # Add the state for the listing to the path
@@ -94,7 +106,7 @@ class JobListing
94
106
  def add_company(basepath)
95
107
 
96
108
  # Get the directory
97
- company_path = "#{basepath}/" + replace_invalid(company)
109
+ company_path = "#{basepath}/" + replace_invalid(@company)
98
110
 
99
111
  # Create the company folder if it does not already exist
100
112
  Dir.mkdir(company_path) if !File.exists?(company_path)
@@ -109,7 +121,7 @@ class JobListing
109
121
  # @return [String] the new path with the listing title appended
110
122
  def add_name(basepath)
111
123
 
112
- name_path = "#{basepath}/" + replace_invalid(title)
124
+ name_path = "#{basepath}/" + replace_invalid(@title)
113
125
  end
114
126
 
115
127
  private
data/lib/yahoo/hotjobs.rb CHANGED
@@ -8,164 +8,165 @@ require 'hpricot'
8
8
  # http://hotjobs.yahoo.com
9
9
  class HotjobsSearch
10
10
 
11
- # Constants
12
- DEBUG = false
13
- TITLE_CELL = 0
14
- COMPANY_CELL = 1
15
- LOCATION_CELL = 2
16
- DATE_CELL = 3
17
- CELL_COUNT = 4
18
- HOTJOBS_LINK = "http://hotjobs.yahoo.com"
19
-
20
- # Parse the provided query data
21
- #
22
- # @param query [String, #read] the html web page data
23
- # @returns [JobListings] an array of job listings
24
- def self.parse_listings(query)
25
-
26
- # Create the listings
27
- listings = Array.new
28
-
29
- # Filter the data with Hpricot
30
- doc = Hpricot(query)
31
-
32
- # Get the table
33
- table = (doc/"//table[@id=results]")
34
-
35
- # Iterate through each row
36
- rows = (table/"tr")
37
-
38
- # Retrieve the table rows that contain the job listings
39
- rows.each { |row|
40
-
41
- # Get the individual cells
42
- cells = (row/"td")
43
-
44
- # If this is a job listing
45
- if cells.size == CELL_COUNT
46
-
47
- # Get the fields
48
- name = (cells[TITLE_CELL]/"a").inner_html
49
- link = HOTJOBS_LINK + (cells[TITLE_CELL]/"a").attr("href")
50
- company = (cells[COMPANY_CELL]/"a").inner_html
51
- company_link = HOTJOBS_LINK + (cells[COMPANY_CELL]/"a").attr("href")
52
- location = get_location(cells[LOCATION_CELL])
53
- date, repost = get_dates(cells[DATE_CELL])
54
-
55
- if DEBUG
56
- puts "Row: count #{cells.size}"
57
- puts "Name: #{name}"
58
- puts "Link: #{link}"
59
- puts "Company: #{company}"
60
- puts "Company Link: #{company_link}"
61
- puts "Location: #{location}"
62
- puts "Date: #{date}"
63
- end
64
-
65
- # Create the job listing
66
- listings << JobListing.new(name, link, company, company_link, location, date)
67
-
68
- end
69
-
70
- }
71
-
72
- # Return the listings
73
- return listings
74
-
75
- end
76
-
77
- # Retrieve the job listings
78
- #
79
- # @param url [String, #read] the url used to query the data
80
- # @param [JobListings] an array of job listings
81
- def self.get_listings(url)
82
-
83
- # Read the data from the url
84
- response = open(url, "User-Agent" => "Ruby/#{RUBY_VERSION}",
85
- "From" => "email@addr.com",
86
- "Referer" => "http://hotjobs.yahoo.com/").read
87
-
88
- # Parse the listings from the query
89
- parse_listings(response)
90
-
91
- end
92
-
93
- # Query yahoo for html code for the query
94
- #
95
- # @param location [String, #read] the location to search
96
- # @param keywords [String, #read] keywords to use for the search
97
- # @param days_back [String, #read] how long ago to search
98
- # @param num_entries [String, #read] the number of entries to request
99
- def self.query(location, keywords, days_back, num_entries)
100
-
101
- url = "http://hotjobs.yahoo.com/job-search?" +
102
- "src=advsearch&pageOp=search&ts=1259353986&" +
103
- "kw_search_type=kwany&kw=#{keywords}&kw_none=&" +
104
- "locations=#{location}&country=&locations=&locations=&" +
105
- "industry=&industry=&industry=&" +
106
- "updated_since=month&" +
107
- "exp_level=&experience_level=&" +
108
- "education=&salary[min]=&salary[type]=yearly&" +
109
- "commitment=FT&commitment=PT&jobtype=PERM&jobtype=CONT&" +
110
- "travel_amount=&company=&" +
111
- "source=&email_format=html&email_frequency=1&email_enabled=0&search_jobs=Search+Jobs"
112
-
113
- # Read the data from the url
114
- open(url, "User-Agent" => "Ruby/#{RUBY_VERSION}",
115
- "From" => "email@addr.com",
116
- "Referer" => "http://hotjobs.yahoo.com/").read
117
- end
118
-
119
- private
120
- # Method to get the available locations
121
- def self.get_location(element)
122
-
123
- location = ''
124
-
125
- puts "Element: #{element}" if DEBUG
126
-
127
- # check to see if the element has a span
128
- if (element/"span").size > 0
129
-
130
- # The first span is the primary location
131
- location << (element/"span")[0].inner_text
132
-
133
- # Only one location
134
- else
135
- location = element.inner_html
136
- end
137
-
138
- return location
139
- end
140
-
141
- # Method to get the Dates
142
- def self.get_dates(element)
143
-
144
- date = ''
145
- repost = ''
146
-
147
- puts "Element: #{element}" if DEBUG
148
-
149
- spans = (element/"span")
150
-
151
- # Check to see if the element contains a span
152
- if spans.size > 0
153
-
154
- # The first span is the Reposted data
155
- repost = spans[0].inner_text
156
-
157
- # Remove the Reposted string
158
- repost.sub!(/Reposted /, "")
159
-
160
- # delete the span
161
- spans.remove
162
- end
163
-
164
- # Get the main date
165
- date = element.inner_text
166
-
167
- return date, repost
168
-
169
- end
11
+ # Constants
12
+ DEBUG = false
13
+ TITLE_CELL = 0
14
+ COMPANY_CELL = 1
15
+ LOCATION_CELL = 2
16
+ DATE_CELL = 3
17
+ CELL_COUNT = 4
18
+ HOTJOBS_LINK = "http://hotjobs.yahoo.com"
19
+
20
+ # Parse the provided query data
21
+ #
22
+ # @param query [String, #read] the html web page data
23
+ # @return [Array<JobListing>] an array of job listings
24
+ def self.parse_listings(query)
25
+
26
+ # Create the listings
27
+ listings = Array.new
28
+
29
+ # Filter the data with Hpricot
30
+ doc = Hpricot(query)
31
+
32
+ # Get the table
33
+ table = (doc/"//table[@id=results]")
34
+
35
+ # Iterate through each row
36
+ rows = (table/"tr")
37
+
38
+ # Retrieve the table rows that contain the job listings
39
+ rows.each { |row|
40
+
41
+ # Get the individual cells
42
+ cells = (row/"td")
43
+
44
+ # If this is a job listing
45
+ if cells.size == CELL_COUNT
46
+
47
+ # Get the fields
48
+ name = (cells[TITLE_CELL]/"a").inner_html
49
+ link = HOTJOBS_LINK + (cells[TITLE_CELL]/"a").attr("href")
50
+ company = (cells[COMPANY_CELL]/"a").inner_html
51
+ company_link = HOTJOBS_LINK + (cells[COMPANY_CELL]/"a").attr("href")
52
+ location = get_location(cells[LOCATION_CELL])
53
+ date, repost = get_dates(cells[DATE_CELL])
54
+
55
+ if DEBUG
56
+ puts "Row: count #{cells.size}"
57
+ puts "Name: #{name}"
58
+ puts "Link: #{link}"
59
+ puts "Company: #{company}"
60
+ puts "Company Link: #{company_link}"
61
+ puts "Location: #{location}"
62
+ puts "Date: #{date}"
63
+ puts "Repost: #{date}"
64
+ end
65
+
66
+ # Create the job listing
67
+ listings << JobListing.new(name, link, company, company_link, location, date, repost)
68
+
69
+ end
70
+
71
+ }
72
+
73
+ # Return the listings
74
+ return listings
75
+
76
+ end
77
+
78
+ # Retrieve the job listings
79
+ #
80
+ # @param url [String, #read] the url used to query the data
81
+ # @return [Array<JobListing>] an array of job listings
82
+ def self.get_listings(url)
83
+
84
+ # Read the data from the url
85
+ response = open(url, "User-Agent" => "Ruby/#{RUBY_VERSION}",
86
+ "From" => "email@addr.com",
87
+ "Referer" => "http://hotjobs.yahoo.com/").read
88
+
89
+ # Parse the listings from the query
90
+ parse_listings(response)
91
+
92
+ end
93
+
94
+ # Query Yahoo HotJobs and return the HTML of the search results page
95
+ #
96
+ # @param location [String, #read] the location to search
97
+ # @param keywords [String, #read] keywords to use for the search
98
+ # @param days_back [String, #read] how long ago to search (currently ignored: the URL hard-codes updated_since=month)
99
+ # @param num_entries [String, #read] the number of entries to request (currently ignored: not included in the URL)
100
+ def self.query(location, keywords, days_back, num_entries)
101
+
102
+ url = "http://hotjobs.yahoo.com/job-search?" +
103
+ "src=advsearch&pageOp=search&ts=1259353986&" +
104
+ "kw_search_type=kwany&kw=#{keywords}&kw_none=&" +
105
+ "locations=#{location}&country=&locations=&locations=&" +
106
+ "industry=&industry=&industry=&" +
107
+ "updated_since=month&" +
108
+ "exp_level=&experience_level=&" +
109
+ "education=&salary[min]=&salary[type]=yearly&" +
110
+ "commitment=FT&commitment=PT&jobtype=PERM&jobtype=CONT&" +
111
+ "travel_amount=&company=&" +
112
+ "source=&email_format=html&email_frequency=1&email_enabled=0&search_jobs=Search+Jobs"
113
+
114
+ # Read the data from the url
115
+ open(url, "User-Agent" => "Ruby/#{RUBY_VERSION}",
116
+ "From" => "email@addr.com",
117
+ "Referer" => "http://hotjobs.yahoo.com/").read
118
+ end
119
+
120
+ private
121
+ # Method to get the available locations
122
+ def self.get_location(element)
123
+
124
+ location = ''
125
+
126
+ puts "Element: #{element}" if DEBUG
127
+
128
+ # check to see if the element has a span
129
+ if (element/"span").size > 0
130
+
131
+ # The first span is the primary location
132
+ location << (element/"span")[0].inner_text
133
+
134
+ # Only one location
135
+ else
136
+ location = element.inner_html
137
+ end
138
+
139
+ return location
140
+ end
141
+
142
+ # Method to get the Dates
143
+ def self.get_dates(element)
144
+
145
+ date = ''
146
+ repost = nil
147
+
148
+ puts "Element: #{element}" if DEBUG
149
+
150
+ spans = (element/"span")
151
+
152
+ # Check to see if the element contains a span
153
+ if spans.size > 0
154
+
155
+ # The first span is the Reposted data
156
+ repost = spans[0].inner_text
157
+
158
+ # Remove the Reposted string
159
+ repost.sub!(/Reposted /, "")
160
+
161
+ # delete the span
162
+ spans.remove
163
+ end
164
+
165
+ # Get the main date
166
+ date = element.inner_text
167
+
168
+ return date, repost
169
+
170
+ end
170
171
 
171
172
  end
@@ -6,167 +6,167 @@ require 'ftools'
6
6
  # Dice
7
7
  class TC_DiceSearchParser < Test::Unit::TestCase
8
8
 
9
- RALEIGH_QUERY = File.dirname(__FILE__) + '/queries/dice/queryRaleigh.html'
10
- AUSTIN_QUERY = File.dirname(__FILE__) + '/queries/dice/queryAustin.html'
11
- SANJOSE_QUERY = File.dirname(__FILE__) + '/queries/dice/querySanJose.html'
12
-
13
- # Test for the Raleigh listings
14
- def test_raleigh
15
-
16
- # Read the query data from the file
17
- data = check_query(RALEIGH_QUERY)
18
-
19
- # Parse the listings from the query
20
- listings = DiceSearch.parse_listings(data)
21
-
22
-
23
- # Go through each listing
24
- assert_equal("Software Developer - Top Secret Clearance", listings[0].title)
25
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/d/8/d85973db7a09ede4c4df6487900a215a@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[0].link)
26
- assert_equal("TEKsystems", listings[0].company)
27
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=teksyst&cn=TEKsystems&values=", listings[0].company_link)
28
- assert_equal("Fayetteville, NC", listings[0].location)
29
- assert_equal("Nov-16", listings[0].date)
30
-
31
- assert_equal("Senior Software Developer (multiple openings)", listings[1].title)
32
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/f/b/fb5b26f3d34730091f3ac23e350364d5@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[1].link)
33
- assert_equal("Valiant Solutions LLC", listings[1].company)
34
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10119134&cn=Valiant+Solutions+LLC&values=", listings[1].company_link)
35
- assert_equal("Cary, NC", listings[1].location)
36
- assert_equal("Nov-24", listings[1].date)
37
-
38
- assert_equal("Software Developer, C++, CAA RADE", listings[2].title)
39
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/f/f/ff2b24cc875aeadca2440db7b3fd830e@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[2].link)
40
- assert_equal("I-cubed", listings[2].company)
41
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=RTL246150&cn=I-cubed&values=", listings[2].company_link)
42
- assert_equal("Raleigh, NC", listings[2].location)
43
- assert_equal("Nov-18", listings[2].date)
44
-
45
- assert_equal("Software Engineer", listings[3].title)
46
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/8/f/8f13ef8f39bba3974b792fbb3d224e8c@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[3].link)
47
- assert_equal("CTG, Inc.", listings[3].company)
48
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=ctgsan&cn=CTG%2C+Inc.&values=", listings[3].company_link)
49
- assert_equal("RTP, NC", listings[3].location)
50
- assert_equal("Nov-17", listings[3].date)
51
-
52
- assert_equal("SOFTWARE DEVELOPER", listings[4].title)
53
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/4/6/4680d0437b1f09866a38bab1bd5cfc24@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[4].link)
54
- assert_equal("Tratum Technologies, Inc", listings[4].company)
55
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=intva&cn=Tratum+Technologies%2C+Inc&values=", listings[4].company_link)
56
- assert_equal("Raleigh, NC", listings[4].location)
57
- assert_equal("Nov-23", listings[4].date)
58
-
59
- end
60
-
61
- # Test for parsing a listing
62
- def test_austin
63
-
64
- # Read the query data
65
- data = check_query(AUSTIN_QUERY)
66
-
67
- # Parse the listings from the query
68
- listings = DiceSearch.parse_listings(data)
69
-
70
- # Go through each listing
71
- assert_equal("Inside Software Sales Representative", listings[0].title)
72
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/7/2/721a4a5e55e944d6f87c3f2e406795d2@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[0].link)
73
- assert_equal("Burnett Staffing Specialists", listings[0].company)
74
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=BITSTX&cn=Burnett+Staffing+Specialists&values=", listings[0].company_link)
75
- assert_equal("Austin, TX", listings[0].location)
76
- assert_equal("Oct-30", listings[0].date)
77
-
78
- assert_equal("Software/Silicon Validation Tester", listings[1].title)
79
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/6/4/64c5076cc143855cbfb39e9464d022e9@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[1].link)
80
- assert_equal("Burnett Staffing Specialists", listings[1].company)
81
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=BITSTX&cn=Burnett+Staffing+Specialists&values=", listings[1].company_link)
82
- assert_equal("Austin, TX", listings[1].location)
83
- assert_equal("Oct-28", listings[1].date)
84
-
85
- assert_equal("DSP Software Engineer", listings[2].title)
86
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/c/4/c41455094cd7d5223182a5a8784c850e@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[2].link)
87
- assert_equal("Shoretel, Inc.", listings[2].company)
88
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10118419&cn=Shoretel%2C+Inc.&values=", listings[2].company_link)
89
- assert_equal("Austin, TX", listings[2].location)
90
- assert_equal("Nov-12", listings[2].date)
91
-
92
- assert_equal("Online Services Software Engineer (Austin, TX)", listings[3].title)
93
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/4/4/44b2e29b2061e3c2da900998af991a89@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[3].link)
94
- assert_equal("Pitney Bowes", listings[3].company)
95
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=pbowes&cn=Pitney+Bowes&values=", listings[3].company_link)
96
- assert_equal("Austin, TX", listings[3].location)
97
- assert_equal("Nov-20", listings[3].date)
98
-
99
- assert_equal("3D OpenGL Embedded Software Engineer", listings[4].title)
100
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/3/f/3f7d7c3ffb12ff14b51317e5191e1339@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[4].link)
101
- assert_equal("NetLogic Microsystems", listings[4].company)
102
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=RTL61966&cn=NetLogic+Microsystems&values=", listings[4].company_link)
103
- assert_equal("Austin, TX", listings[4].location)
104
- assert_equal("Nov-23", listings[4].date)
105
-
106
- end
107
-
108
- # Test for parsing a listing
109
- def test_sanjose
110
-
111
- # Read the query data
112
- data = check_query(SANJOSE_QUERY)
113
-
114
- # Parse the listings from the query
115
- listings = DiceSearch.parse_listings(data)
116
-
117
- # Go through each listing
118
- assert_equal("Senior Software Development Engineer (Treatment Delivery)", listings[0].title)
119
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/1/9/19477fb452c47c396ee16cfbba969c17@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[0].link)
120
- assert_equal("Elekta Impac Software", listings[0].company)
121
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=bhodblok&cn=Elekta+Impac+Software&values=", listings[0].company_link)
122
- assert_equal("Sunnyvale, CA", listings[0].location)
123
- assert_equal("Nov-04", listings[0].date)
124
-
125
- assert_equal("Senior Software Engineer - Device Driver", listings[1].title)
126
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/7/a/7a18ebd1fe1b32d133857b5dd4f36b1e@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[1].link)
127
- assert_equal("Denali Software Inc.", listings[1].company)
128
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10199948&cn=Denali+Software+Inc.&values=", listings[1].company_link)
129
- assert_equal("Sunnyvale, CA", listings[1].location)
130
- assert_equal("Nov-23", listings[1].date)
131
-
132
- assert_equal("Software Engineer", listings[2].title)
133
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/b/b/bbbb6318066747a46fe923c65011d84a@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[2].link)
134
- assert_equal("Denali Software Inc.", listings[2].company)
135
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10199948&cn=Denali+Software+Inc.&values=", listings[2].company_link)
136
- assert_equal("Sunnyvale, CA", listings[2].location)
137
- assert_equal("Nov-23", listings[2].date)
138
-
139
- assert_equal("Microsoft .NET Software Developer", listings[3].title)
140
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/8/3/8376adc204f2490f6cf45be6c8f27776@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[3].link)
141
- assert_equal("Terrace Software, Inc.", listings[3].company)
142
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=RTL99409&cn=Terrace+Software%2C+Inc.&values=", listings[3].company_link)
143
- assert_equal("San Francisco, CA", listings[3].location)
144
- assert_equal("Nov-24", listings[3].date)
145
-
146
- assert_equal("Embedded Software Engineer", listings[4].title)
147
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/d/2/d21249cf800dc89109e0ff2c22a215a4@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[4].link)
148
- assert_equal("Denali Software Inc.", listings[4].company)
149
- assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10199948&cn=Denali+Software+Inc.&values=", listings[4].company_link)
150
- assert_equal("Sunnyvale, CA", listings[4].location)
151
- assert_equal("Nov-23", listings[4].date)
152
- end
153
-
154
- private
155
-
156
- # Make sure the query file exists
157
- #
158
- # @param path [String, #read] the path to the input file
159
- # @returns [String] the data from the input file
160
- def check_query(path)
161
-
162
- # Check to make sure the query exists and is readable
163
- if !File.exists?(path) || !File.readable?(path)
164
-
165
- raise ArgumentError.new "Invalid Query: \"#{path}\""
166
- end
167
-
168
- # Open the file and read the contents
169
- File.open(path, "r").read
170
- end
9
+ RALEIGH_QUERY = File.dirname(__FILE__) + '/queries/dice/queryRaleigh.html'
10
+ AUSTIN_QUERY = File.dirname(__FILE__) + '/queries/dice/queryAustin.html'
11
+ SANJOSE_QUERY = File.dirname(__FILE__) + '/queries/dice/querySanJose.html'
12
+
13
+ # Test for the Raleigh listings
14
+ def test_raleigh
15
+
16
+ # Read the query data from the file
17
+ data = check_query(RALEIGH_QUERY)
18
+
19
+ # Parse the listings from the query
20
+ listings = DiceSearch.parse_listings(data)
21
+
22
+
23
+ # Go through each listing
24
+ assert_equal("Software Developer - Top Secret Clearance", listings[0].title)
25
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/d/8/d85973db7a09ede4c4df6487900a215a@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[0].link)
26
+ assert_equal("TEKsystems", listings[0].company)
27
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=teksyst&cn=TEKsystems&values=", listings[0].company_link)
28
+ assert_equal("Fayetteville, NC", listings[0].location)
29
+ assert_equal("Nov-16", listings[0].date)
30
+
31
+ assert_equal("Senior Software Developer (multiple openings)", listings[1].title)
32
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/f/b/fb5b26f3d34730091f3ac23e350364d5@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[1].link)
33
+ assert_equal("Valiant Solutions LLC", listings[1].company)
34
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10119134&cn=Valiant+Solutions+LLC&values=", listings[1].company_link)
35
+ assert_equal("Cary, NC", listings[1].location)
36
+ assert_equal("Nov-24", listings[1].date)
37
+
38
+ assert_equal("Software Developer, C++, CAA RADE", listings[2].title)
39
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/f/f/ff2b24cc875aeadca2440db7b3fd830e@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[2].link)
40
+ assert_equal("I-cubed", listings[2].company)
41
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=RTL246150&cn=I-cubed&values=", listings[2].company_link)
42
+ assert_equal("Raleigh, NC", listings[2].location)
43
+ assert_equal("Nov-18", listings[2].date)
44
+
45
+ assert_equal("Software Engineer", listings[3].title)
46
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/8/f/8f13ef8f39bba3974b792fbb3d224e8c@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[3].link)
47
+ assert_equal("CTG, Inc.", listings[3].company)
48
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=ctgsan&cn=CTG%2C+Inc.&values=", listings[3].company_link)
49
+ assert_equal("RTP, NC", listings[3].location)
50
+ assert_equal("Nov-17", listings[3].date)
51
+
52
+ assert_equal("SOFTWARE DEVELOPER", listings[4].title)
53
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/4/6/4680d0437b1f09866a38bab1bd5cfc24@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[4].link)
54
+ assert_equal("Tratum Technologies, Inc", listings[4].company)
55
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=intva&cn=Tratum+Technologies%2C+Inc&values=", listings[4].company_link)
56
+ assert_equal("Raleigh, NC", listings[4].location)
57
+ assert_equal("Nov-23", listings[4].date)
58
+
59
+ end
60
+
61
+ # Test for the Austin listings
62
+ def test_austin
63
+
64
+ # Read the query data
65
+ data = check_query(AUSTIN_QUERY)
66
+
67
+ # Parse the listings from the query
68
+ listings = DiceSearch.parse_listings(data)
69
+
70
+ # Go through each listing
71
+ assert_equal("Inside Software Sales Representative", listings[0].title)
72
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/7/2/721a4a5e55e944d6f87c3f2e406795d2@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[0].link)
73
+ assert_equal("Burnett Staffing Specialists", listings[0].company)
74
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=BITSTX&cn=Burnett+Staffing+Specialists&values=", listings[0].company_link)
75
+ assert_equal("Austin, TX", listings[0].location)
76
+ assert_equal("Oct-30", listings[0].date)
77
+
78
+ assert_equal("Software/Silicon Validation Tester", listings[1].title)
79
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/6/4/64c5076cc143855cbfb39e9464d022e9@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[1].link)
80
+ assert_equal("Burnett Staffing Specialists", listings[1].company)
81
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=BITSTX&cn=Burnett+Staffing+Specialists&values=", listings[1].company_link)
82
+ assert_equal("Austin, TX", listings[1].location)
83
+ assert_equal("Oct-28", listings[1].date)
84
+
85
+ assert_equal("DSP Software Engineer", listings[2].title)
86
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/c/4/c41455094cd7d5223182a5a8784c850e@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[2].link)
87
+ assert_equal("Shoretel, Inc.", listings[2].company)
88
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10118419&cn=Shoretel%2C+Inc.&values=", listings[2].company_link)
89
+ assert_equal("Austin, TX", listings[2].location)
90
+ assert_equal("Nov-12", listings[2].date)
91
+
92
+ assert_equal("Online Services Software Engineer (Austin, TX)", listings[3].title)
93
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/4/4/44b2e29b2061e3c2da900998af991a89@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[3].link)
94
+ assert_equal("Pitney Bowes", listings[3].company)
95
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=pbowes&cn=Pitney+Bowes&values=", listings[3].company_link)
96
+ assert_equal("Austin, TX", listings[3].location)
97
+ assert_equal("Nov-20", listings[3].date)
98
+
99
+ assert_equal("3D OpenGL Embedded Software Engineer", listings[4].title)
100
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/3/f/3f7d7c3ffb12ff14b51317e5191e1339@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[4].link)
101
+ assert_equal("NetLogic Microsystems", listings[4].company)
102
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=RTL61966&cn=NetLogic+Microsystems&values=", listings[4].company_link)
103
+ assert_equal("Austin, TX", listings[4].location)
104
+ assert_equal("Nov-23", listings[4].date)
105
+
106
+ end
107
+
108
+ # Test for the San Jose listings
109
+ def test_sanjose
110
+
111
+ # Read the query data
112
+ data = check_query(SANJOSE_QUERY)
113
+
114
+ # Parse the listings from the query
115
+ listings = DiceSearch.parse_listings(data)
116
+
117
+ # Go through each listing
118
+ assert_equal("Senior Software Development Engineer (Treatment Delivery)", listings[0].title)
119
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/1/9/19477fb452c47c396ee16cfbba969c17@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[0].link)
120
+ assert_equal("Elekta Impac Software", listings[0].company)
121
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=bhodblok&cn=Elekta+Impac+Software&values=", listings[0].company_link)
122
+ assert_equal("Sunnyvale, CA", listings[0].location)
123
+ assert_equal("Nov-04", listings[0].date)
124
+
125
+ assert_equal("Senior Software Engineer - Device Driver", listings[1].title)
126
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/7/a/7a18ebd1fe1b32d133857b5dd4f36b1e@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[1].link)
127
+ assert_equal("Denali Software Inc.", listings[1].company)
128
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10199948&cn=Denali+Software+Inc.&values=", listings[1].company_link)
129
+ assert_equal("Sunnyvale, CA", listings[1].location)
130
+ assert_equal("Nov-23", listings[1].date)
131
+
132
+ assert_equal("Software Engineer", listings[2].title)
133
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/b/b/bbbb6318066747a46fe923c65011d84a@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[2].link)
134
+ assert_equal("Denali Software Inc.", listings[2].company)
135
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10199948&cn=Denali+Software+Inc.&values=", listings[2].company_link)
136
+ assert_equal("Sunnyvale, CA", listings[2].location)
137
+ assert_equal("Nov-23", listings[2].date)
138
+
139
+ assert_equal("Microsoft .NET Software Developer", listings[3].title)
140
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/8/3/8376adc204f2490f6cf45be6c8f27776@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[3].link)
141
+ assert_equal("Terrace Software, Inc.", listings[3].company)
142
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=RTL99409&cn=Terrace+Software%2C+Inc.&values=", listings[3].company_link)
143
+ assert_equal("San Francisco, CA", listings[3].location)
144
+ assert_equal("Nov-24", listings[3].date)
145
+
146
+ assert_equal("Embedded Software Engineer", listings[4].title)
147
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/d/2/d21249cf800dc89109e0ff2c22a215a4@endecaindex&source=19&FREE_TEXT=Software&rating=99", listings[4].link)
148
+ assert_equal("Denali Software Inc.", listings[4].company)
149
+ assert_equal("http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10199948&cn=Denali+Software+Inc.&values=", listings[4].company_link)
150
+ assert_equal("Sunnyvale, CA", listings[4].location)
151
+ assert_equal("Nov-23", listings[4].date)
152
+ end
153
+
154
+ private
155
+
156
+ # Make sure the query file exists
157
+ #
158
+ # @param path [String, #read] the path to the input file
159
+ # @return [String] the data from the input file
160
+ def check_query(path)
161
+
162
+ # Check to make sure the query exists and is readable
163
+ if !File.exists?(path) || !File.readable?(path)
164
+
165
+ raise ArgumentError.new "Invalid Query: \"#{path}\""
166
+ end
167
+
168
+ # Open the file and read the contents
169
+ File.open(path, "r").read
170
+ end
171
171
 
172
172
  end