scrapin-a-livin 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/dice/dice.rb +1 -1
- data/lib/generic/listing.rb +18 -6
- data/lib/yahoo/hotjobs.rb +160 -159
- data/test/dice_parser_test.rb +162 -162
- data/test/listing_test.rb +13 -5
- data/test/yahoo_parser_test.rb +160 -160
- metadata +2 -3
- data/lib/dice/dice_search.rb +0 -122
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.3
|
data/lib/dice/dice.rb
CHANGED
data/lib/generic/listing.rb
CHANGED
@@ -8,10 +8,8 @@ class JobListing
|
|
8
8
|
attr_reader :company
|
9
9
|
attr_reader :company_link
|
10
10
|
attr_reader :location
|
11
|
-
attr_reader :date
|
12
|
-
attr_reader :repost
|
13
11
|
|
14
|
-
def initialize(title, link, company, company_link, location, date)
|
12
|
+
def initialize(title, link, company, company_link, location, date, repost)
|
15
13
|
|
16
14
|
@title = title
|
17
15
|
@link = link
|
@@ -19,6 +17,19 @@ class JobListing
|
|
19
17
|
@company_link = company_link
|
20
18
|
@location = location
|
21
19
|
@date = date
|
20
|
+
@repost = repost
|
21
|
+
end
|
22
|
+
|
23
|
+
# Retrieve the date of the job listing.
#
# A reposted listing reports its repost date in place of the original
# posting date.
#
# @return the value of @repost when it is not nil, otherwise the value of @date
def date
  # nil? (rather than plain truthiness) keeps the original contract: any
  # non-nil repost value, including false, takes precedence over @date.
  @repost.nil? ? @date : @repost
end
|
23
34
|
|
24
35
|
# The state where the job is listed
|
@@ -38,7 +49,8 @@ class JobListing
|
|
38
49
|
"Company: #@company\n" +
|
39
50
|
"Company Link: #@company_link\n" +
|
40
51
|
"Location: #@location\n" +
|
41
|
-
"Date: #@date\n"
|
52
|
+
"Date: #@date\n" +
|
53
|
+
"Repost: #@repost\n"
|
42
54
|
end
|
43
55
|
|
44
56
|
# Add the state for the listing to the path
|
@@ -94,7 +106,7 @@ class JobListing
|
|
94
106
|
def add_company(basepath)
|
95
107
|
|
96
108
|
# Get the directory
|
97
|
-
company_path = "#{basepath}/" + replace_invalid(company)
|
109
|
+
company_path = "#{basepath}/" + replace_invalid(@company)
|
98
110
|
|
99
111
|
# Check to see if the company folder is available
|
100
112
|
Dir.mkdir(company_path) if !File.exists?(company_path)
|
@@ -109,7 +121,7 @@ class JobListing
|
|
109
121
|
# @return [String] the new path with the name appended
|
110
122
|
def add_name(basepath)
  # The sanitized listing title becomes the last path segment. The resulting
  # string is the method's return value, so no intermediate local is kept.
  "#{basepath}/" + replace_invalid(@title)
end
|
114
126
|
|
115
127
|
private
|
data/lib/yahoo/hotjobs.rb
CHANGED
@@ -8,164 +8,165 @@ require 'hpricot'
|
|
8
8
|
# http://hotjobs.yahoo.com
|
9
9
|
class HotjobsSearch

  # Constants

  # When true, every parsed element and field is printed to stdout.
  DEBUG = false

  # Zero-based positions of the cells within a results-table row.
  TITLE_CELL = 0
  COMPANY_CELL = 1
  LOCATION_CELL = 2
  DATE_CELL = 3

  # A table row is treated as a job listing only when it has exactly this
  # many <td> cells; every other row is skipped.
  CELL_COUNT = 4

  # Prefix prepended to the href values read from the results table.
  HOTJOBS_LINK = "http://hotjobs.yahoo.com"

  # Parse the provided query data
  #
  # @param query [String, #read] the html web page data
  # @return [Array<JobListing>] one listing per row of the table with
  #   id "results" that has CELL_COUNT cells
  def self.parse_listings(query)
    # Filter the data with Hpricot and locate the rows of the results table
    doc = Hpricot(query)
    table = (doc/"//table[@id=results]")
    rows = (table/"tr")

    listings = []

    rows.each do |row|
      cells = (row/"td")

      # Only rows with the expected number of cells are job listings
      next unless cells.size == CELL_COUNT

      title_anchor = (cells[TITLE_CELL]/"a")
      company_anchor = (cells[COMPANY_CELL]/"a")

      name = title_anchor.inner_html
      link = HOTJOBS_LINK + title_anchor.attr("href")
      company = company_anchor.inner_html
      company_link = HOTJOBS_LINK + company_anchor.attr("href")
      location = get_location(cells[LOCATION_CELL])
      date, repost = get_dates(cells[DATE_CELL])

      if DEBUG
        puts "Row: count #{cells.size}"
        puts "Name: #{name}"
        puts "Link: #{link}"
        puts "Company: #{company}"
        puts "Company Link: #{company_link}"
        puts "Location: #{location}"
        puts "Date: #{date}"
        # Previously printed the posting date under the "Repost" label.
        puts "Repost: #{repost}"
      end

      listings << JobListing.new(name, link, company, company_link, location, date, repost)
    end

    listings
  end

  # Retrieve the job listings
  #
  # @param url [String, #open] the url used to query the data
  # @return [Array<JobListing>] the listings parsed from the fetched page
  def self.get_listings(url)
    parse_listings(fetch_page(url))
  end

  # Query yahoo for html code for the query
  #
  # location and keywords are interpolated into the query string exactly as
  # given, so callers must pass values that are already URL-safe.
  #
  # @param location [String] the location to search
  # @param keywords [String] keywords to use for the search
  # @param days_back [Object] accepted but currently unused; the request
  #   always sends updated_since=month
  # @param num_entries [Object] accepted but currently unused
  # @return [String] the body of the search results page
  def self.query(location, keywords, days_back, num_entries)
    url = [
      "http://hotjobs.yahoo.com/job-search?",
      "src=advsearch&pageOp=search&ts=1259353986&",
      "kw_search_type=kwany&kw=#{keywords}&kw_none=&",
      "locations=#{location}&country=&locations=&locations=&",
      "industry=&industry=&industry=&",
      "updated_since=month&",
      "exp_level=&experience_level=&",
      "education=&salary[min]=&salary[type]=yearly&",
      "commitment=FT&commitment=PT&jobtype=PERM&jobtype=CONT&",
      "travel_amount=&company=&",
      "source=&email_format=html&email_frequency=1&email_enabled=0&search_jobs=Search+Jobs"
    ].join

    fetch_page(url)
  end

  # Download a page using the headers shared by every HotJobs request.
  #
  # The url is parsed into a URI and opened through open-uri's URI#open
  # instead of Kernel#open: Kernel#open treats a leading "|" as a command to
  # run, and from Ruby 3.0 on it no longer opens URLs at all.
  #
  # @param url [String, #open] the address to fetch
  # @return [String] the response body
  # @raise [URI::InvalidURIError] when url cannot be parsed as a URI
  # @raise [NoMethodError] when the parsed URI has no public #open
  #   (for example a bare file path)
  def self.fetch_page(url)
    target = url.respond_to?(:open) ? url : URI.parse(url)
    target.open("User-Agent" => "Ruby/#{RUBY_VERSION}",
                "From" => "email@addr.com",
                "Referer" => "http://hotjobs.yahoo.com/").read
  end
  private_class_method :fetch_page

  # NOTE: the two helpers below used to sit under a bare `private`, which has
  # no effect on `def self.` methods. They have therefore always been publicly
  # callable and are left public so existing callers keep working.

  # Method to get the available locations
  #
  # @param element [#/, #inner_html] the location cell of a listing row
  # @return [String] the text of the first <span> when the cell contains
  #   spans, otherwise the cell's inner html
  def self.get_location(element)
    puts "Element: #{element}" if DEBUG

    spans = (element/"span")

    # With spans present, the first span is the primary location; without
    # them the cell holds a single location.
    spans.size > 0 ? spans[0].inner_text : element.inner_html
  end

  # Method to get the Dates
  #
  # @param element [#/, #inner_text] the date cell of a listing row
  # @return [Array] a two-element array: the cell text, and the repost date
  #   with its "Reposted " prefix stripped (nil when the cell has no <span>)
  def self.get_dates(element)
    puts "Element: #{element}" if DEBUG

    spans = (element/"span")
    repost = nil

    if spans.size > 0
      # The first span is the Reposted data
      repost = spans[0].inner_text.sub(/Reposted /, "")

      # Remove the spans so the cell text read below holds only the main date
      spans.remove
    end

    [element.inner_text, repost]
  end

end
|
data/test/dice_parser_test.rb
CHANGED
@@ -6,167 +6,167 @@ require 'ftools'
|
|
6
6
|
# Dice
|
7
7
|
class TC_DiceSearchParser < Test::Unit::TestCase

  # Saved Dice result pages used as parser input.
  RALEIGH_QUERY = File.dirname(__FILE__) + '/queries/dice/queryRaleigh.html'
  AUSTIN_QUERY = File.dirname(__FILE__) + '/queries/dice/queryAustin.html'
  SANJOSE_QUERY = File.dirname(__FILE__) + '/queries/dice/querySanJose.html'

  # Expected values for the first five listings of each saved page.
  # Every row is [title, link, company, company_link, location, date].
  RALEIGH_EXPECTED = [
    ["Software Developer - Top Secret Clearance",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/d/8/d85973db7a09ede4c4df6487900a215a@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "TEKsystems",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=teksyst&cn=TEKsystems&values=",
     "Fayetteville, NC",
     "Nov-16"],
    ["Senior Software Developer (multiple openings)",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/f/b/fb5b26f3d34730091f3ac23e350364d5@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "Valiant Solutions LLC",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10119134&cn=Valiant+Solutions+LLC&values=",
     "Cary, NC",
     "Nov-24"],
    ["Software Developer, C++, CAA RADE",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/f/f/ff2b24cc875aeadca2440db7b3fd830e@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "I-cubed",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=RTL246150&cn=I-cubed&values=",
     "Raleigh, NC",
     "Nov-18"],
    ["Software Engineer",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/8/f/8f13ef8f39bba3974b792fbb3d224e8c@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "CTG, Inc.",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=ctgsan&cn=CTG%2C+Inc.&values=",
     "RTP, NC",
     "Nov-17"],
    ["SOFTWARE DEVELOPER",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/4/6/4680d0437b1f09866a38bab1bd5cfc24@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "Tratum Technologies, Inc",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=intva&cn=Tratum+Technologies%2C+Inc&values=",
     "Raleigh, NC",
     "Nov-23"]
  ].freeze

  AUSTIN_EXPECTED = [
    ["Inside Software Sales Representative",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/7/2/721a4a5e55e944d6f87c3f2e406795d2@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "Burnett Staffing Specialists",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=BITSTX&cn=Burnett+Staffing+Specialists&values=",
     "Austin, TX",
     "Oct-30"],
    ["Software/Silicon Validation Tester",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/6/4/64c5076cc143855cbfb39e9464d022e9@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "Burnett Staffing Specialists",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=BITSTX&cn=Burnett+Staffing+Specialists&values=",
     "Austin, TX",
     "Oct-28"],
    ["DSP Software Engineer",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/c/4/c41455094cd7d5223182a5a8784c850e@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "Shoretel, Inc.",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10118419&cn=Shoretel%2C+Inc.&values=",
     "Austin, TX",
     "Nov-12"],
    ["Online Services Software Engineer (Austin, TX)",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/4/4/44b2e29b2061e3c2da900998af991a89@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "Pitney Bowes",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=pbowes&cn=Pitney+Bowes&values=",
     "Austin, TX",
     "Nov-20"],
    ["3D OpenGL Embedded Software Engineer",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/3/f/3f7d7c3ffb12ff14b51317e5191e1339@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "NetLogic Microsystems",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=RTL61966&cn=NetLogic+Microsystems&values=",
     "Austin, TX",
     "Nov-23"]
  ].freeze

  SANJOSE_EXPECTED = [
    ["Senior Software Development Engineer (Treatment Delivery)",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/1/9/19477fb452c47c396ee16cfbba969c17@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "Elekta Impac Software",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=bhodblok&cn=Elekta+Impac+Software&values=",
     "Sunnyvale, CA",
     "Nov-04"],
    ["Senior Software Engineer - Device Driver",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/7/a/7a18ebd1fe1b32d133857b5dd4f36b1e@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "Denali Software Inc.",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10199948&cn=Denali+Software+Inc.&values=",
     "Sunnyvale, CA",
     "Nov-23"],
    ["Software Engineer",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/b/b/bbbb6318066747a46fe923c65011d84a@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "Denali Software Inc.",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10199948&cn=Denali+Software+Inc.&values=",
     "Sunnyvale, CA",
     "Nov-23"],
    ["Microsoft .NET Software Developer",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/8/3/8376adc204f2490f6cf45be6c8f27776@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "Terrace Software, Inc.",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=RTL99409&cn=Terrace+Software%2C+Inc.&values=",
     "San Francisco, CA",
     "Nov-24"],
    ["Embedded Software Engineer",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=302&dockey=xml/d/2/d21249cf800dc89109e0ff2c22a215a4@endecaindex&source=19&FREE_TEXT=Software&rating=99",
     "Denali Software Inc.",
     "http://seeker.dice.com/jobsearch/servlet/JobSearch?op=308&did=10199948&cn=Denali+Software+Inc.&values=",
     "Sunnyvale, CA",
     "Nov-23"]
  ].freeze

  # Test for the Raleigh listings
  def test_raleigh
    listings = DiceSearch.parse_listings(check_query(RALEIGH_QUERY))
    assert_listings(RALEIGH_EXPECTED, listings)
  end

  # Test for the Austin listings
  def test_austin
    listings = DiceSearch.parse_listings(check_query(AUSTIN_QUERY))
    assert_listings(AUSTIN_EXPECTED, listings)
  end

  # Test for the San Jose listings
  def test_sanjose
    listings = DiceSearch.parse_listings(check_query(SANJOSE_QUERY))
    assert_listings(SANJOSE_EXPECTED, listings)
  end

  private

  # Compare the leading parsed listings against a table of expected values.
  #
  # Fields are checked in the order title, link, company, company_link,
  # location, date for each row in turn.
  #
  # @param expected [Array<Array<String>>] rows of expected field values
  # @param listings [Array] the parsed listings, indexed like expected
  def assert_listings(expected, listings)
    expected.each_with_index do |(title, link, company, company_link, location, date), index|
      listing = listings[index]

      assert_equal(title, listing.title, "title of listing #{index}")
      assert_equal(link, listing.link, "link of listing #{index}")
      assert_equal(company, listing.company, "company of listing #{index}")
      assert_equal(company_link, listing.company_link, "company link of listing #{index}")
      assert_equal(location, listing.location, "location of listing #{index}")
      assert_equal(date, listing.date, "date of listing #{index}")
    end
  end

  # Make sure the query file exists
  #
  # @param path [String] the path to the input file
  # @return [String] the data from the input file
  # @raise [ArgumentError] when the file is missing or not readable
  def check_query(path)
    unless File.exist?(path) && File.readable?(path)
      raise ArgumentError, "Invalid Query: \"#{path}\""
    end

    # File.read closes the file after reading; File.open(...).read left the
    # handle open until garbage collection.
    File.read(path)
  end

end
|