scrapin-a-livin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,71 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'open-uri'
4
+ require 'hpricot'
5
+
6
# We want to be able to easily pull and update a set of listings
# from the Dice website. This script formats the request for us
# to make things a little easier.

# Default value sent as the NUM_PER_PAGE query parameter
ENTRIES = 5
# Default value sent as the DAYSBACK query parameter
DAYS_BACK = 30
# Default value sent as the FREE_TEXT query parameter
SEARCH_KEYWORDS = "Software"

# Format the URL to retrieve the current listings from
# the Dice website
#
# http://seeker.dice.com
#
# Every argument is interpolated into the query string verbatim, so
# values must already be URL-encoded (e.g. "San+Jose+CA").
#
# @param where [String] the job location (WHERE and WHEREList parameters)
# @param keywords [String] the search keywords (FREE_TEXT parameter)
# @param days_back [Integer, String] value for the DAYSBACK parameter
# @param entries [Integer, String] value for the NUM_PER_PAGE parameter
# @return [String] the url request for dice.com
def get_url(where, keywords = SEARCH_KEYWORDS, days_back = DAYS_BACK, entries = ENTRIES)
  [
    "http://seeker.dice.com/jobsearch/servlet/JobSearch",
    "?caller=0&LOCATION_OPTION=2&EXTRA_STUFF=1&N=0&Hf=0",
    "&Ntk=JobSearchRanking&op=300",
    "&values=&FREE_TEXT=#{keywords}",
    "&Ntx=mode+matchall&WHERE=#{where}",
    "&WHEREList=#{where}",
    "&RADIUS=80.4672",
    # NOTE(review): METRO_AREA is fixed no matter what `where` is -- confirm
    # the site ignores it when WHERE is supplied.
    "&COUNTRY=1525&STAT_PROV=0&METRO_AREA=33.78715899%2C-84.39164034&AREA_CODES=&AC_COUNTRY=1525",
    "&TRAVEL=0&TAXTERM=1001",
    "&SORTSPEC=0",
    "&FRMT=0",
    "&DAYSBACK=#{days_back}",
    "&NUM_PER_PAGE=#{entries}"
  ].join
end
34
+
35
# Fetch the page at the given url and write it to the file at the
# specified path.
#
# The page is fetched BEFORE the output file is opened, so a failed
# request leaves any existing file at filepath untouched.
#
# @param filepath [String] the path to the output file
# @param url [String] the http/https/ftp url to use for the request
# @return [Integer] the number of bytes written
# @raise [URI::InvalidURIError] if url cannot be parsed
# @raise [ArgumentError] if url is not something open-uri can fetch
def write_query(filepath, url)
  headers = {
    "User-Agent" => "Ruby/#{RUBY_VERSION}",
    "From" => "email@addr.com",
    "Referer" => "http://seeker.dice.com/jobsearch/"
  }

  # Go through the URI object instead of Kernel#open: Kernel#open no
  # longer handles URLs on Ruby 3.0+, and on arbitrary strings it can
  # open local files or pipes.
  uri = URI.parse(url)
  unless uri.respond_to?(:read)
    raise ArgumentError, "Unsupported URL: \"#{url}\""
  end

  # Query the page
  response = uri.read(headers)

  # Write the data to the file; the block form closes the file for us
  File.open(filepath, "w") { |file| file.write(response) }
end
60
+
61
# Dump one Dice result page per location: San Jose, Raleigh and Austin.
# Each entry pairs the location sent in the query with the output file.
[
  ["San+Jose+CA", './querySanJose.html'],
  ["Raleigh+NC", './queryRaleigh.html'],
  ["Austin+TX", './queryAustin.html']
].each do |where, filepath|
  write_query(filepath, get_url(where))
end
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'open-uri'
4
+ require 'hpricot'
5
+
6
# We want to be able to easily pull and update a set of listings
# from the Yahoo website. This script formats the request for us
# to make things a little easier.

# Default value sent as the updated_since query parameter
DAYS_BACK = "month"
# Default value sent as the kw query parameter
SEARCH_KEYWORDS = "Software"

# Format the URL to retrieve the current listings from
# the Yahoo Hotjobs website
#
# http://hotjobs.yahoo.com
#
# Every argument is interpolated into the query string verbatim, so
# values must already be URL-encoded (e.g. "San+Jose+CA").
#
# @param where [String] the job location (first locations parameter)
# @param keywords [String] the search keywords (kw parameter)
# @param updated_since [String] value for the updated_since parameter
# @return [String] the url request for hotjobs.yahoo.com
def get_url(where, keywords = SEARCH_KEYWORDS, updated_since = DAYS_BACK)
  [
    "http://hotjobs.yahoo.com/job-search?",
    "src=advsearch&pageOp=search&ts=1259353986&",
    "kw_search_type=kwany&kw=#{keywords}&kw_none=&",
    "locations=#{where}&country=&locations=&locations=&",
    "industry=&industry=&industry=&",
    "updated_since=#{updated_since}&",
    "exp_level=&experience_level=&",
    "education=&salary[min]=&salary[type]=yearly&",
    "commitment=FT&commitment=PT&jobtype=PERM&jobtype=CONT&",
    "travel_amount=&company=&",
    "source=&email_format=html&email_frequency=1&email_enabled=0&search_jobs=Search+Jobs"
  ].join
end
32
+
33
# Fetch the page at the given url and write it to the file at the
# specified path.
#
# The page is fetched BEFORE the output file is opened, so a failed
# request leaves any existing file at filepath untouched.
#
# @param filepath [String] the path to the output file
# @param url [String] the http/https/ftp url to use for the request
# @return [Integer] the number of bytes written
# @raise [URI::InvalidURIError] if url cannot be parsed
# @raise [ArgumentError] if url is not something open-uri can fetch
def write_query(filepath, url)
  headers = {
    "User-Agent" => "Ruby/#{RUBY_VERSION}",
    "From" => "email@addr.com",
    "Referer" => "http://hotjobs.yahoo.com/"
  }

  # Go through the URI object instead of Kernel#open: Kernel#open no
  # longer handles URLs on Ruby 3.0+, and on arbitrary strings it can
  # open local files or pipes.
  uri = URI.parse(url)
  unless uri.respond_to?(:read)
    raise ArgumentError, "Unsupported URL: \"#{url}\""
  end

  # Query the page
  response = uri.read(headers)

  # Write the data to the file; the block form closes the file for us
  File.open(filepath, "w") { |file| file.write(response) }
end
58
+
59
# Dump one HotJobs result page per location: San Jose, Raleigh and Austin.
# Each entry pairs the location sent in the query with the output file.
[
  ["San+Jose+CA", './querySanJose.html'],
  ["Raleigh+NC", './queryRaleigh.html'],
  ["Austin+TX", './queryAustin.html']
].each do |where, filepath|
  write_query(filepath, get_url(where))
end
@@ -0,0 +1,8 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+ # Test Case helper file
5
+
6
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
7
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
8
+ require 'scrapin-a-livin'
@@ -0,0 +1,168 @@
1
+ require 'test_helper'
2
+ require 'ftools'
3
+
4
# Test Case for the Yahoo HotJobs listing parser
#
# Each test feeds a saved search-result page to
# HotjobsSearch.parse_listings and checks the first five listings.
class TC_HotjobsSearchParser < Test::Unit::TestCase

  RALEIGH_QUERY = File.dirname(__FILE__) + '/queries/hotjobs/queryRaleigh.html'
  AUSTIN_QUERY = File.dirname(__FILE__) + '/queries/hotjobs/queryAustin.html'
  SANJOSE_QUERY = File.dirname(__FILE__) + '/queries/hotjobs/querySanJose.html'

  # Test parsing the saved Raleigh query
  def test_raleigh
    assert_listings(RALEIGH_QUERY, [
      ["Embedded Software Engineer",
       "http://hotjobs.yahoo.com/job-JNEJ91O9CZA?source=SRP",
       "Sirit Technology",
       "http://hotjobs.yahoo.com/careers-786030-Sirit_Technology",
       "Morrisville, NC",
       "Oct 28"],
      ["MES Java Software Engineer",
       "http://hotjobs.yahoo.com/job-JDZWR4DRM00?source=SRP",
       "Werum Software & Systems America",
       "http://hotjobs.yahoo.com/careers-616438-Werum_Software___Systems_America",
       "Cary, NC",
       "Sep 29"],
      ["Software Developer",
       "http://hotjobs.yahoo.com/job-JQGSVB1Y99F?source=SRP",
       "CTG",
       "http://hotjobs.yahoo.com/careers-601569-CTG",
       "Durham, NC",
       "Oct 30"],
      ["Software Engineer - Windows Installer Developer, MSI, WiX, WiM",
       "http://hotjobs.yahoo.com/job-J9WH2JP6WO7?source=SRP",
       "CyberCoders",
       "http://hotjobs.yahoo.com/careers-577525-CyberCoders",
       "Raleigh, NC",
       "Nov 27"],
      ["C# Developer, Software Developer, SAAS, ASP.Net, SQL Server, SQL, .Net, WCF, JavaScript",
       "http://hotjobs.yahoo.com/job-J5PKCPM1QM2?source=SRP",
       "CyberCoders",
       "http://hotjobs.yahoo.com/careers-577525-CyberCoders",
       "Raleigh, NC",
       "Nov 27"]
    ])
  end

  # Test parsing the saved Austin query
  def test_austin
    assert_listings(AUSTIN_QUERY, [
      ["Software Program Manager",
       "http://hotjobs.yahoo.com/job-JSFNSIFFNCA?source=SRP",
       "Zebra Imaging",
       "http://hotjobs.yahoo.com/careers-627739-Zebra_Imaging",
       "Austin, TX",
       "Nov 19"],
      ["Client Project Manager - Software",
       "http://hotjobs.yahoo.com/job-JM1F32JSA7R?source=SRP",
       "Digital Cheetah Solutions, Inc.",
       "http://hotjobs.yahoo.com/careers-577804-Digital_Cheetah_Solutions__Inc_",
       "Austin, TX",
       "Nov 08"],
      ["Software Quality Assurance Lead",
       "http://hotjobs.yahoo.com/job-JL5SOECPQKF?source=SRP",
       "Zebra Imaging",
       "http://hotjobs.yahoo.com/careers-627739-Zebra_Imaging",
       "Austin, TX",
       "Oct 06"],
      ["Operating System Software Test Specialist",
       "http://hotjobs.yahoo.com/job-J3KBF8EADAO?source=SRP",
       "CTG",
       "http://hotjobs.yahoo.com/careers-601569-CTG",
       "Austin, TX",
       "Nov 12"],
      ["Software Engineer",
       "http://hotjobs.yahoo.com/job-JTXD1K5GAZ2?source=SRP",
       "Troux Technologies",
       "http://hotjobs.yahoo.com/careers-788124-Troux_Technologies",
       "Austin, TX",
       "Nov 05"]
    ])
  end

  # Test parsing the saved San Jose query
  def test_sanjose
    assert_listings(SANJOSE_QUERY, [
      ["Software Engineer/Carrier CE",
       "http://hotjobs.yahoo.com/job-J2VW8Y7GMAH?source=SRP",
       "Atheros Communications, Inc.",
       "http://hotjobs.yahoo.com/careers-552561-Atheros_Communications__Inc_",
       "Santa Clara, CA",
       "Nov 12"],
      ["EMBEDDED SOFTWARE ENGINEER",
       "http://hotjobs.yahoo.com/job-JD285WTGJLP?source=SRP",
       "NVIDIA Corporation",
       "http://hotjobs.yahoo.com/careers-601859-NVIDIA_Corporation",
       "Santa Clara, CA",
       "Oct 23"],
      ["EMBEDDED SOFTWARE ENGINEER",
       "http://hotjobs.yahoo.com/job-J9LJP1J38EB?source=SRP",
       "NVIDIA Corporation",
       "http://hotjobs.yahoo.com/careers-601859-NVIDIA_Corporation",
       "Santa Clara, CA",
       "Oct 23"],
      ["Platform Bringup Software Engineer",
       "http://hotjobs.yahoo.com/job-J6YYFVVFK7P?source=SRP",
       "Aruba Networks",
       "http://hotjobs.yahoo.com/careers-562030-Aruba_Networks",
       "Sunnyvale, CA",
       "Nov 14"],
      ["Staff Software Development Engineer",
       "http://hotjobs.yahoo.com/job-JDG4ZD5RP59?source=SRP",
       "IDT",
       "http://hotjobs.yahoo.com/careers-577851-IDT",
       "San Jose, CA",
       "Nov 19"]
    ])
  end

  private

  # Parse the saved query at query_path and compare the leading listings
  # against the expected rows, in order.
  #
  # @param query_path [String] the path to the saved query page
  # @param expected [Array<Array<String>>] one row per listing, ordered as
  #   title, link, company, company_link, location, date
  def assert_listings(query_path, expected)
    # Read the query data from the file and parse the listings from it
    listings = HotjobsSearch.parse_listings(check_query(query_path))

    expected.each_with_index do |(title, link, company, company_link, location, date), index|
      listing = listings[index]

      assert_equal(title, listing.title, "listing #{index}: title")
      assert_equal(link, listing.link, "listing #{index}: link")
      assert_equal(company, listing.company, "listing #{index}: company")
      assert_equal(company_link, listing.company_link, "listing #{index}: company_link")
      assert_equal(location, listing.location, "listing #{index}: location")
      assert_equal(date, listing.date, "listing #{index}: date")
    end
  end

  # Make sure the query file exists, then read it
  #
  # @param path [String] the path to the input file
  # @return [String] the data from the input file
  # @raise [ArgumentError] if the file is missing or not readable
  def check_query(path)
    # File.exist? rather than File.exists?, which was removed in Ruby 3.2
    unless File.exist?(path) && File.readable?(path)
      raise ArgumentError, "Invalid Query: \"#{path}\""
    end

    # File.read opens, reads and closes the file, so no handle is leaked
    File.read(path)
  end

end
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scrapin-a-livin
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kevin S Kirkup
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-28 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0.6"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: thoughtbot-shoulda
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ description: |
36
+ Are you in the technology industry?
37
+ The you have probably lost a job during your career.
38
+ This script helps you scrape the most common job sites for your search criteria,
39
+ and save the lists on your local computer.
40
+
41
+ You can then use other tools to display updates, send out resumes or search your LinkedIn account
42
+ to see who you may know at that company.
43
+
44
+ Please feel free to contribute. All suggestions are welcome.
45
+ Hopefully this will help you find the career you are looking for.
46
+
47
+ email: kevin.kirkup@gmail.com
48
+ executables: []
49
+
50
+ extensions: []
51
+
52
+ extra_rdoc_files:
53
+ - LICENSE
54
+ - README.markdown
55
+ files:
56
+ - .document
57
+ - .gitignore
58
+ - LICENSE
59
+ - README.markdown
60
+ - Rakefile
61
+ - VERSION
62
+ - lib/dice/dice_search.rb
63
+ - lib/generic/listing.rb
64
+ - lib/scrapin-a-livin.rb
65
+ - lib/yahoo/hotjobs.rb
66
+ - test/dice_parser_test.rb
67
+ - test/queries/dice/queryAustin.html
68
+ - test/queries/dice/queryRaleigh.html
69
+ - test/queries/dice/querySanJose.html
70
+ - test/queries/hotjobs/queryAustin.html
71
+ - test/queries/hotjobs/queryRaleigh.html
72
+ - test/queries/hotjobs/querySanJose.html
73
+ - test/scripts/diceDump.rb
74
+ - test/scripts/hotjobsDump.rb
75
+ - test/test_helper.rb
76
+ - test/yahoo_parser_test.rb
77
+ has_rdoc: true
78
+ homepage: http://github.com/angrytuna/scrapin-a-livin
79
+ licenses: []
80
+
81
+ post_install_message:
82
+ rdoc_options:
83
+ - --charset=UTF-8
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: "0"
91
+ version:
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: "0"
97
+ version:
98
+ requirements: []
99
+
100
+ rubyforge_project:
101
+ rubygems_version: 1.3.5
102
+ signing_key:
103
+ specification_version: 3
104
+ summary: Scrape to find a job
105
+ test_files:
106
+ - test/scripts/diceDump.rb
107
+ - test/scripts/hotjobsDump.rb
108
+ - test/yahoo_parser_test.rb
109
+ - test/dice_parser_test.rb
110
+ - test/test_helper.rb