myuniversaljobsmatch 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 837c02494a5a6a08e58fea53557868f787c6f224
4
- data.tar.gz: c1276943fb43a93ba5fa08d031d34e5656734d4e
2
+ SHA256:
3
+ metadata.gz: 526c9daa5c4db413642d12ab424cabcb7ed88efd3c3fbde667380fe371ed57d2
4
+ data.tar.gz: 50c8a05605223a4b34ac3ea8f3d5be74b1e75a882e91238d682960ecd4cfaac8
5
5
  SHA512:
6
- metadata.gz: f689e903a43f8f50ae6b166c722a289e1b8f75866e3df996a9d4539c58ef3997294c3606bfaf2ffb4763b73a98c0d2d29c80a6b3e827af11c8b033df3737b905
7
- data.tar.gz: 38297b8114aab9237578449a56175d66cdda4178373dea0cf0ea568069871eb0d70e35e9b1536857c6b79cafdd9c2d85dcb6e3b09465cc3609123303f3aa3a58
6
+ metadata.gz: ff01c7f3c147f4ce0d528b04dc2c951df7b3426515e8e97a64225d0c579fbe8969f976f996fe9f3aabd90826e859a3fb7d70117fff8c9caf1dba0ea04f227892
7
+ data.tar.gz: 6a78bfac7569f00f0dff35696e56c73ef95611a75abb126abf666dfc8acdb3f5567d9999ab5f7b612dd5896432857d4f97d6d436589af787c4361f6eeafff28a
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -1,16 +1,31 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ # file: myuniversaljobsmatch.rb
4
+
5
+ require 'chronic'
3
6
  require 'nokorexi'
4
7
  require 'dynarex'
5
8
 
6
- # file: myuniversaljobsmatch.rb
9
+ module StringToDate
10
+
11
+ refine String do
12
+
13
+ def to_date()
14
+ Chronic.parse(self, :endian_precedence => :little).to_date
15
+ end
16
+
17
+ end
18
+
19
+ end
7
20
 
8
21
  class MyUniversalJobsMatch
22
+ using StringToDate
9
23
 
10
24
  def initialize(filepath: '')
11
25
 
12
26
  @filepath = filepath
13
- @url_base = 'https://jobsearch.direct.gov.uk/'
27
+
28
+ @url_base = 'https://findajob.dwp.gov.uk/'
14
29
 
15
30
  @dx = Dynarex.new 'ujm[title,tags]/item(job_id, title, ' + \
16
31
  'description, posting_date, company, location, industries, job_type)'
@@ -20,51 +35,76 @@ class MyUniversalJobsMatch
20
35
  def dynarex()
21
36
  @dx
22
37
  end
23
-
24
- def search(title: '', where: '')
25
-
26
- url = @url_base + "jobsearch/powersearch.aspx?qt=2&rad=20&tm=-1&" + \
27
- "where=#{where}&tjt=#{title}"
28
- doc = Nokorexi.new(url).to_doc
38
+
39
+ # options:
40
+ # results_per_page: 10, 25, 50
41
+ # sort_by: date, highest_salary, lowest_salary
42
+ # hours: any, full_time, part_time
43
+ # contract_type: any, permanent, temporary, contract, apprenticeship
44
+
45
+ def search(title: '', where: '', results_per_page: nil, sort_by: nil,
46
+ hours: nil, contract_type: nil)
29
47
 
30
- table = doc.root.at_css('.JSresults')
31
-
32
- a = table.xpath('tr/td').map do |row|
33
-
34
- # get the date
35
- #date = row.element('td/span/text()')
36
- date = row.element('div/div[2]/span/text()')
37
-
38
- #link = row.element('td[3]/a')
39
- link = row.element('div[3]/div[2]/a')
40
-
41
- url = link.attributes[:href]
42
- jobid = url[/JobID=(\d+)/,1]
43
- title = link.text
44
-
45
- #company = row.element('td[4]/span/text()')
46
- company = row.element('div[4]/div[2]/span/text()')
47
- #location = row.element('td[5]/span/text()')
48
- location = row.element('div[5]/div[2]/span/text()')
49
-
50
- [jobid, date, title, url, company, location]
48
+ params = {
49
+ adv: 1,
50
+ q: title,
51
+ w: where
52
+ }
53
+
54
+ params[:pp] = results_per_page if results_per_page
55
+ params[:cty] = contract_type if contract_type
56
+ params[:cti] = hours if hours
57
+
58
+ case sort_by.to_sym
59
+ when 'date'
60
+ params[:sb] = :date
61
+ params[:sd] = :down
62
+ when :highest_salary
63
+ params[:sb] = :salary
64
+ params[:sd] = :down
65
+ when :lowest_salary
66
+ params[:sb] = :salary
67
+ params[:sd] = :up
68
+ end
69
+
70
+ url = @url_base + 'search/?' + params.map {|x| x.join('=') }.join('&')
71
+ doc = Nokorexi.new(url).to_doc
72
+
73
+ rows = doc.root.xpath('//div[@class="search-result"]')
74
+
75
+ a = rows.map do |row|
76
+
77
+ items = row.xpath('ul/li')
78
+ joburl = row.element('h3/a/@href').to_s
79
+ jobtitle = row.element('h3/a/text()')
80
+ jobid = joburl[/\d+$/]
81
+ jobref = title[/^\d+/].to_s
82
+
83
+ date = items[0].text.to_date
84
+ company = items[1].text('strong')
85
+ location = items[1].element('span/text()')
86
+ salary = items[2].text('strong') if items[2]
87
+ desc = row.text('p').strip
88
+
89
+ [jobid, jobref, date, jobtitle, joburl, company, location,
90
+ salary, desc]
51
91
 
52
92
  end
53
93
 
54
94
 
55
95
  dx = Dynarex.new('vacancies[title, desc, date, time, tags, xslt]/' + \
56
- 'vacancy(job_id, date, title, url, company, location, created_at)')
96
+ 'vacancy(job_id, job_ref, date, title, url, company, location, salary, desc)')
57
97
 
58
- dx.title = "Universal Jobmatch jobs - Search results for '#{title}'"
59
- dx.desc = "generated from web scrape of jobsearch." + \
60
- "direct.gov.uk; source: " + url
98
+ dx.title = "Find a job - Search results for '#{title}'"
99
+ dx.desc = "generated from web scrape of Find a job." + \
100
+ "findajob.dwp.gov.uk/; source: " + url
61
101
  dx.tags = 'jobs vacancies jobmatch ' + title.split.first
62
- dx.date = Time.now.strftime("%Y-%b-%s")
102
+ dx.date = Time.now.strftime("%Y-%b-%d")
63
103
  dx.time = Time.now.strftime("%H:%M")
64
104
 
65
105
  a.each do |row|
66
- dx.create Hash[%i(job_id date title url company location created_at).\
67
- zip(row)]
106
+ dx.create Hash[(%i(job_id job_ref date title url company) + \
107
+ %i(location salary desc)).zip(row)]
68
108
  end
69
109
 
70
110
  return dx
@@ -72,29 +112,28 @@ class MyUniversalJobsMatch
72
112
 
73
113
  def query(id)
74
114
 
75
- url = @url_base + 'GetJob.aspx?JobID=' + id
76
- buffer = RXFHelper.read(url).first
77
-
78
- doc = Nokorexi.new(buffer.gsub(/<br\s*\/?>/i,"\n")).to_doc
79
- content = doc.root.at_css '.jobViewContent'
80
- title = content.element('h2[2]/text()')
81
- description = content.element('div')
82
-
83
- a = doc.root.at_css('.jobViewSummary').xpath('dl/*')
115
+ doc = Nokorexi.new(@url_base + 'details/' + id).to_doc
84
116
 
85
- a2 = []
86
- while a.length > 0 do
87
-
88
- if a.first.name == 'dt' then
89
- a2 << [a.shift.text, '']
90
- elsif
91
- a2[-1][-1] << a.shift.text + "\n"
92
- end
93
- end
117
+ title = doc.root.text('head/title')
118
+
119
+ rows = doc.root.xpath('//table[1]/tbody/tr')
94
120
 
95
- a2.concat [['title', title], ['description', description.content]]
121
+ h = rows.map do |tr|
122
+
123
+ [
124
+ tr.text('th').downcase.rstrip[0..-2].gsub(/ +/,'_').to_sym,
125
+ tr.text('td').to_s
126
+ ]
127
+
128
+ end.to_h
96
129
 
97
- Hash[a2.map{|k,v| [k.downcase.gsub(' ','_').to_sym,v]}]
130
+ h[:description] = doc.root.element('//div[@itemprop="description"]').xml\
131
+ .gsub(/<br *\/> */,"\n").gsub(/<\/?[^\>]+\/?>/,'').strip
132
+
133
+ h[:posting_date] = h[:posting_date].to_date
134
+ h[:closing_date] = h[:closing_date].to_date
135
+
136
+ {title: title}.merge(h)
98
137
 
99
138
  end
100
139
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: myuniversaljobsmatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -10,68 +10,93 @@ bindir: bin
10
10
  cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
- MIIDXjCCAkagAwIBAgIBATANBgkqhkiG9w0BAQUFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTgwMzE0MTYwNDU5WhcN
15
- MTkwMzE0MTYwNDU5WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCwvxO6
17
- /Q9Y1cYxWKKg4PMVCbguM5aDUfvyCr5w20V7tqAsp5MF7El3y/8KeOQlsh7J9o/T
18
- SshCULIy/tSthsvdA97s0+AAkzt1emEfNIqR85wx+Hrrq4D+AQ9q/GlT8V3AFJKI
19
- AiZIjM1ESakVJA5waXrAYh2A8Ayp3UJwUb8hv62kmrmnIktR0b0OyAeKEUaI3ghb
20
- N1iHrWv3QjGmyARTvMhCq0pxo1y5LdHPpq0VSNdopjLSlhsj//1tZ/iaqwhv1TBC
21
- 67ZvGS4+XnCEqPusgMwIdw5fLtKSd/i/sx5KraQZUcnZAqxWlrGKUTCaYH/A9lM4
22
- 5WY4C1vEmtUvlxWBAgMBAAGjgYowgYcwCQYDVR0TBAIwADALBgNVHQ8EBAMCBLAw
23
- HQYDVR0OBBYEFAopjAg4lTdn3ImcdT1OW4cNyOX4MCYGA1UdEQQfMB2BG2dlbW1h
24
- c3RlckBqYW1lc3JvYmVydHNvbi5ldTAmBgNVHRIEHzAdgRtnZW1tYXN0ZXJAamFt
25
- ZXNyb2JlcnRzb24uZXUwDQYJKoZIhvcNAQEFBQADggEBAIVf3GQn8WYBPII/v6Iw
26
- cQflYsQ7wVL+UPY7dDjJur7sPr0u1DZaoniCu2dIAX32XNkYKxHbwOmn/bDP/oi/
27
- JtwEKxpTmaAk8SMnr1vxK6E0XtKsJbcYpG38Guql3tv2N5AnGUsab6iqXy2nNZ9X
28
- lav0CKlK3EoLsO2GaWOvkIAGud/PcvBadlCcAha+UL5Uh2E70BWgK+6CRj5+DvR7
29
- cmxlnHavH4ed7VTk0JkPmqTwxJnaKaztnY+UbUfTbRtUHkd46azAuipUuNecMn3y
30
- UU8JFnL4NzGxTWzYwPOUlB4AT+rPKOw1qGeuy62M7UrpTKGgeNSvA1NPFgkOtWLO
31
- DMA=
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwMTE5MTYxMzA0WhcN
15
+ MjEwMTE4MTYxMzA0WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCmy6lP
17
+ Cp884sO5pUZxsR9L5pJ4U3GUKmqgEG5AupSMe6HSm9pZi+f0cF+fNMF53Ac27elI
18
+ +twgMKKIrWc8HXiy3K+9tMSTzU0/uJGK7Y2iIZ1QHUMNy9S4VdglohHnF21IGFtj
19
+ j3Nq+xLCEvxcZoUjbgk563GZKgd5XJYwzyb4q+erGrlasaQ9A+ZakAWT9vCXnaGO
20
+ wSe8Wx7KrvKk3UCvtH0tXDt8T8pwzN38qKPU1lpdDZQi63jqh8+qW2Lyu1KCWrDf
21
+ rj7ef8JC4WBbUbPABvFefjGmLMMYueKpOjCxMWeKFZz5Q83KcJMewkY/8sGLplyB
22
+ f+yKLPa5Mi8NnbWcevxmyss+/1z/W/dW+Dl5/3d/c1xXfi7nAjLfjGF8qQ9E82DM
23
+ 7Z3mp5gSGCxQBzNg50+ytE0hIzg2Hh+HkjMm/wt4nm7m4rmdY2FPvY1h7VKEbQtN
24
+ PUHF5MrVBIC9HCFVaIxpcO7ObK47PytfH3CfhPLHEd5ZU8Wo/WyYHiO+CQECAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU+mA09dUS
26
+ 2VF8+toUk7mApgYy+lAwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEADZRsfL/QTHTM1VPNIJcgGFULZD7SaxAwcS25TOXs
29
+ fwQI62ZGWyoJIhSgmOKFJh+04pzy5s1SygS6GTTfbFV3ES3TAorH4HabUF27CF9i
30
+ 7sCjvT4w/DZC1ue6P2qLdRhhXoqW+2ssGKvAB1VfIEQMA0LQU0IEUN5c+wG3r//J
31
+ g/nBG0lJna47dC6+MQo3tVpBOdxH7pMtTIvAxXUjjJdB9xrvzs/4NZ+k7nGb3uNF
32
+ NLXCpLx5+93WdJMAuyv88wvGChod/PWWDgzZP+/Zms7vvoTpmjMBm2xaRrgZFr+P
33
+ NFCMdBli9Mc0iv9lL5V3C+F0gqwmSs0tIFCr67XxPPbA27uReKc8XHDaVOaU8zJ9
34
+ 3L7/hZqxmix8fqzeNWxLIT9itNGNgiqUyeEwHgKJTj9B3aXs2RVaFIfRNRn4BA5d
35
+ LFRA1HdKZrhZiH0YWMH+NWhB4jh0blOUN/hE0m8hL/vNySv6AUD63CDSgaYE2A4C
36
+ L6XR3+hIK/fqVrE8fRag8DUp
32
37
  -----END CERTIFICATE-----
33
- date: 2018-03-14 00:00:00.000000000 Z
38
+ date: 2020-01-19 00:00:00.000000000 Z
34
39
  dependencies:
35
40
  - !ruby/object:Gem::Dependency
36
- name: dynarex
41
+ name: chronic
37
42
  requirement: !ruby/object:Gem::Requirement
38
43
  requirements:
39
44
  - - "~>"
40
45
  - !ruby/object:Gem::Version
41
- version: '1.7'
46
+ version: '0.10'
42
47
  - - ">="
43
48
  - !ruby/object:Gem::Version
44
- version: 1.7.30
49
+ version: 0.10.2
45
50
  type: :runtime
46
51
  prerelease: false
47
52
  version_requirements: !ruby/object:Gem::Requirement
48
53
  requirements:
49
54
  - - "~>"
50
55
  - !ruby/object:Gem::Version
51
- version: '1.7'
56
+ version: '0.10'
52
57
  - - ">="
53
58
  - !ruby/object:Gem::Version
54
- version: 1.7.30
59
+ version: 0.10.2
55
60
  - !ruby/object:Gem::Dependency
56
- name: nokorexi
61
+ name: dynarex
57
62
  requirement: !ruby/object:Gem::Requirement
58
63
  requirements:
59
64
  - - "~>"
60
65
  - !ruby/object:Gem::Version
61
- version: '0.3'
66
+ version: '1.8'
62
67
  - - ">="
63
68
  - !ruby/object:Gem::Version
64
- version: 0.3.2
69
+ version: 1.8.21
65
70
  type: :runtime
66
71
  prerelease: false
67
72
  version_requirements: !ruby/object:Gem::Requirement
68
73
  requirements:
69
74
  - - "~>"
70
75
  - !ruby/object:Gem::Version
71
- version: '0.3'
76
+ version: '1.8'
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: 1.8.21
80
+ - !ruby/object:Gem::Dependency
81
+ name: nokorexi
82
+ requirement: !ruby/object:Gem::Requirement
83
+ requirements:
72
84
  - - ">="
73
85
  - !ruby/object:Gem::Version
74
- version: 0.3.2
86
+ version: 0.5.0
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.5'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: 0.5.0
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: '0.5'
75
100
  description:
76
101
  email: james@jamesrobertson.eu
77
102
  executables: []
@@ -98,9 +123,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
98
123
  - !ruby/object:Gem::Version
99
124
  version: '0'
100
125
  requirements: []
101
- rubyforge_project:
102
- rubygems_version: 2.6.13
126
+ rubygems_version: 3.0.3
103
127
  signing_key:
104
128
  specification_version: 4
105
- summary: A web scraper which searches for job adverts on jobsearch.direct.gov.uk
129
+ summary: A web scraper which searches for job adverts on findajob.dwp.gov.uk
106
130
  test_files: []
metadata.gz.sig CHANGED
Binary file