myuniversaljobsmatch 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 837c02494a5a6a08e58fea53557868f787c6f224
4
- data.tar.gz: c1276943fb43a93ba5fa08d031d34e5656734d4e
2
+ SHA256:
3
+ metadata.gz: 526c9daa5c4db413642d12ab424cabcb7ed88efd3c3fbde667380fe371ed57d2
4
+ data.tar.gz: 50c8a05605223a4b34ac3ea8f3d5be74b1e75a882e91238d682960ecd4cfaac8
5
5
  SHA512:
6
- metadata.gz: f689e903a43f8f50ae6b166c722a289e1b8f75866e3df996a9d4539c58ef3997294c3606bfaf2ffb4763b73a98c0d2d29c80a6b3e827af11c8b033df3737b905
7
- data.tar.gz: 38297b8114aab9237578449a56175d66cdda4178373dea0cf0ea568069871eb0d70e35e9b1536857c6b79cafdd9c2d85dcb6e3b09465cc3609123303f3aa3a58
6
+ metadata.gz: ff01c7f3c147f4ce0d528b04dc2c951df7b3426515e8e97a64225d0c579fbe8969f976f996fe9f3aabd90826e859a3fb7d70117fff8c9caf1dba0ea04f227892
7
+ data.tar.gz: 6a78bfac7569f00f0dff35696e56c73ef95611a75abb126abf666dfc8acdb3f5567d9999ab5f7b612dd5896432857d4f97d6d436589af787c4361f6eeafff28a
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -1,16 +1,31 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ # file: myuniversaljobsmatch.rb
4
+
5
+ require 'chronic'
3
6
  require 'nokorexi'
4
7
  require 'dynarex'
5
8
 
6
- # file: myuniversaljobsmatch.rb
9
+ module StringToDate
10
+
11
+ refine String do
12
+
13
+ def to_date()
14
+ Chronic.parse(self, :endian_precedence => :little).to_date
15
+ end
16
+
17
+ end
18
+
19
+ end
7
20
 
8
21
  class MyUniversalJobsMatch
22
+ using StringToDate
9
23
 
10
24
  def initialize(filepath: '')
11
25
 
12
26
  @filepath = filepath
13
- @url_base = 'https://jobsearch.direct.gov.uk/'
27
+
28
+ @url_base = 'https://findajob.dwp.gov.uk/'
14
29
 
15
30
  @dx = Dynarex.new 'ujm[title,tags]/item(job_id, title, ' + \
16
31
  'description, posting_date, company, location, industries, job_type)'
@@ -20,51 +35,76 @@ class MyUniversalJobsMatch
20
35
  def dynarex()
21
36
  @dx
22
37
  end
23
-
24
- def search(title: '', where: '')
25
-
26
- url = @url_base + "jobsearch/powersearch.aspx?qt=2&rad=20&tm=-1&" + \
27
- "where=#{where}&tjt=#{title}"
28
- doc = Nokorexi.new(url).to_doc
38
+
39
+ # options:
40
+ # results_per_page: 10, 25, 50
41
+ # sort_by: date, highest_salary, lowest_salary
42
+ # hours: any, full_time, part_time
43
+ # contract_type: any, permanent, temporary, contract, apprenticeship
44
+
45
+ def search(title: '', where: '', results_per_page: nil, sort_by: nil,
46
+ hours: nil, contract_type: nil)
29
47
 
30
- table = doc.root.at_css('.JSresults')
31
-
32
- a = table.xpath('tr/td').map do |row|
33
-
34
- # get the date
35
- #date = row.element('td/span/text()')
36
- date = row.element('div/div[2]/span/text()')
37
-
38
- #link = row.element('td[3]/a')
39
- link = row.element('div[3]/div[2]/a')
40
-
41
- url = link.attributes[:href]
42
- jobid = url[/JobID=(\d+)/,1]
43
- title = link.text
44
-
45
- #company = row.element('td[4]/span/text()')
46
- company = row.element('div[4]/div[2]/span/text()')
47
- #location = row.element('td[5]/span/text()')
48
- location = row.element('div[5]/div[2]/span/text()')
49
-
50
- [jobid, date, title, url, company, location]
48
+ params = {
49
+ adv: 1,
50
+ q: title,
51
+ w: where
52
+ }
53
+
54
+ params[:pp] = results_per_page if results_per_page
55
+ params[:cty] = contract_type if contract_type
56
+ params[:cti] = hours if hours
57
+
58
+ case sort_by.to_sym
59
+ when 'date'
60
+ params[:sb] = :date
61
+ params[:sd] = :down
62
+ when :highest_salary
63
+ params[:sb] = :salary
64
+ params[:sd] = :down
65
+ when :lowest_salary
66
+ params[:sb] = :salary
67
+ params[:sd] = :up
68
+ end
69
+
70
+ url = @url_base + 'search/?' + params.map {|x| x.join('=') }.join('&')
71
+ doc = Nokorexi.new(url).to_doc
72
+
73
+ rows = doc.root.xpath('//div[@class="search-result"]')
74
+
75
+ a = rows.map do |row|
76
+
77
+ items = row.xpath('ul/li')
78
+ joburl = row.element('h3/a/@href').to_s
79
+ jobtitle = row.element('h3/a/text()')
80
+ jobid = joburl[/\d+$/]
81
+ jobref = title[/^\d+/].to_s
82
+
83
+ date = items[0].text.to_date
84
+ company = items[1].text('strong')
85
+ location = items[1].element('span/text()')
86
+ salary = items[2].text('strong') if items[2]
87
+ desc = row.text('p').strip
88
+
89
+ [jobid, jobref, date, jobtitle, joburl, company, location,
90
+ salary, desc]
51
91
 
52
92
  end
53
93
 
54
94
 
55
95
  dx = Dynarex.new('vacancies[title, desc, date, time, tags, xslt]/' + \
56
- 'vacancy(job_id, date, title, url, company, location, created_at)')
96
+ 'vacancy(job_id, job_ref, date, title, url, company, location, salary, desc)')
57
97
 
58
- dx.title = "Universal Jobmatch jobs - Search results for '#{title}'"
59
- dx.desc = "generated from web scrape of jobsearch." + \
60
- "direct.gov.uk; source: " + url
98
+ dx.title = "Find a job - Search results for '#{title}'"
99
+ dx.desc = "generated from web scrape of Find a job." + \
100
+ "findajob.dwp.gov.uk/; source: " + url
61
101
  dx.tags = 'jobs vacancies jobmatch ' + title.split.first
62
- dx.date = Time.now.strftime("%Y-%b-%s")
102
+ dx.date = Time.now.strftime("%Y-%b-%d")
63
103
  dx.time = Time.now.strftime("%H:%M")
64
104
 
65
105
  a.each do |row|
66
- dx.create Hash[%i(job_id date title url company location created_at).\
67
- zip(row)]
106
+ dx.create Hash[(%i(job_id job_ref date title url company) + \
107
+ %i(location salary desc)).zip(row)]
68
108
  end
69
109
 
70
110
  return dx
@@ -72,29 +112,28 @@ class MyUniversalJobsMatch
72
112
 
73
113
  def query(id)
74
114
 
75
- url = @url_base + 'GetJob.aspx?JobID=' + id
76
- buffer = RXFHelper.read(url).first
77
-
78
- doc = Nokorexi.new(buffer.gsub(/<br\s*\/?>/i,"\n")).to_doc
79
- content = doc.root.at_css '.jobViewContent'
80
- title = content.element('h2[2]/text()')
81
- description = content.element('div')
82
-
83
- a = doc.root.at_css('.jobViewSummary').xpath('dl/*')
115
+ doc = Nokorexi.new(@url_base + 'details/' + id).to_doc
84
116
 
85
- a2 = []
86
- while a.length > 0 do
87
-
88
- if a.first.name == 'dt' then
89
- a2 << [a.shift.text, '']
90
- elsif
91
- a2[-1][-1] << a.shift.text + "\n"
92
- end
93
- end
117
+ title = doc.root.text('head/title')
118
+
119
+ rows = doc.root.xpath('//table[1]/tbody/tr')
94
120
 
95
- a2.concat [['title', title], ['description', description.content]]
121
+ h = rows.map do |tr|
122
+
123
+ [
124
+ tr.text('th').downcase.rstrip[0..-2].gsub(/ +/,'_').to_sym,
125
+ tr.text('td').to_s
126
+ ]
127
+
128
+ end.to_h
96
129
 
97
- Hash[a2.map{|k,v| [k.downcase.gsub(' ','_').to_sym,v]}]
130
+ h[:description] = doc.root.element('//div[@itemprop="description"]').xml\
131
+ .gsub(/<br *\/> */,"\n").gsub(/<\/?[^\>]+\/?>/,'').strip
132
+
133
+ h[:posting_date] = h[:posting_date].to_date
134
+ h[:closing_date] = h[:closing_date].to_date
135
+
136
+ {title: title}.merge(h)
98
137
 
99
138
  end
100
139
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: myuniversaljobsmatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -10,68 +10,93 @@ bindir: bin
10
10
  cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
- MIIDXjCCAkagAwIBAgIBATANBgkqhkiG9w0BAQUFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTgwMzE0MTYwNDU5WhcN
15
- MTkwMzE0MTYwNDU5WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCwvxO6
17
- /Q9Y1cYxWKKg4PMVCbguM5aDUfvyCr5w20V7tqAsp5MF7El3y/8KeOQlsh7J9o/T
18
- SshCULIy/tSthsvdA97s0+AAkzt1emEfNIqR85wx+Hrrq4D+AQ9q/GlT8V3AFJKI
19
- AiZIjM1ESakVJA5waXrAYh2A8Ayp3UJwUb8hv62kmrmnIktR0b0OyAeKEUaI3ghb
20
- N1iHrWv3QjGmyARTvMhCq0pxo1y5LdHPpq0VSNdopjLSlhsj//1tZ/iaqwhv1TBC
21
- 67ZvGS4+XnCEqPusgMwIdw5fLtKSd/i/sx5KraQZUcnZAqxWlrGKUTCaYH/A9lM4
22
- 5WY4C1vEmtUvlxWBAgMBAAGjgYowgYcwCQYDVR0TBAIwADALBgNVHQ8EBAMCBLAw
23
- HQYDVR0OBBYEFAopjAg4lTdn3ImcdT1OW4cNyOX4MCYGA1UdEQQfMB2BG2dlbW1h
24
- c3RlckBqYW1lc3JvYmVydHNvbi5ldTAmBgNVHRIEHzAdgRtnZW1tYXN0ZXJAamFt
25
- ZXNyb2JlcnRzb24uZXUwDQYJKoZIhvcNAQEFBQADggEBAIVf3GQn8WYBPII/v6Iw
26
- cQflYsQ7wVL+UPY7dDjJur7sPr0u1DZaoniCu2dIAX32XNkYKxHbwOmn/bDP/oi/
27
- JtwEKxpTmaAk8SMnr1vxK6E0XtKsJbcYpG38Guql3tv2N5AnGUsab6iqXy2nNZ9X
28
- lav0CKlK3EoLsO2GaWOvkIAGud/PcvBadlCcAha+UL5Uh2E70BWgK+6CRj5+DvR7
29
- cmxlnHavH4ed7VTk0JkPmqTwxJnaKaztnY+UbUfTbRtUHkd46azAuipUuNecMn3y
30
- UU8JFnL4NzGxTWzYwPOUlB4AT+rPKOw1qGeuy62M7UrpTKGgeNSvA1NPFgkOtWLO
31
- DMA=
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwMTE5MTYxMzA0WhcN
15
+ MjEwMTE4MTYxMzA0WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCmy6lP
17
+ Cp884sO5pUZxsR9L5pJ4U3GUKmqgEG5AupSMe6HSm9pZi+f0cF+fNMF53Ac27elI
18
+ +twgMKKIrWc8HXiy3K+9tMSTzU0/uJGK7Y2iIZ1QHUMNy9S4VdglohHnF21IGFtj
19
+ j3Nq+xLCEvxcZoUjbgk563GZKgd5XJYwzyb4q+erGrlasaQ9A+ZakAWT9vCXnaGO
20
+ wSe8Wx7KrvKk3UCvtH0tXDt8T8pwzN38qKPU1lpdDZQi63jqh8+qW2Lyu1KCWrDf
21
+ rj7ef8JC4WBbUbPABvFefjGmLMMYueKpOjCxMWeKFZz5Q83KcJMewkY/8sGLplyB
22
+ f+yKLPa5Mi8NnbWcevxmyss+/1z/W/dW+Dl5/3d/c1xXfi7nAjLfjGF8qQ9E82DM
23
+ 7Z3mp5gSGCxQBzNg50+ytE0hIzg2Hh+HkjMm/wt4nm7m4rmdY2FPvY1h7VKEbQtN
24
+ PUHF5MrVBIC9HCFVaIxpcO7ObK47PytfH3CfhPLHEd5ZU8Wo/WyYHiO+CQECAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU+mA09dUS
26
+ 2VF8+toUk7mApgYy+lAwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEADZRsfL/QTHTM1VPNIJcgGFULZD7SaxAwcS25TOXs
29
+ fwQI62ZGWyoJIhSgmOKFJh+04pzy5s1SygS6GTTfbFV3ES3TAorH4HabUF27CF9i
30
+ 7sCjvT4w/DZC1ue6P2qLdRhhXoqW+2ssGKvAB1VfIEQMA0LQU0IEUN5c+wG3r//J
31
+ g/nBG0lJna47dC6+MQo3tVpBOdxH7pMtTIvAxXUjjJdB9xrvzs/4NZ+k7nGb3uNF
32
+ NLXCpLx5+93WdJMAuyv88wvGChod/PWWDgzZP+/Zms7vvoTpmjMBm2xaRrgZFr+P
33
+ NFCMdBli9Mc0iv9lL5V3C+F0gqwmSs0tIFCr67XxPPbA27uReKc8XHDaVOaU8zJ9
34
+ 3L7/hZqxmix8fqzeNWxLIT9itNGNgiqUyeEwHgKJTj9B3aXs2RVaFIfRNRn4BA5d
35
+ LFRA1HdKZrhZiH0YWMH+NWhB4jh0blOUN/hE0m8hL/vNySv6AUD63CDSgaYE2A4C
36
+ L6XR3+hIK/fqVrE8fRag8DUp
32
37
  -----END CERTIFICATE-----
33
- date: 2018-03-14 00:00:00.000000000 Z
38
+ date: 2020-01-19 00:00:00.000000000 Z
34
39
  dependencies:
35
40
  - !ruby/object:Gem::Dependency
36
- name: dynarex
41
+ name: chronic
37
42
  requirement: !ruby/object:Gem::Requirement
38
43
  requirements:
39
44
  - - "~>"
40
45
  - !ruby/object:Gem::Version
41
- version: '1.7'
46
+ version: '0.10'
42
47
  - - ">="
43
48
  - !ruby/object:Gem::Version
44
- version: 1.7.30
49
+ version: 0.10.2
45
50
  type: :runtime
46
51
  prerelease: false
47
52
  version_requirements: !ruby/object:Gem::Requirement
48
53
  requirements:
49
54
  - - "~>"
50
55
  - !ruby/object:Gem::Version
51
- version: '1.7'
56
+ version: '0.10'
52
57
  - - ">="
53
58
  - !ruby/object:Gem::Version
54
- version: 1.7.30
59
+ version: 0.10.2
55
60
  - !ruby/object:Gem::Dependency
56
- name: nokorexi
61
+ name: dynarex
57
62
  requirement: !ruby/object:Gem::Requirement
58
63
  requirements:
59
64
  - - "~>"
60
65
  - !ruby/object:Gem::Version
61
- version: '0.3'
66
+ version: '1.8'
62
67
  - - ">="
63
68
  - !ruby/object:Gem::Version
64
- version: 0.3.2
69
+ version: 1.8.21
65
70
  type: :runtime
66
71
  prerelease: false
67
72
  version_requirements: !ruby/object:Gem::Requirement
68
73
  requirements:
69
74
  - - "~>"
70
75
  - !ruby/object:Gem::Version
71
- version: '0.3'
76
+ version: '1.8'
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: 1.8.21
80
+ - !ruby/object:Gem::Dependency
81
+ name: nokorexi
82
+ requirement: !ruby/object:Gem::Requirement
83
+ requirements:
72
84
  - - ">="
73
85
  - !ruby/object:Gem::Version
74
- version: 0.3.2
86
+ version: 0.5.0
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.5'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: 0.5.0
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: '0.5'
75
100
  description:
76
101
  email: james@jamesrobertson.eu
77
102
  executables: []
@@ -98,9 +123,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
98
123
  - !ruby/object:Gem::Version
99
124
  version: '0'
100
125
  requirements: []
101
- rubyforge_project:
102
- rubygems_version: 2.6.13
126
+ rubygems_version: 3.0.3
103
127
  signing_key:
104
128
  specification_version: 4
105
- summary: A web scraper which searches for job adverts on jobsearch.direct.gov.uk
129
+ summary: A web scraper which searches for job adverts on findajob.dwp.gov.uk
106
130
  test_files: []
metadata.gz.sig CHANGED
Binary file