myuniversaljobsmatch 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/myuniversaljobsmatch.rb +95 -56
- metadata +58 -34
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 526c9daa5c4db413642d12ab424cabcb7ed88efd3c3fbde667380fe371ed57d2
|
4
|
+
data.tar.gz: 50c8a05605223a4b34ac3ea8f3d5be74b1e75a882e91238d682960ecd4cfaac8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff01c7f3c147f4ce0d528b04dc2c951df7b3426515e8e97a64225d0c579fbe8969f976f996fe9f3aabd90826e859a3fb7d70117fff8c9caf1dba0ea04f227892
|
7
|
+
data.tar.gz: 6a78bfac7569f00f0dff35696e56c73ef95611a75abb126abf666dfc8acdb3f5567d9999ab5f7b612dd5896432857d4f97d6d436589af787c4361f6eeafff28a
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data.tar.gz.sig
CHANGED
Binary file
|
data/lib/myuniversaljobsmatch.rb
CHANGED
@@ -1,16 +1,31 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
# file: myuniversaljobsmatch.rb
|
4
|
+
|
5
|
+
require 'chronic'
|
3
6
|
require 'nokorexi'
|
4
7
|
require 'dynarex'
|
5
8
|
|
6
|
-
|
9
|
+
module StringToDate
|
10
|
+
|
11
|
+
refine String do
|
12
|
+
|
13
|
+
def to_date()
|
14
|
+
Chronic.parse(self, :endian_precedence => :little).to_date
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
7
20
|
|
8
21
|
class MyUniversalJobsMatch
|
22
|
+
using StringToDate
|
9
23
|
|
10
24
|
def initialize(filepath: '')
|
11
25
|
|
12
26
|
@filepath = filepath
|
13
|
-
|
27
|
+
|
28
|
+
@url_base = 'https://findajob.dwp.gov.uk/'
|
14
29
|
|
15
30
|
@dx = Dynarex.new 'ujm[title,tags]/item(job_id, title, ' + \
|
16
31
|
'description, posting_date, company, location, industries, job_type)'
|
@@ -20,51 +35,76 @@ class MyUniversalJobsMatch
|
|
20
35
|
def dynarex()
|
21
36
|
@dx
|
22
37
|
end
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
38
|
+
|
39
|
+
# options:
|
40
|
+
# results_per_page: 10, 25, 50
|
41
|
+
# sort_by: date, highest_salary, lowest_salary
|
42
|
+
# hours: any, full_time, part_time
|
43
|
+
# contract_type: any, permanent, temporary, contract, apprenticeship
|
44
|
+
|
45
|
+
def search(title: '', where: '', results_per_page: nil, sort_by: nil,
|
46
|
+
hours: nil, contract_type: nil)
|
29
47
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
params = {
|
49
|
+
adv: 1,
|
50
|
+
q: title,
|
51
|
+
w: where
|
52
|
+
}
|
53
|
+
|
54
|
+
params[:pp] = results_per_page if results_per_page
|
55
|
+
params[:cty] = contract_type if contract_type
|
56
|
+
params[:cti] = hours if hours
|
57
|
+
|
58
|
+
case sort_by.to_sym
|
59
|
+
when 'date'
|
60
|
+
params[:sb] = :date
|
61
|
+
params[:sd] = :down
|
62
|
+
when :highest_salary
|
63
|
+
params[:sb] = :salary
|
64
|
+
params[:sd] = :down
|
65
|
+
when :lowest_salary
|
66
|
+
params[:sb] = :salary
|
67
|
+
params[:sd] = :up
|
68
|
+
end
|
69
|
+
|
70
|
+
url = @url_base + 'search/?' + params.map {|x| x.join('=') }.join('&')
|
71
|
+
doc = Nokorexi.new(url).to_doc
|
72
|
+
|
73
|
+
rows = doc.root.xpath('//div[@class="search-result"]')
|
74
|
+
|
75
|
+
a = rows.map do |row|
|
76
|
+
|
77
|
+
items = row.xpath('ul/li')
|
78
|
+
joburl = row.element('h3/a/@href').to_s
|
79
|
+
jobtitle = row.element('h3/a/text()')
|
80
|
+
jobid = joburl[/\d+$/]
|
81
|
+
jobref = title[/^\d+/].to_s
|
82
|
+
|
83
|
+
date = items[0].text.to_date
|
84
|
+
company = items[1].text('strong')
|
85
|
+
location = items[1].element('span/text()')
|
86
|
+
salary = items[2].text('strong') if items[2]
|
87
|
+
desc = row.text('p').strip
|
88
|
+
|
89
|
+
[jobid, jobref, date, jobtitle, joburl, company, location,
|
90
|
+
salary, desc]
|
51
91
|
|
52
92
|
end
|
53
93
|
|
54
94
|
|
55
95
|
dx = Dynarex.new('vacancies[title, desc, date, time, tags, xslt]/' + \
|
56
|
-
'vacancy(job_id, date, title, url, company, location,
|
96
|
+
'vacancy(job_id, job_ref, date, title, url, company, location, salary, desc)')
|
57
97
|
|
58
|
-
dx.title = "
|
59
|
-
dx.desc = "generated from web scrape of
|
60
|
-
"
|
98
|
+
dx.title = "Find a job - Search results for '#{title}'"
|
99
|
+
dx.desc = "generated from web scrape of Find a job." + \
|
100
|
+
"findajob.dwp.gov.uk/; source: " + url
|
61
101
|
dx.tags = 'jobs vacancies jobmatch ' + title.split.first
|
62
|
-
dx.date = Time.now.strftime("%Y-%b-%
|
102
|
+
dx.date = Time.now.strftime("%Y-%b-%d")
|
63
103
|
dx.time = Time.now.strftime("%H:%M")
|
64
104
|
|
65
105
|
a.each do |row|
|
66
|
-
dx.create Hash[%i(job_id date title url company
|
67
|
-
|
106
|
+
dx.create Hash[(%i(job_id job_ref date title url company) + \
|
107
|
+
%i(location salary desc)).zip(row)]
|
68
108
|
end
|
69
109
|
|
70
110
|
return dx
|
@@ -72,29 +112,28 @@ class MyUniversalJobsMatch
|
|
72
112
|
|
73
113
|
def query(id)
|
74
114
|
|
75
|
-
|
76
|
-
buffer = RXFHelper.read(url).first
|
77
|
-
|
78
|
-
doc = Nokorexi.new(buffer.gsub(/<br\s*\/?>/i,"\n")).to_doc
|
79
|
-
content = doc.root.at_css '.jobViewContent'
|
80
|
-
title = content.element('h2[2]/text()')
|
81
|
-
description = content.element('div')
|
82
|
-
|
83
|
-
a = doc.root.at_css('.jobViewSummary').xpath('dl/*')
|
115
|
+
doc = Nokorexi.new(@url_base + 'details/' + id).to_doc
|
84
116
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
if a.first.name == 'dt' then
|
89
|
-
a2 << [a.shift.text, '']
|
90
|
-
elsif
|
91
|
-
a2[-1][-1] << a.shift.text + "\n"
|
92
|
-
end
|
93
|
-
end
|
117
|
+
title = doc.root.text('head/title')
|
118
|
+
|
119
|
+
rows = doc.root.xpath('//table[1]/tbody/tr')
|
94
120
|
|
95
|
-
|
121
|
+
h = rows.map do |tr|
|
122
|
+
|
123
|
+
[
|
124
|
+
tr.text('th').downcase.rstrip[0..-2].gsub(/ +/,'_').to_sym,
|
125
|
+
tr.text('td').to_s
|
126
|
+
]
|
127
|
+
|
128
|
+
end.to_h
|
96
129
|
|
97
|
-
|
130
|
+
h[:description] = doc.root.element('//div[@itemprop="description"]').xml\
|
131
|
+
.gsub(/<br *\/> */,"\n").gsub(/<\/?[^\>]+\/?>/,'').strip
|
132
|
+
|
133
|
+
h[:posting_date] = h[:posting_date].to_date
|
134
|
+
h[:closing_date] = h[:closing_date].to_date
|
135
|
+
|
136
|
+
{title: title}.merge(h)
|
98
137
|
|
99
138
|
end
|
100
139
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: myuniversaljobsmatch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -10,68 +10,93 @@ bindir: bin
|
|
10
10
|
cert_chain:
|
11
11
|
- |
|
12
12
|
-----BEGIN CERTIFICATE-----
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwMTE5MTYxMzA0WhcN
|
15
|
+
MjEwMTE4MTYxMzA0WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCmy6lP
|
17
|
+
Cp884sO5pUZxsR9L5pJ4U3GUKmqgEG5AupSMe6HSm9pZi+f0cF+fNMF53Ac27elI
|
18
|
+
+twgMKKIrWc8HXiy3K+9tMSTzU0/uJGK7Y2iIZ1QHUMNy9S4VdglohHnF21IGFtj
|
19
|
+
j3Nq+xLCEvxcZoUjbgk563GZKgd5XJYwzyb4q+erGrlasaQ9A+ZakAWT9vCXnaGO
|
20
|
+
wSe8Wx7KrvKk3UCvtH0tXDt8T8pwzN38qKPU1lpdDZQi63jqh8+qW2Lyu1KCWrDf
|
21
|
+
rj7ef8JC4WBbUbPABvFefjGmLMMYueKpOjCxMWeKFZz5Q83KcJMewkY/8sGLplyB
|
22
|
+
f+yKLPa5Mi8NnbWcevxmyss+/1z/W/dW+Dl5/3d/c1xXfi7nAjLfjGF8qQ9E82DM
|
23
|
+
7Z3mp5gSGCxQBzNg50+ytE0hIzg2Hh+HkjMm/wt4nm7m4rmdY2FPvY1h7VKEbQtN
|
24
|
+
PUHF5MrVBIC9HCFVaIxpcO7ObK47PytfH3CfhPLHEd5ZU8Wo/WyYHiO+CQECAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU+mA09dUS
|
26
|
+
2VF8+toUk7mApgYy+lAwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEADZRsfL/QTHTM1VPNIJcgGFULZD7SaxAwcS25TOXs
|
29
|
+
fwQI62ZGWyoJIhSgmOKFJh+04pzy5s1SygS6GTTfbFV3ES3TAorH4HabUF27CF9i
|
30
|
+
7sCjvT4w/DZC1ue6P2qLdRhhXoqW+2ssGKvAB1VfIEQMA0LQU0IEUN5c+wG3r//J
|
31
|
+
g/nBG0lJna47dC6+MQo3tVpBOdxH7pMtTIvAxXUjjJdB9xrvzs/4NZ+k7nGb3uNF
|
32
|
+
NLXCpLx5+93WdJMAuyv88wvGChod/PWWDgzZP+/Zms7vvoTpmjMBm2xaRrgZFr+P
|
33
|
+
NFCMdBli9Mc0iv9lL5V3C+F0gqwmSs0tIFCr67XxPPbA27uReKc8XHDaVOaU8zJ9
|
34
|
+
3L7/hZqxmix8fqzeNWxLIT9itNGNgiqUyeEwHgKJTj9B3aXs2RVaFIfRNRn4BA5d
|
35
|
+
LFRA1HdKZrhZiH0YWMH+NWhB4jh0blOUN/hE0m8hL/vNySv6AUD63CDSgaYE2A4C
|
36
|
+
L6XR3+hIK/fqVrE8fRag8DUp
|
32
37
|
-----END CERTIFICATE-----
|
33
|
-
date:
|
38
|
+
date: 2020-01-19 00:00:00.000000000 Z
|
34
39
|
dependencies:
|
35
40
|
- !ruby/object:Gem::Dependency
|
36
|
-
name:
|
41
|
+
name: chronic
|
37
42
|
requirement: !ruby/object:Gem::Requirement
|
38
43
|
requirements:
|
39
44
|
- - "~>"
|
40
45
|
- !ruby/object:Gem::Version
|
41
|
-
version: '
|
46
|
+
version: '0.10'
|
42
47
|
- - ">="
|
43
48
|
- !ruby/object:Gem::Version
|
44
|
-
version:
|
49
|
+
version: 0.10.2
|
45
50
|
type: :runtime
|
46
51
|
prerelease: false
|
47
52
|
version_requirements: !ruby/object:Gem::Requirement
|
48
53
|
requirements:
|
49
54
|
- - "~>"
|
50
55
|
- !ruby/object:Gem::Version
|
51
|
-
version: '
|
56
|
+
version: '0.10'
|
52
57
|
- - ">="
|
53
58
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
59
|
+
version: 0.10.2
|
55
60
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
61
|
+
name: dynarex
|
57
62
|
requirement: !ruby/object:Gem::Requirement
|
58
63
|
requirements:
|
59
64
|
- - "~>"
|
60
65
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
66
|
+
version: '1.8'
|
62
67
|
- - ">="
|
63
68
|
- !ruby/object:Gem::Version
|
64
|
-
version:
|
69
|
+
version: 1.8.21
|
65
70
|
type: :runtime
|
66
71
|
prerelease: false
|
67
72
|
version_requirements: !ruby/object:Gem::Requirement
|
68
73
|
requirements:
|
69
74
|
- - "~>"
|
70
75
|
- !ruby/object:Gem::Version
|
71
|
-
version: '
|
76
|
+
version: '1.8'
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 1.8.21
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: nokorexi
|
82
|
+
requirement: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
72
84
|
- - ">="
|
73
85
|
- !ruby/object:Gem::Version
|
74
|
-
version: 0.
|
86
|
+
version: 0.5.0
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.5'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.5.0
|
97
|
+
- - "~>"
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0.5'
|
75
100
|
description:
|
76
101
|
email: james@jamesrobertson.eu
|
77
102
|
executables: []
|
@@ -98,9 +123,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
98
123
|
- !ruby/object:Gem::Version
|
99
124
|
version: '0'
|
100
125
|
requirements: []
|
101
|
-
|
102
|
-
rubygems_version: 2.6.13
|
126
|
+
rubygems_version: 3.0.3
|
103
127
|
signing_key:
|
104
128
|
specification_version: 4
|
105
|
-
summary: A web scraper which searches for job adverts on
|
129
|
+
summary: A web scraper which searches for job adverts on findajob.dwp.gov.uk
|
106
130
|
test_files: []
|
metadata.gz.sig
CHANGED
Binary file
|