indeed_scraper2022 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/lib/indeed_scraper2022.rb +103 -0
- data.tar.gz.sig +3 -0
- metadata +111 -0
- metadata.gz.sig +0 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 0455b8d92dab8642f01c53ce43afc11eb24fbff801cce8df3306fd31eebeb223
|
4
|
+
data.tar.gz: eb8d24662b095fb0049209534ddfb1f0805454787105e091dc15eb7ca363d1b1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 342f6534ef851e12e78bf8e190ea98b437a6503c26510938dcb0021a30033c6be5ae819560a46ab0c11a4b42f833238517bbf9779c35c11e671d3a81ed8eedc9
|
7
|
+
data.tar.gz: 74be894713e2572bef1777cfafe099cc875d258fad6d741703a7cf00d22863c44cf02a877aa5106d73182d17408a16fc16f6b9538f4507c602e5588062a12334
|
checksums.yaml.gz.sig
ADDED
Binary file
|
@@ -0,0 +1,103 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# file: indeed_scraper2022.rb
|
4
|
+
|
5
|
+
require 'date'
require 'mechanize'
require 'nokorexi'
|
7
|
+
|
8
|
+
# Given the nature of changes to jobsearch websites,
|
9
|
+
# don't rely upon this gem working in the near future.
|
10
|
+
|
11
|
+
|
12
|
+
# Scrapes the first page of job search results from an Indeed site.
#
# The search is performed once, inside the constructor; the parsed rows are
# then available through #results. Every XPath below is tied to the markup
# Indeed served when this gem was written, so expect breakage when the site
# changes.
#
class IndeedScraper2022

  # XPath expressions (evaluated by Rexle) locating each part of a job card.
  # Kept whole, rather than split mid-word, so they can be searched for.
  CARD_XPATH = "a[@class='desktop']/div[@class='slider_container']" \
               "/div[@class='slider_list']/div[@class='slider_item']" \
               "/div[@class='job_seen_beacon']".freeze
  CONTENT_XPATH = "table[@class='jobCard_mainContent']/tbody/tr" \
                  "/td[@class='resultContent']".freeze
  TITLE_XPATH = "div[@class='tapItem-gutter']" \
                "/h2[@class='jobTitle-color-purple']/span".freeze
  SALARY_XPATH = "div[@class='metadataContainer']" \
                 "/div[@class='salary-snippet-container']" \
                 "/div[@class='salary-snippet']/span".freeze
  COMPANY_INFO_XPATH = "div[@class='companyInfo']".freeze
  COMPANY_NAME_XPATH = "span[@class='companyName']".freeze
  COMPANY_LOCATION_XPATH = "div[@class='companyLocation']".freeze
  SHELF_XPATH = "table[@class='jobCardShelfContainer']/tbody".freeze
  FOOTER_XPATH = "tr[@class='underShelfFooter']/td" \
                 "/div[@class='result-footer']".freeze
  SNIPPET_XPATH = "div[@class='job-snippet']/ul/li".freeze
  DATE_XPATH = "span[@class='date']".freeze

  # results: array of hashes, one per job card, with the keys
  #          :title, :salary, :company, :location, :jobsnippet and :date
  # a2:      the Rexle documents built from each matched anchor
  #          (used for debugging)
  attr_reader :results, :a2

  # url:      address of the page holding the search form
  # q:        search keywords (written to the form's first field)
  # location: place to search in (written to the form's second field)
  # debug:    when true, prints progress information to stdout
  #
  # Note: the search runs immediately, so constructing the object performs
  # network requests.
  #
  def initialize(url='https://uk.indeed.com/?r=us', q: '', location: '', debug: false)

    @debug = debug
    @url_base, @q, @location = url, q, location
    @results = search

  end

  # Placeholder; currently has no behaviour and returns nil.
  #
  def page()
  end

  private

  # Submits the search form and returns an array of hashes, one per job
  # card found. Stores the raw Rexle documents in @a2.
  #
  # q and location default to the values given to the constructor.
  # Previously these parameters were declared but never read.
  #
  def search(q = @q, location = @location)

    agent = Mechanize.new

    form = agent.get(@url_base).forms.first
    # assumes the first two fields of the first form are the keyword and
    # location inputs -- verify against the live page
    form.fields[0].value = q
    form.fields[1].value = location
    listing = form.submit

    anchors = Nokogiri::XML(listing.body)
                      .xpath('//a[div/div/div/div/table/tbody/tr/td/div]')
    puts "a2: #{anchors.length.inspect}" if @debug

    @a2 = anchors.map { |node| Rexle.new(node.to_s) }

    # cards with an unrecognised layout yield nil and are dropped here
    @a2.map { |doc| parse_card(doc) }.compact

  end

  # Converts one job card (a Rexle document) into a result hash.
  #
  # Returns nil when the card's main container cannot be located, so that a
  # single odd card no longer aborts the whole search. Any other missing
  # node simply leaves its field as nil, in the same way salary always did.
  #
  def parse_card(doc)

    card = doc.element(CARD_XPATH)
    content = card&.element(CONTENT_XPATH)

    unless content
      puts 'skipping card: unrecognised layout' if @debug
      return nil
    end

    # job title (e.g. Software Developer)
    jobtitle = text_at(content, TITLE_XPATH)
    puts 'jobtitle: ' + jobtitle.inspect if @debug

    salary = text_at(content, SALARY_XPATH)
    puts 'salary: ' + salary.inspect if @debug

    company = content.element(COMPANY_INFO_XPATH)
    footer = card.element(SHELF_XPATH)&.element(FOOTER_XPATH)

    {
      title: jobtitle,
      salary: salary,
      # company name (e.g. Coda Octopus Products Ltd)
      company: text_at(company, COMPANY_NAME_XPATH),
      # company location (e.g. Edinburgh)
      location: text_at(company, COMPANY_LOCATION_XPATH),
      # job (e.g. Our products are primarily written in C#, using...)
      jobsnippet: text_at(footer, SNIPPET_XPATH),
      # e.g. Posted 14 days ago
      date: posted_date(footer&.element(DATE_XPATH)&.texts)
    }

  end

  # Returns the text of the node found at xpath beneath parent, or nil when
  # either parent or the node is absent.
  #
  def text_at(parent, xpath)
    parent&.element(xpath)&.text
  end

  # Turns the text fragments of the "posted" label into an ISO 8601 date
  # string by subtracting the first number found from today's date.
  #
  # texts: array of strings (e.g. ['14 days ago']), or nil when the label
  #        was not found, in which case nil is returned.
  #
  # The fragments are scanned as a whole so that a leading word such as
  # "Posted" does not hide the number; with no number at all (e.g.
  # "Just posted") today's date is returned.
  #
  def posted_date(texts)

    return nil if texts.nil?

    days_ago = texts.join(' ')[/\d+/].to_i
    (Date.today - days_ago).to_s

  end

end
|
data.tar.gz.sig
ADDED
metadata
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: indeed_scraper2022
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James Robertson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMTI0MTkxMjI1WhcN
|
15
|
+
MjMwMTI0MTkxMjI1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDExD/K
|
17
|
+
ccgeJeTOPpYSQXUzgyjX9z9wdK/Y/a6hZw8DIaYRac/4nWKEeCtFFOKJ1DK9hpba
|
18
|
+
jVnY5H4ZRtbp/S4ZBimUTgRwlUVbEz/93Nfnf0ZKn7yyCFyfJCedRW/lqU5ACVR7
|
19
|
+
DkdsGSXxQAeq87GAuW/ofCIWBB/RCrWsj7JYpDnIg+oLMOm1nBQxmiPPkW1H2zpg
|
20
|
+
OvWY6wItQuOxxdk5VBioecASXmQsanLyDyNSycvos24cS0msbbZfs1PuU42HxTS+
|
21
|
+
OjfOLRk1F02VA/lTVJCxdJx4u4jcDVJhqFP61suUN/oUFDob/rQhIT8EUITkPLY5
|
22
|
+
zhxDzCSZs1q5WlkTQ7MIGB1rgl8YwSfRgOhZtjkSPddK+iSRsME1pen0R6sNctRC
|
23
|
+
hsDNFaKAT41ZV/krNSD2lP0p7BUlKZQ6c2Whcw8NlqZkLO85CYrp5lov9EgAA+6M
|
24
|
+
bc4YR2Rg9lOwYFKxKVkMNGpymQANedbGJmOHDgA2O+nL9vi6uFD+HaylJx0CAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUC7f4K/R/
|
26
|
+
u34HepeoZ/BBZuOx2nQwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEAweTlInWOWdP/OB0U1R5fdHeBOs9I2cn9OqH1I8AZ
|
29
|
+
3VGthsmza7nrrc94hCE1lQWSKwSahFPLY8AQoGibnPknCAuD5MvKSCRn3n8I7som
|
30
|
+
fCb2z05iv1ps49aUSLOp9nvQd2ghqIwZ2u7d7vnUvUh3FiG49KttLrnnJh4hrgpx
|
31
|
+
oMlT4cog/BtPs3pawgt3nbbTDamb3MTQyIMuo3uSjL867+J4I6vcDpPgCd0lPiS8
|
32
|
+
2ZZuRklW3qzm2Vhi7Uhxtm+POst/W6gix+3Acsx3jk+LQTSRLjvCaOCmY0ooi8D+
|
33
|
+
NiLKNdKO0oC8h9MyC+5EOq8Msfti3Vjr1kuFa7bcxzwSIQPFpoo+30V/uAEc0OYQ
|
34
|
+
qXLj/YPWi7B+J84HaWwSktqO8O0VYonAywpXTz0TcAv+6gsNF9HorTXPm/KyB6c6
|
35
|
+
YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
|
36
|
+
SW/2zInu2bkj/meWm5eBoWHT
|
37
|
+
-----END CERTIFICATE-----
|
38
|
+
date: 2022-01-24 00:00:00.000000000 Z
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: nokorexi
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0.5'
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 0.5.5
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - "~>"
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0.5'
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 0.5.5
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: mechanize
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - "~>"
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '2.8'
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 2.8.4
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '2.8'
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 2.8.4
|
80
|
+
description:
|
81
|
+
email: james@jamesrobertson.eu
|
82
|
+
executables: []
|
83
|
+
extensions: []
|
84
|
+
extra_rdoc_files: []
|
85
|
+
files:
|
86
|
+
- lib/indeed_scraper2022.rb
|
87
|
+
homepage: https://github.com/jrobertson/indeed_scraper2022
|
88
|
+
licenses:
|
89
|
+
- MIT
|
90
|
+
metadata: {}
|
91
|
+
post_install_message:
|
92
|
+
rdoc_options: []
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
requirements: []
|
106
|
+
rubyforge_project:
|
107
|
+
rubygems_version: 2.7.10
|
108
|
+
signing_key:
|
109
|
+
specification_version: 4
|
110
|
+
summary: Attempts to scrape the indeed.com jobsearch results (1 page).
|
111
|
+
test_files: []
|
metadata.gz.sig
ADDED
Binary file
|