indeed_scraper2022 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/lib/indeed_scraper2022.rb +103 -0
- data.tar.gz.sig +3 -0
- metadata +111 -0
- metadata.gz.sig +0 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 0455b8d92dab8642f01c53ce43afc11eb24fbff801cce8df3306fd31eebeb223
|
4
|
+
data.tar.gz: eb8d24662b095fb0049209534ddfb1f0805454787105e091dc15eb7ca363d1b1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 342f6534ef851e12e78bf8e190ea98b437a6503c26510938dcb0021a30033c6be5ae819560a46ab0c11a4b42f833238517bbf9779c35c11e671d3a81ed8eedc9
|
7
|
+
data.tar.gz: 74be894713e2572bef1777cfafe099cc875d258fad6d741703a7cf00d22863c44cf02a877aa5106d73182d17408a16fc16f6b9538f4507c602e5588062a12334
|
checksums.yaml.gz.sig
ADDED
Binary file
|
@@ -0,0 +1,103 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# file: indeed_scraper2022.rb
|
4
|
+
|
5
|
+
require 'date'
require 'mechanize'
require 'nokorexi'
|
7
|
+
|
8
|
+
# Given the nature of changes to jobsearch websites,
|
9
|
+
# don't rely upon this gem working in the near future.
|
10
|
+
|
11
|
+
|
12
|
+
# Scrapes the first page of job search results from an Indeed site.
#
# The search is performed once, at construction time: the landing page at
# +url+ is fetched, its first form is filled in with +q+ and +location+ and
# submitted, and every job card found in the response is converted into a
# Hash (see #results).
#
# The element paths used below mirror the site's markup at the time of
# writing; when that markup changes, cards are skipped or fields come back
# as nil rather than raising.
#
class IndeedScraper2022

  # results:: Array of Hashes, one per recognised job card, with the keys
  #           :title, :salary, :company, :location, :jobsnippet and :date.
  #           Any value may be nil when the card lacks that element;
  #           :date is a Date#to_s string.
  # a2::      used for debugging - the Rexle document built for every
  #           anchor matched in the results page (including skipped cards).
  #
  attr_reader :results, :a2

  # url::      landing page that carries the search form
  # q::        search terms, written to the first field of the first form
  # location:: place to search in, written to the second field of that form
  # debug::    when true, progress information is printed to stdout
  #
  # Note: constructing the object performs the HTTP requests.
  #
  def initialize(url='https://uk.indeed.com/?r=us', q: '', location: '', debug: false)

    @debug = debug
    @url_base, @q, @location = url, q, location
    @results = search

  end

  # Placeholder; currently has no behaviour and returns nil.
  #
  def page()
  end

  private

  # Submits the search form and returns the parsed job cards.
  #
  # q and location default to the values given to the constructor
  # (previously these parameters were accepted but silently ignored).
  #
  def search(q=@q, location=@location)

    agent = Mechanize.new

    landing = agent.get(@url_base)
    form = landing.forms.first
    # NOTE(review): the form fields are addressed by position; this assumes
    # the first two fields are the query and the location - confirm against
    # the live page.
    form.fields[0].value = q
    form.fields[1].value = location
    pg = form.submit

    doc2 = Nokogiri::XML(pg.body)
    a2 = doc2.xpath "//a[div/div/div/div/table/tbody/tr/td/div]"
    puts 'a2: ' + a2.length.inspect if @debug

    @a2 = a2.map {|x| Rexle.new x.to_s }

    # parse_card returns nil for markup it does not recognise
    @a2.map {|doc| parse_card(doc) }.compact

  end

  # Converts one job card (a Rexle document rooted at the card's anchor)
  # into a Hash, or returns nil when the card's container markup is not
  # recognised.
  #
  def parse_card(doc)

    div = doc.element("a[@class='desktop']/div[@class='slider" \
        "_container']/div[@class='slider_list']/div[@class='sl" \
        "ider_item']/div[@class='job_seen_beacon']")
    td = div && div.element("table[@class='jobCard_mainContent']/tbo" \
        "dy/tr/td[@class='resultContent']")

    unless td
      puts 'skipped: unrecognised job card markup' if @debug
      return nil
    end

    # job title (e.g. Software Developer)
    jobtitle = text_at(td, "div[@class='tapItem-gutter']/h2[@" \
        "class='jobTitle-color-purple']/span")
    puts 'jobtitle: ' + jobtitle.inspect if @debug

    # salary is optional on a card
    salary = text_at(td, "div[@class='metadataContainer']/" \
        "div[@class='salary-snippet-container']/div[@class='sa" \
        "lary-snippet']/span")
    puts 'salary: ' + salary.inspect if @debug

    company_info = td.element("div[@class='companyInfo']")

    # company name (e.g. Coda Octopus Products Ltd)
    company_name = text_at(company_info, "span[@class='companyName']")

    # company location (e.g. Edinburgh)
    place = text_at(company_info, "div[@class='companyLocation']")

    tbody = div.element("table[@class='jobCardShelfContainer']/tbody")
    footer = tbody && tbody.element("tr[@class='underShelfFooter']/td/di" \
        "v[@class='result-footer']")

    # job (e.g. Our products are primarily written in C#, using...)
    jobsnippet = text_at(footer, "div[@class='job-snippet']/ul/li")

    {
      title: jobtitle,
      salary: salary,
      company: company_name,
      location: place,
      jobsnippet: jobsnippet,
      date: posted_date(footer)
    }

  end

  # Works out the posting date from the card footer's date element
  # (visually e.g. Posted 14 days ago). The leading integer of the first
  # text node is subtracted from today's date; text without a leading
  # integer counts as 0 days. Returns nil when there is no date element.
  #
  def posted_date(footer)

    span = footer && footer.element("span[@class='date']")
    return nil unless span

    (Date.today - span.texts.first.to_i).to_s

  end

  # Returns the text of the element found at path beneath node, or nil
  # when node is nil or no such element exists.
  #
  def text_at(node, path)

    found = node && node.element(path)
    found && found.text

  end

end
|
data.tar.gz.sig
ADDED
metadata
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: indeed_scraper2022
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James Robertson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMTI0MTkxMjI1WhcN
|
15
|
+
MjMwMTI0MTkxMjI1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDExD/K
|
17
|
+
ccgeJeTOPpYSQXUzgyjX9z9wdK/Y/a6hZw8DIaYRac/4nWKEeCtFFOKJ1DK9hpba
|
18
|
+
jVnY5H4ZRtbp/S4ZBimUTgRwlUVbEz/93Nfnf0ZKn7yyCFyfJCedRW/lqU5ACVR7
|
19
|
+
DkdsGSXxQAeq87GAuW/ofCIWBB/RCrWsj7JYpDnIg+oLMOm1nBQxmiPPkW1H2zpg
|
20
|
+
OvWY6wItQuOxxdk5VBioecASXmQsanLyDyNSycvos24cS0msbbZfs1PuU42HxTS+
|
21
|
+
OjfOLRk1F02VA/lTVJCxdJx4u4jcDVJhqFP61suUN/oUFDob/rQhIT8EUITkPLY5
|
22
|
+
zhxDzCSZs1q5WlkTQ7MIGB1rgl8YwSfRgOhZtjkSPddK+iSRsME1pen0R6sNctRC
|
23
|
+
hsDNFaKAT41ZV/krNSD2lP0p7BUlKZQ6c2Whcw8NlqZkLO85CYrp5lov9EgAA+6M
|
24
|
+
bc4YR2Rg9lOwYFKxKVkMNGpymQANedbGJmOHDgA2O+nL9vi6uFD+HaylJx0CAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUC7f4K/R/
|
26
|
+
u34HepeoZ/BBZuOx2nQwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEAweTlInWOWdP/OB0U1R5fdHeBOs9I2cn9OqH1I8AZ
|
29
|
+
3VGthsmza7nrrc94hCE1lQWSKwSahFPLY8AQoGibnPknCAuD5MvKSCRn3n8I7som
|
30
|
+
fCb2z05iv1ps49aUSLOp9nvQd2ghqIwZ2u7d7vnUvUh3FiG49KttLrnnJh4hrgpx
|
31
|
+
oMlT4cog/BtPs3pawgt3nbbTDamb3MTQyIMuo3uSjL867+J4I6vcDpPgCd0lPiS8
|
32
|
+
2ZZuRklW3qzm2Vhi7Uhxtm+POst/W6gix+3Acsx3jk+LQTSRLjvCaOCmY0ooi8D+
|
33
|
+
NiLKNdKO0oC8h9MyC+5EOq8Msfti3Vjr1kuFa7bcxzwSIQPFpoo+30V/uAEc0OYQ
|
34
|
+
qXLj/YPWi7B+J84HaWwSktqO8O0VYonAywpXTz0TcAv+6gsNF9HorTXPm/KyB6c6
|
35
|
+
YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
|
36
|
+
SW/2zInu2bkj/meWm5eBoWHT
|
37
|
+
-----END CERTIFICATE-----
|
38
|
+
date: 2022-01-24 00:00:00.000000000 Z
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: nokorexi
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0.5'
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 0.5.5
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - "~>"
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0.5'
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 0.5.5
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: mechanize
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - "~>"
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '2.8'
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 2.8.4
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '2.8'
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 2.8.4
|
80
|
+
description:
|
81
|
+
email: james@jamesrobertson.eu
|
82
|
+
executables: []
|
83
|
+
extensions: []
|
84
|
+
extra_rdoc_files: []
|
85
|
+
files:
|
86
|
+
- lib/indeed_scraper2022.rb
|
87
|
+
homepage: https://github.com/jrobertson/indeed_scraper2022
|
88
|
+
licenses:
|
89
|
+
- MIT
|
90
|
+
metadata: {}
|
91
|
+
post_install_message:
|
92
|
+
rdoc_options: []
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
requirements: []
|
106
|
+
rubyforge_project:
|
107
|
+
rubygems_version: 2.7.10
|
108
|
+
signing_key:
|
109
|
+
specification_version: 4
|
110
|
+
summary: Attempts to scrape the indeed.com jobsearch results (1 page).
|
111
|
+
test_files: []
|
metadata.gz.sig
ADDED
Binary file
|