indeed_scraper2022 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/indeed_scraper2022.rb +69 -7
- data.tar.gz.sig +5 -3
- metadata +2 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 50a484cf1a272522091413129241620336f12ca94d795b7ab132dd6911802d1c
|
4
|
+
data.tar.gz: 06dffee1253aa5076da9b6897bc48009687e8df86aa1a3629ee1d8a4432fdd13
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7327fc5bf9668c4f292eabf673574bfd7ca9fbf180133896c559ac1b7415d4ee880365302d24550aba4b83ee2a709a77bdb059145310e4a5de21840fe11a5058
|
7
|
+
data.tar.gz: a84b587275793166a7ac40d63607c8ea2f8dd340a3197ce782f901ca8c27de27b8eeef7f36fc9399aba286ddc399da109b2d14cd9db99b3c6a4a545e6ad9f21c
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/indeed_scraper2022.rb
CHANGED
@@ -25,17 +25,74 @@ class IndeedScraper2022
|
|
25
25
|
@results
|
26
26
|
end
|
27
27
|
|
28
|
-
def page()
|
29
|
-
|
30
|
-
|
31
|
-
# used for debugging
|
32
|
-
#
|
33
|
-
def a2()
|
34
|
-
@a2
|
28
|
+
def page(n)
|
29
|
+
url = @results[n-1][:link]
|
30
|
+
fetchjob(url)
|
35
31
|
end
|
36
32
|
|
37
33
|
private
|
38
34
|
|
35
|
+
def fetchjob(url)
|
36
|
+
|
37
|
+
doc = Nokorexi.new(url).to_doc
|
38
|
+
e0 = doc.element("html/body/div/div/div/div/div/div/div/div")
|
39
|
+
|
40
|
+
#div = e0.element("//div[@class='jobsearch-JobComponent']")
|
41
|
+
div1 = e0.element("//div[@class='jobsearch-DesktopStickyContainer']")
|
42
|
+
div2 = div1.element("div")
|
43
|
+
|
44
|
+
# jobsearch (e.g. Full Stack Website Developer (Wordpress))
|
45
|
+
jobtitle = div2.element("div[@class='jobsearch-JobInfoHead" \
|
46
|
+
"er-title-container']/h1[@class='jobsearch-JobInfoHead" \
|
47
|
+
"er-title']").text
|
48
|
+
|
49
|
+
div3 = div2.element("div[@class='jobsearch-CompanyInfoCon" \
|
50
|
+
"tainer']/div[@class='jobsearch-CompanyInfoWithoutHead" \
|
51
|
+
"erImage']/div/div[@class='jobsearch-DesktopStickyCont" \
|
52
|
+
"ainer-subtitle']")
|
53
|
+
|
54
|
+
# icl (e.g. Lyles Sutherland)
|
55
|
+
cname = div3.xpath("div[@class='jobsearch-DesktopSt" \
|
56
|
+
"ickyContainer-companyrating']/div/div[@class='icl-u-x" \
|
57
|
+
"s-mr--xs']")[1]
|
58
|
+
clink = div3.element('//a')
|
59
|
+
company = cname ? cname.text : clink.text
|
60
|
+
companylink = clink.attributes[:href] if clink
|
61
|
+
|
62
|
+
div5 = div3.xpath("div/div")
|
63
|
+
location, worklocation = div5.map(&:text).compact
|
64
|
+
|
65
|
+
# icl (e.g. Full-time, Permanent)
|
66
|
+
jobtype = div1.element("div/div/div[@class='jobsearch-J" \
|
67
|
+
"obMetadataHeader-item']/span[@class='icl-u-xs-mt--xs']")
|
68
|
+
jobtype = jobtype.texts.join if jobtype
|
69
|
+
|
70
|
+
# jobsearch (e.g. Urgently needed)
|
71
|
+
jobnote1 = e0.element("//div[@class='jobsearch-DesktopTag" \
|
72
|
+
"']/div[@class='urgently-hiring']/div[@class='jobsearc" \
|
73
|
+
"h-DesktopTag-text']")
|
74
|
+
jobnote1 = jobnote1.text if jobnote1
|
75
|
+
|
76
|
+
# jobsearch (e.g. 10 days ago)
|
77
|
+
datepost = e0.element("//div[@class='jobsearch-JobTab-con" \
|
78
|
+
"tent']/div[@class='jobsearch-JobMetadataFooter']/div").text
|
79
|
+
|
80
|
+
jobdesc = e0.element("//div[@class='icl-u-xs-mt--md']/div[@cl" \
|
81
|
+
"ass='jobsearch-jobDescriptionText']")
|
82
|
+
|
83
|
+
{
|
84
|
+
title: jobtitle,
|
85
|
+
company: company,
|
86
|
+
companylink: companylink,
|
87
|
+
location: location,
|
88
|
+
worklocation: worklocation,
|
89
|
+
note: jobnote1,
|
90
|
+
date: (Date.today - datepost.to_i).to_s,
|
91
|
+
desc: jobdesc
|
92
|
+
}
|
93
|
+
|
94
|
+
end
|
95
|
+
|
39
96
|
def search(q='', location='')
|
40
97
|
|
41
98
|
a = Mechanize.new
|
@@ -47,6 +104,7 @@ class IndeedScraper2022
|
|
47
104
|
pg = form.submit
|
48
105
|
|
49
106
|
doc2 = Nokogiri::XML(pg.body)
|
107
|
+
|
50
108
|
a2 = doc2.xpath "//a[div/div/div/div/table/tbody/tr/td/div]"
|
51
109
|
puts 'a2: ' + a2.length.inspect if @debug
|
52
110
|
|
@@ -90,6 +148,8 @@ class IndeedScraper2022
|
|
90
148
|
date = (Date.today - dateposted.first.to_i).to_s
|
91
149
|
|
92
150
|
{
|
151
|
+
link: @url_base.sub(/\/[^\/]+$/,'') \
|
152
|
+
+ doc.root.attributes[:href].gsub(/&amp;/,'&'),
|
93
153
|
title: jobtitle,
|
94
154
|
salary: salary,
|
95
155
|
company: company_name,
|
@@ -101,3 +161,5 @@ class IndeedScraper2022
|
|
101
161
|
end
|
102
162
|
end
|
103
163
|
end
|
164
|
+
|
165
|
+
|
data.tar.gz.sig
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
�u�qfT���t��\!a�?�-��x���0���}��Ɉ��[Ъ
|
2
|
+
��c�.D���r
|
3
|
+
c�Θ�r�1���$Pp������0aQ�d��ږ�x.��]~M�|Х�Ex�S1���ͼ�e��2:\)Kj7s��J
|
4
|
+
d&�ݙ�堟;[�I�A�v��P'�q~Ub�_�k=>42�i�4�
|
5
|
+
�q���W�?
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: indeed_scraper2022
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
|
36
36
|
SW/2zInu2bkj/meWm5eBoWHT
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2022-01-
|
38
|
+
date: 2022-01-25 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: nokorexi
|
metadata.gz.sig
CHANGED
Binary file
|