indeed_scraper2022 0.1.0 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0455b8d92dab8642f01c53ce43afc11eb24fbff801cce8df3306fd31eebeb223
4
- data.tar.gz: eb8d24662b095fb0049209534ddfb1f0805454787105e091dc15eb7ca363d1b1
3
+ metadata.gz: 6bcc9484e0c35971209ee1419b9ff7d07d912831eedf889d10789c2ab4bf0863
4
+ data.tar.gz: c3be84be0f499ed92794a8de0c3109cc99d3ba73f6aef74f9426a12b0a250e10
5
5
  SHA512:
6
- metadata.gz: 342f6534ef851e12e78bf8e190ea98b437a6503c26510938dcb0021a30033c6be5ae819560a46ab0c11a4b42f833238517bbf9779c35c11e671d3a81ed8eedc9
7
- data.tar.gz: 74be894713e2572bef1777cfafe099cc875d258fad6d741703a7cf00d22863c44cf02a877aa5106d73182d17408a16fc16f6b9538f4507c602e5588062a12334
6
+ metadata.gz: 9e6d64b754bbd242f6d038f385602ed4dc5fe2d8112da12709a673a494419a159c9afe2ef4fa6bc95cccd160f9bceb22b03783108ffec213ed96442372092f81
7
+ data.tar.gz: eae4c65e2cd7784aff9c3db394fa29e44344e61a5275a31790a1fc21c57df34204585aa5e9e3db8af28a251cf2366675d4d9027b9a6578456dd9f64744842caa
checksums.yaml.gz.sig CHANGED
Binary file
@@ -9,6 +9,9 @@ require 'nokorexi'
9
9
  # don't rely upon this gem working in the near future.
10
10
 
11
11
 
12
+ class IndeedScraper2022Err < Exception
13
+ end
14
+
12
15
  class IndeedScraper2022
13
16
 
14
17
  def initialize(url='https://uk.indeed.com/?r=us', q: '', location: '', debug: false)
@@ -25,17 +28,78 @@ class IndeedScraper2022
25
28
  @results
26
29
  end
27
30
 
28
- def page()
29
- end
31
+ def page(n)
30
32
 
31
- # used for debugging
32
- #
33
- def a2()
34
- @a2
33
+ if n < 1 or n > @results.length then
34
+ raise IndeedScraper2022Err, 'Invalid page no.'
35
+ end
36
+
37
+ url = @results[n-1][:link]
38
+ fetchjob(url)
35
39
  end
36
40
 
37
41
  private
38
42
 
43
+ def fetchjob(url)
44
+
45
+ doc = Nokorexi.new(url).to_doc
46
+ e0 = doc.element("html/body/div/div/div/div/div/div/div/div")
47
+
48
+ #div = e0.element("//div[@class='jobsearch-JobComponent']")
49
+ div1 = e0.element("//div[@class='jobsearch-DesktopStickyContainer']")
50
+ div2 = div1.element("div")
51
+
52
+ # jobsearch (e.g. Full Stack Website Developer (Wordpress))
53
+ jobtitle = div2.element("div[@class='jobsearch-JobInfoHead" \
54
+ "er-title-container']/h1[@class='jobsearch-JobInfoHead" \
55
+ "er-title']")&.text
56
+
57
+ div3 = div2.element("div[@class='jobsearch-CompanyInfoCon" \
58
+ "tainer']/div[@class='jobsearch-CompanyInfoWithoutHead" \
59
+ "erImage']/div/div[@class='jobsearch-DesktopStickyCont" \
60
+ "ainer-subtitle']")
61
+
62
+ # icl (e.g. Lyles Sutherland)
63
+ cname = div3.xpath("div[@class='jobsearch-DesktopSt" \
64
+ "ickyContainer-companyrating']/div/div[@class='icl-u-x" \
65
+ "s-mr--xs']")[1]
66
+ clink = div3.element('//a')
67
+ company = cname ? cname.text : clink.text
68
+ companylink = clink.attributes[:href] if clink
69
+
70
+ div5 = div3.xpath("div/div")
71
+ location, worklocation = div5.map(&:text).compact
72
+
73
+ # icl (e.g. Full-time, Permanent)
74
+ jobtype = div1.element("div/div/div[@class='jobsearch-J" \
75
+ "obMetadataHeader-item']/span[@class='icl-u-xs-mt--xs']")
76
+ jobtype = jobtype.texts.join if jobtype
77
+
78
+ # jobsearch (e.g. Urgently needed)
79
+ jobnote1 = e0.element("//div[@class='jobsearch-DesktopTag" \
80
+ "']/div[@class='urgently-hiring']/div[@class='jobsearc" \
81
+ "h-DesktopTag-text']")&.text
82
+
83
+ # jobsearch (e.g. 10 days ago)
84
+ datepost = e0.element("//div[@class='jobsearch-JobTab-con" \
85
+ "tent']/div[@class='jobsearch-JobMetadataFooter']/div")&.text
86
+
87
+ jobdesc = e0.element("//div[@class='icl-u-xs-mt--md']/div[@cl" \
88
+ "ass='jobsearch-jobDescriptionText']").xml
89
+
90
+ {
91
+ title: jobtitle,
92
+ company: company,
93
+ companylink: companylink,
94
+ location: location,
95
+ worklocation: worklocation,
96
+ note: jobnote1,
97
+ date: (Date.today - datepost.to_i).to_s,
98
+ desc: jobdesc
99
+ }
100
+
101
+ end
102
+
39
103
  def search(q='', location='')
40
104
 
41
105
  a = Mechanize.new
@@ -47,6 +111,7 @@ class IndeedScraper2022
47
111
  pg = form.submit
48
112
 
49
113
  doc2 = Nokogiri::XML(pg.body)
114
+
50
115
  a2 = doc2.xpath "//a[div/div/div/div/table/tbody/tr/td/div]"
51
116
  puts 'a2: ' + a2.length.inspect if @debug
52
117
 
@@ -62,34 +127,36 @@ class IndeedScraper2022
62
127
 
63
128
  # job title (e.g. Software Developer)
64
129
  jobtitle = td.element("div[@class='tapItem-gutter']/h2[@" \
65
- "class='jobTitle-color-purple']/span").text
130
+ "class='jobTitle-color-purple']/span")&.text
66
131
  puts 'jobtitle: ' + jobtitle.inspect if @debug
67
132
 
68
133
  salary = td.element("div[@class='metadataContainer']/" \
69
134
  "div[@class='salary-snippet-container']/div[@class='sa" \
70
- "lary-snippet']/span")
71
- salary = salary.text if salary
135
+ "lary-snippet']/span")&.text
136
+
72
137
  puts 'salary: ' + salary.inspect if @debug
73
138
  div1 = td.element("div[@class='companyInfo']")
74
139
 
75
140
  # company name (e.g. Coda Octopus Products Ltd)
76
- company_name = div1.element("span[@class='companyName']").text
141
+ company_name = div1.element("span[@class='companyName']")&.text
77
142
 
78
143
  # company location (e.g. Edinburgh)
79
- location = div1.element("div[@class='companyLocation']").text
144
+ location = div1.element("div[@class='companyLocation']")&.text
80
145
  tbody = div.element("table[@class='jobCardShelfContainer']/tbody")
81
146
 
82
147
  div3 = tbody.element("tr[@class='underShelfFooter']/td/di" \
83
148
  "v[@class='result-footer']")
84
149
 
85
150
  # job (e.g. Our products are primarily written in C#, using...)
86
- jobsnippet = div3.element("div[@class='job-snippet']/ul/li").text
151
+ jobsnippet = div3.xpath("div[@class='job-snippet']/ul/li/text()").join("\n")
87
152
 
88
153
  # visually (e.g. Posted 14 days ago)
89
154
  dateposted = div3.element("span[@class='date']").texts
90
155
  date = (Date.today - dateposted.first.to_i).to_s
91
156
 
92
157
  {
158
+ link: @url_base.sub(/\/[^\/]+$/,'') \
159
+ + doc.root.attributes[:href].gsub(/&amp;/,'&'),
93
160
  title: jobtitle,
94
161
  salary: salary,
95
162
  company: company_name,
@@ -101,3 +168,19 @@ class IndeedScraper2022
101
168
  end
102
169
  end
103
170
  end
171
+
172
+ class IS22Plus < IndeedScraper2022
173
+
174
+ def initialize(q: '', location: '', debug: false)
175
+ super(q: q, location: location, debug: debug)
176
+ end
177
+
178
+ def list()
179
+
180
+ @results.map.with_index do |x,i|
181
+ "%2d. %s" % [i+1,x[:title]]
182
+ end.join("\n")
183
+
184
+ end
185
+
186
+ end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indeed_scraper2022
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
36
36
  SW/2zInu2bkj/meWm5eBoWHT
37
37
  -----END CERTIFICATE-----
38
- date: 2022-01-24 00:00:00.000000000 Z
38
+ date: 2022-03-22 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: nokorexi
@@ -43,20 +43,20 @@ dependencies:
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '0.5'
46
+ version: '0.7'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 0.5.5
49
+ version: 0.7.0
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '0.5'
56
+ version: '0.7'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 0.5.5
59
+ version: 0.7.0
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: mechanize
62
62
  requirement: !ruby/object:Gem::Requirement
@@ -78,7 +78,7 @@ dependencies:
78
78
  - !ruby/object:Gem::Version
79
79
  version: 2.8.4
80
80
  description:
81
- email: james@jamesrobertson.eu
81
+ email: digital.robertson@gmail.com
82
82
  executables: []
83
83
  extensions: []
84
84
  extra_rdoc_files: []
@@ -96,15 +96,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
96
96
  requirements:
97
97
  - - ">="
98
98
  - !ruby/object:Gem::Version
99
- version: '0'
99
+ version: 2.3.0
100
100
  required_rubygems_version: !ruby/object:Gem::Requirement
101
101
  requirements:
102
102
  - - ">="
103
103
  - !ruby/object:Gem::Version
104
104
  version: '0'
105
105
  requirements: []
106
- rubyforge_project:
107
- rubygems_version: 2.7.10
106
+ rubygems_version: 3.2.22
108
107
  signing_key:
109
108
  specification_version: 4
110
109
  summary: Attempts to scrape the indeed.com jobsearch results (1 page).
metadata.gz.sig CHANGED
Binary file