indeed_scraper2022 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0455b8d92dab8642f01c53ce43afc11eb24fbff801cce8df3306fd31eebeb223
4
- data.tar.gz: eb8d24662b095fb0049209534ddfb1f0805454787105e091dc15eb7ca363d1b1
3
+ metadata.gz: 6bcc9484e0c35971209ee1419b9ff7d07d912831eedf889d10789c2ab4bf0863
4
+ data.tar.gz: c3be84be0f499ed92794a8de0c3109cc99d3ba73f6aef74f9426a12b0a250e10
5
5
  SHA512:
6
- metadata.gz: 342f6534ef851e12e78bf8e190ea98b437a6503c26510938dcb0021a30033c6be5ae819560a46ab0c11a4b42f833238517bbf9779c35c11e671d3a81ed8eedc9
7
- data.tar.gz: 74be894713e2572bef1777cfafe099cc875d258fad6d741703a7cf00d22863c44cf02a877aa5106d73182d17408a16fc16f6b9538f4507c602e5588062a12334
6
+ metadata.gz: 9e6d64b754bbd242f6d038f385602ed4dc5fe2d8112da12709a673a494419a159c9afe2ef4fa6bc95cccd160f9bceb22b03783108ffec213ed96442372092f81
7
+ data.tar.gz: eae4c65e2cd7784aff9c3db394fa29e44344e61a5275a31790a1fc21c57df34204585aa5e9e3db8af28a251cf2366675d4d9027b9a6578456dd9f64744842caa
checksums.yaml.gz.sig CHANGED
Binary file
@@ -9,6 +9,9 @@ require 'nokorexi'
9
9
  # don't rely upon this gem working in the near future.
10
10
 
11
11
 
12
+ class IndeedScraper2022Err < Exception
13
+ end
14
+
12
15
  class IndeedScraper2022
13
16
 
14
17
  def initialize(url='https://uk.indeed.com/?r=us', q: '', location: '', debug: false)
@@ -25,17 +28,78 @@ class IndeedScraper2022
25
28
  @results
26
29
  end
27
30
 
28
- def page()
29
- end
31
+ def page(n)
30
32
 
31
- # used for debugging
32
- #
33
- def a2()
34
- @a2
33
+ if n < 1 or n > @results.length then
34
+ raise IndeedScraper2022Err, 'Invalid page no.'
35
+ end
36
+
37
+ url = @results[n-1][:link]
38
+ fetchjob(url)
35
39
  end
36
40
 
37
41
  private
38
42
 
43
+ def fetchjob(url)
44
+
45
+ doc = Nokorexi.new(url).to_doc
46
+ e0 = doc.element("html/body/div/div/div/div/div/div/div/div")
47
+
48
+ #div = e0.element("//div[@class='jobsearch-JobComponent']")
49
+ div1 = e0.element("//div[@class='jobsearch-DesktopStickyContainer']")
50
+ div2 = div1.element("div")
51
+
52
+ # jobsearch (e.g. Full Stack Website Developer (Wordpress))
53
+ jobtitle = div2.element("div[@class='jobsearch-JobInfoHead" \
54
+ "er-title-container']/h1[@class='jobsearch-JobInfoHead" \
55
+ "er-title']")&.text
56
+
57
+ div3 = div2.element("div[@class='jobsearch-CompanyInfoCon" \
58
+ "tainer']/div[@class='jobsearch-CompanyInfoWithoutHead" \
59
+ "erImage']/div/div[@class='jobsearch-DesktopStickyCont" \
60
+ "ainer-subtitle']")
61
+
62
+ # icl (e.g. Lyles Sutherland)
63
+ cname = div3.xpath("div[@class='jobsearch-DesktopSt" \
64
+ "ickyContainer-companyrating']/div/div[@class='icl-u-x" \
65
+ "s-mr--xs']")[1]
66
+ clink = div3.element('//a')
67
+ company = cname ? cname.text : clink.text
68
+ companylink = clink.attributes[:href] if clink
69
+
70
+ div5 = div3.xpath("div/div")
71
+ location, worklocation = div5.map(&:text).compact
72
+
73
+ # icl (e.g. Full-time, Permanent)
74
+ jobtype = div1.element("div/div/div[@class='jobsearch-J" \
75
+ "obMetadataHeader-item']/span[@class='icl-u-xs-mt--xs']")
76
+ jobtype = jobtype.texts.join if jobtype
77
+
78
+ # jobsearch (e.g. Urgently needed)
79
+ jobnote1 = e0.element("//div[@class='jobsearch-DesktopTag" \
80
+ "']/div[@class='urgently-hiring']/div[@class='jobsearc" \
81
+ "h-DesktopTag-text']")&.text
82
+
83
+ # jobsearch (e.g. 10 days ago)
84
+ datepost = e0.element("//div[@class='jobsearch-JobTab-con" \
85
+ "tent']/div[@class='jobsearch-JobMetadataFooter']/div")&.text
86
+
87
+ jobdesc = e0.element("//div[@class='icl-u-xs-mt--md']/div[@cl" \
88
+ "ass='jobsearch-jobDescriptionText']").xml
89
+
90
+ {
91
+ title: jobtitle,
92
+ company: company,
93
+ companylink: companylink,
94
+ location: location,
95
+ worklocation: worklocation,
96
+ note: jobnote1,
97
+ date: (Date.today - datepost.to_i).to_s,
98
+ desc: jobdesc
99
+ }
100
+
101
+ end
102
+
39
103
  def search(q='', location='')
40
104
 
41
105
  a = Mechanize.new
@@ -47,6 +111,7 @@ class IndeedScraper2022
47
111
  pg = form.submit
48
112
 
49
113
  doc2 = Nokogiri::XML(pg.body)
114
+
50
115
  a2 = doc2.xpath "//a[div/div/div/div/table/tbody/tr/td/div]"
51
116
  puts 'a2: ' + a2.length.inspect if @debug
52
117
 
@@ -62,34 +127,36 @@ class IndeedScraper2022
62
127
 
63
128
  # job title (e.g. Software Developer)
64
129
  jobtitle = td.element("div[@class='tapItem-gutter']/h2[@" \
65
- "class='jobTitle-color-purple']/span").text
130
+ "class='jobTitle-color-purple']/span")&.text
66
131
  puts 'jobtitle: ' + jobtitle.inspect if @debug
67
132
 
68
133
  salary = td.element("div[@class='metadataContainer']/" \
69
134
  "div[@class='salary-snippet-container']/div[@class='sa" \
70
- "lary-snippet']/span")
71
- salary = salary.text if salary
135
+ "lary-snippet']/span")&.text
136
+
72
137
  puts 'salary: ' + salary.inspect if @debug
73
138
  div1 = td.element("div[@class='companyInfo']")
74
139
 
75
140
  # company name (e.g. Coda Octopus Products Ltd)
76
- company_name = div1.element("span[@class='companyName']").text
141
+ company_name = div1.element("span[@class='companyName']")&.text
77
142
 
78
143
  # company location (e.g. Edinburgh)
79
- location = div1.element("div[@class='companyLocation']").text
144
+ location = div1.element("div[@class='companyLocation']")&.text
80
145
  tbody = div.element("table[@class='jobCardShelfContainer']/tbody")
81
146
 
82
147
  div3 = tbody.element("tr[@class='underShelfFooter']/td/di" \
83
148
  "v[@class='result-footer']")
84
149
 
85
150
  # job (e.g. Our products are primarily written in C#, using...)
86
- jobsnippet = div3.element("div[@class='job-snippet']/ul/li").text
151
+ jobsnippet = div3.xpath("div[@class='job-snippet']/ul/li/text()").join("\n")
87
152
 
88
153
  # visually (e.g. Posted 14 days ago)
89
154
  dateposted = div3.element("span[@class='date']").texts
90
155
  date = (Date.today - dateposted.first.to_i).to_s
91
156
 
92
157
  {
158
+ link: @url_base.sub(/\/[^\/]+$/,'') \
159
+ + doc.root.attributes[:href].gsub(/&amp;/,'&'),
93
160
  title: jobtitle,
94
161
  salary: salary,
95
162
  company: company_name,
@@ -101,3 +168,19 @@ class IndeedScraper2022
101
168
  end
102
169
  end
103
170
  end
171
+
172
+ class IS22Plus < IndeedScraper2022
173
+
174
+ def initialize(q: '', location: '', debug: false)
175
+ super(q: q, location: location, debug: debug)
176
+ end
177
+
178
+ def list()
179
+
180
+ @results.map.with_index do |x,i|
181
+ "%2d. %s" % [i+1,x[:title]]
182
+ end.join("\n")
183
+
184
+ end
185
+
186
+ end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indeed_scraper2022
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
36
36
  SW/2zInu2bkj/meWm5eBoWHT
37
37
  -----END CERTIFICATE-----
38
- date: 2022-01-24 00:00:00.000000000 Z
38
+ date: 2022-03-22 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: nokorexi
@@ -43,20 +43,20 @@ dependencies:
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '0.5'
46
+ version: '0.7'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 0.5.5
49
+ version: 0.7.0
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '0.5'
56
+ version: '0.7'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 0.5.5
59
+ version: 0.7.0
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: mechanize
62
62
  requirement: !ruby/object:Gem::Requirement
@@ -78,7 +78,7 @@ dependencies:
78
78
  - !ruby/object:Gem::Version
79
79
  version: 2.8.4
80
80
  description:
81
- email: james@jamesrobertson.eu
81
+ email: digital.robertson@gmail.com
82
82
  executables: []
83
83
  extensions: []
84
84
  extra_rdoc_files: []
@@ -96,15 +96,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
96
96
  requirements:
97
97
  - - ">="
98
98
  - !ruby/object:Gem::Version
99
- version: '0'
99
+ version: 2.3.0
100
100
  required_rubygems_version: !ruby/object:Gem::Requirement
101
101
  requirements:
102
102
  - - ">="
103
103
  - !ruby/object:Gem::Version
104
104
  version: '0'
105
105
  requirements: []
106
- rubyforge_project:
107
- rubygems_version: 2.7.10
106
+ rubygems_version: 3.2.22
108
107
  signing_key:
109
108
  specification_version: 4
110
109
  summary: Attempts to scrape the indeed.com jobsearch results (1 page).
metadata.gz.sig CHANGED
Binary file