indeed_scraper2022 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0455b8d92dab8642f01c53ce43afc11eb24fbff801cce8df3306fd31eebeb223
4
+ data.tar.gz: eb8d24662b095fb0049209534ddfb1f0805454787105e091dc15eb7ca363d1b1
5
+ SHA512:
6
+ metadata.gz: 342f6534ef851e12e78bf8e190ea98b437a6503c26510938dcb0021a30033c6be5ae819560a46ab0c11a4b42f833238517bbf9779c35c11e671d3a81ed8eedc9
7
+ data.tar.gz: 74be894713e2572bef1777cfafe099cc875d258fad6d741703a7cf00d22863c44cf02a877aa5106d73182d17408a16fc16f6b9538f4507c602e5588062a12334
checksums.yaml.gz.sig ADDED
Binary file
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: indeed_scraper2022.rb
4
+
5
require 'date'
require 'mechanize'
require 'nokorexi'
7
+
8
+ # Given the nature of changes to jobsearch websites,
9
+ # don't rely upon this gem working in the near future.
10
+
11
+
12
# Scrapes one page of job search results from an Indeed site.
#
# The search is performed once, when the object is constructed: the first
# form on the landing page is filled in with the query and location,
# submitted, and every job card found in the response is turned into a Hash.
#
# NOTE(review): the XPath expressions below are tied to Indeed's markup at
# the time of writing; expect them to stop matching when the site changes.
#
class IndeedScraper2022

  # @return [Array<Hash>] one Hash per parsed job card, with the keys
  #   :title, :salary, :company, :location, :jobsnippet and :date
  attr_reader :results

  # used for debugging
  #
  # @return [Array] the document built from each matched result anchor
  attr_reader :a2

  # @param url [String] landing page containing the search form
  # @param q [String] search terms placed in the form's first field
  # @param location [String] location placed in the form's second field
  # @param debug [Boolean] when true, progress is printed to stdout
  def initialize(url = 'https://uk.indeed.com/?r=us', q: '', location: '', debug: false)
    @debug = debug
    @url_base = url
    @q = q
    @location = location
    @results = search
  end

  # Placeholder; currently does nothing and returns nil.
  def page
  end

  private

  # Submits the search form and parses every job card on the results page.
  #
  # @param q [String] search terms (defaults to the value given to #initialize)
  # @param location [String] location (defaults to the value given to #initialize)
  # @return [Array<Hash>] parsed job cards; cards without the expected
  #   container markup are left out
  def search(q = @q, location = @location)
    agent = Mechanize.new
    form = agent.get(@url_base).forms.first
    form.fields[0].value = q
    form.fields[1].value = location
    listing = form.submit

    # NOTE(review): the body is parsed as XML rather than HTML, as in the
    # original code; the XPath below was written against that parse.
    anchors = Nokogiri::XML(listing.body)
                      .xpath('//a[div/div/div/div/table/tbody/tr/td/div]')
    puts "a2: #{anchors.length.inspect}" if @debug

    @a2 = anchors.map { |anchor| Rexle.new(anchor.to_s) }
    @a2.map { |card| parse_card(card) }.compact
  end

  # Extracts the fields of a single job card.
  #
  # Every lookup is nil-safe so that one card with missing markup (e.g. no
  # bulleted snippet) cannot abort the whole result set.
  #
  # @param card [#element] document wrapping one result anchor
  # @return [Hash, nil] the job details, or nil when the card's main
  #   content container cannot be found
  def parse_card(card)
    beacon = card.element(
      "a[@class='desktop']/div[@class='slider_container']" \
      "/div[@class='slider_list']/div[@class='slider_item']" \
      "/div[@class='job_seen_beacon']"
    )
    content = beacon&.element("table[@class='jobCard_mainContent']/tbody/tr/td[@class='resultContent']")
    return unless content

    # job title (e.g. Software Developer)
    title_node = content.element("div[@class='tapItem-gutter']/h2[@class='jobTitle-color-purple']/span")
    title = title_node&.text
    puts "jobtitle: #{title.inspect}" if @debug

    salary_node = content.element(
      "div[@class='metadataContainer']/div[@class='salary-snippet-container']" \
      "/div[@class='salary-snippet']/span"
    )
    salary = salary_node&.text
    puts "salary: #{salary.inspect}" if @debug

    company_info = content.element("div[@class='companyInfo']")
    shelf = beacon.element("table[@class='jobCardShelfContainer']/tbody")
    footer = shelf&.element("tr[@class='underShelfFooter']/td/div[@class='result-footer']")

    {
      title: title,
      salary: salary,
      # company name (e.g. Coda Octopus Products Ltd)
      company: company_info&.element("span[@class='companyName']")&.text,
      # company location (e.g. Edinburgh)
      location: company_info&.element("div[@class='companyLocation']")&.text,
      # job (e.g. Our products are primarily written in C#, using...)
      jobsnippet: footer&.element("div[@class='job-snippet']/ul/li")&.text,
      # derived from the "posted" label (e.g. Posted 14 days ago)
      date: posted_date(footer&.element("span[@class='date']")&.texts)
    }
  end

  # Converts the text of a "posted N days ago" label into a calendar date.
  #
  # The first run of digits anywhere in the label is taken as the number of
  # days; labels without digits (or a missing label) count as zero days.
  #
  # @param texts [Array<#to_s>, nil] text fragments of the date label
  # @param today [Date] reference date to count back from
  # @return [String] the posting date in ISO 8601 form (YYYY-MM-DD)
  def posted_date(texts, today = Date.today)
    days_ago = Array(texts).join(' ')[/\d+/].to_i
    (today - days_ago).to_s
  end
end
data.tar.gz.sig ADDED
@@ -0,0 +1,3 @@
1
+ o����(-i}
2
+ ?*�i��b��S�='IS�x�$`�Wo��LBtM���sK�"����r]O�d��$Y�7[HY(x[OLL&Rc:��惆>q�ڶ�����e4�9��N�v�5g��%��YD.� �۷�@�m� &�IK B_i�ٽt�s��L����\�!���n�]vR�>�)��� MB�ML%�����<#8�_��=]�V� ��>~X$�<Lt����e�-���SKU0d�)�%��&�8���
3
+ (]�d��3=�@����I 3�(�dd��<�
metadata ADDED
@@ -0,0 +1,111 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: indeed_scraper2022
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMTI0MTkxMjI1WhcN
15
+ MjMwMTI0MTkxMjI1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDExD/K
17
+ ccgeJeTOPpYSQXUzgyjX9z9wdK/Y/a6hZw8DIaYRac/4nWKEeCtFFOKJ1DK9hpba
18
+ jVnY5H4ZRtbp/S4ZBimUTgRwlUVbEz/93Nfnf0ZKn7yyCFyfJCedRW/lqU5ACVR7
19
+ DkdsGSXxQAeq87GAuW/ofCIWBB/RCrWsj7JYpDnIg+oLMOm1nBQxmiPPkW1H2zpg
20
+ OvWY6wItQuOxxdk5VBioecASXmQsanLyDyNSycvos24cS0msbbZfs1PuU42HxTS+
21
+ OjfOLRk1F02VA/lTVJCxdJx4u4jcDVJhqFP61suUN/oUFDob/rQhIT8EUITkPLY5
22
+ zhxDzCSZs1q5WlkTQ7MIGB1rgl8YwSfRgOhZtjkSPddK+iSRsME1pen0R6sNctRC
23
+ hsDNFaKAT41ZV/krNSD2lP0p7BUlKZQ6c2Whcw8NlqZkLO85CYrp5lov9EgAA+6M
24
+ bc4YR2Rg9lOwYFKxKVkMNGpymQANedbGJmOHDgA2O+nL9vi6uFD+HaylJx0CAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUC7f4K/R/
26
+ u34HepeoZ/BBZuOx2nQwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEAweTlInWOWdP/OB0U1R5fdHeBOs9I2cn9OqH1I8AZ
29
+ 3VGthsmza7nrrc94hCE1lQWSKwSahFPLY8AQoGibnPknCAuD5MvKSCRn3n8I7som
30
+ fCb2z05iv1ps49aUSLOp9nvQd2ghqIwZ2u7d7vnUvUh3FiG49KttLrnnJh4hrgpx
31
+ oMlT4cog/BtPs3pawgt3nbbTDamb3MTQyIMuo3uSjL867+J4I6vcDpPgCd0lPiS8
32
+ 2ZZuRklW3qzm2Vhi7Uhxtm+POst/W6gix+3Acsx3jk+LQTSRLjvCaOCmY0ooi8D+
33
+ NiLKNdKO0oC8h9MyC+5EOq8Msfti3Vjr1kuFa7bcxzwSIQPFpoo+30V/uAEc0OYQ
34
+ qXLj/YPWi7B+J84HaWwSktqO8O0VYonAywpXTz0TcAv+6gsNF9HorTXPm/KyB6c6
35
+ YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
36
+ SW/2zInu2bkj/meWm5eBoWHT
37
+ -----END CERTIFICATE-----
38
+ date: 2022-01-24 00:00:00.000000000 Z
39
+ dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: nokorexi
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0.5'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 0.5.5
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - "~>"
55
+ - !ruby/object:Gem::Version
56
+ version: '0.5'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 0.5.5
60
+ - !ruby/object:Gem::Dependency
61
+ name: mechanize
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '2.8'
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: 2.8.4
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '2.8'
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: 2.8.4
80
+ description:
81
+ email: james@jamesrobertson.eu
82
+ executables: []
83
+ extensions: []
84
+ extra_rdoc_files: []
85
+ files:
86
+ - lib/indeed_scraper2022.rb
87
+ homepage: https://github.com/jrobertson/indeed_scraper2022
88
+ licenses:
89
+ - MIT
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.7.10
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: Attempts to scrape the indeed.com jobsearch results (1 page).
111
+ test_files: []
metadata.gz.sig ADDED
Binary file