indeed_scraper2022 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0455b8d92dab8642f01c53ce43afc11eb24fbff801cce8df3306fd31eebeb223
4
+ data.tar.gz: eb8d24662b095fb0049209534ddfb1f0805454787105e091dc15eb7ca363d1b1
5
+ SHA512:
6
+ metadata.gz: 342f6534ef851e12e78bf8e190ea98b437a6503c26510938dcb0021a30033c6be5ae819560a46ab0c11a4b42f833238517bbf9779c35c11e671d3a81ed8eedc9
7
+ data.tar.gz: 74be894713e2572bef1777cfafe099cc875d258fad6d741703a7cf00d22863c44cf02a877aa5106d73182d17408a16fc16f6b9538f4507c602e5588062a12334
checksums.yaml.gz.sig ADDED
Binary file
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: indeed_scraper2022.rb
4
+
5
+ require 'mechanize'
6
+ require 'nokorexi'
7
+
8
+ # Given the nature of changes to jobsearch websites,
9
+ # don't rely upon this gem working in the near future.
10
+
11
+
12
require 'date'

# Scrapes one page of job search results from an Indeed site.
#
# The search is submitted once, from the constructor, using the first
# form found on the page at +url+. The parsed jobs are then available
# through #results.
#
# The XPath constants below mirror the page markup this gem was written
# against; if the site's markup changes, cards will be skipped or fields
# will come back as nil.
#
class IndeedScraper2022

  # Path from a result anchor down to the job card container.
  CARD_XPATH = ("a[@class='desktop']/div[@class='slider_container']" \
                "/div[@class='slider_list']/div[@class='slider_item']" \
                "/div[@class='job_seen_beacon']").freeze

  # Paths relative to the job card container.
  MAIN_XPATH  = ("table[@class='jobCard_mainContent']/tbody/tr" \
                 "/td[@class='resultContent']").freeze
  SHELF_XPATH = "table[@class='jobCardShelfContainer']/tbody".freeze

  # Paths relative to the main content cell.
  TITLE_XPATH   = ("div[@class='tapItem-gutter']" \
                   "/h2[@class='jobTitle-color-purple']/span").freeze
  SALARY_XPATH  = ("div[@class='metadataContainer']" \
                   "/div[@class='salary-snippet-container']" \
                   "/div[@class='salary-snippet']/span").freeze
  COMPANY_XPATH = "div[@class='companyInfo']".freeze

  # Paths relative to the company info element.
  COMPANY_NAME_XPATH     = "span[@class='companyName']".freeze
  COMPANY_LOCATION_XPATH = "div[@class='companyLocation']".freeze

  # Path relative to the shelf tbody, then paths relative to the footer.
  FOOTER_XPATH  = ("tr[@class='underShelfFooter']/td" \
                   "/div[@class='result-footer']").freeze
  SNIPPET_XPATH = "div[@class='job-snippet']/ul/li".freeze
  DATE_XPATH    = "span[@class='date']".freeze

  # url      - page holding the search form
  # q        - value written into the form's first field
  # location - value written into the form's second field
  # debug    - when true, progress information is printed with puts
  #
  # Note: the search (a network request) runs immediately.
  #
  def initialize(url='https://uk.indeed.com/?r=us', q: '', location: '', debug: false)

    @debug = debug
    @url_base, @q, @location = url, q, location
    @results = search

  end

  # returns an array containing the job search result; each entry is a
  # Hash with the keys :title, :salary, :company, :location, :jobsnippet
  # and :date. A value is nil when its element was not found.
  #
  def results()
    @results
  end

  # not implemented; always returns nil
  #
  def page()
  end

  # used for debugging; returns the Rexle document built for each
  # matching result anchor
  #
  def a2()
    @a2
  end

  private

  # Submits the search form and parses every result anchor found in the
  # response. Result anchors without a recognisable job card are skipped
  # rather than aborting the whole search.
  #
  # q, location - default to the values given to the constructor
  #
  def search(q=@q, location=@location)

    agent = Mechanize.new

    form = agent.get(@url_base).forms.first
    form.fields[0].value = q
    form.fields[1].value = location
    pg = form.submit

    doc2 = Nokogiri::XML(pg.body)
    anchors = doc2.xpath "//a[div/div/div/div/table/tbody/tr/td/div]"
    puts 'a2: ' + anchors.length.inspect if @debug

    @a2 = anchors.map {|x| Rexle.new x.to_s }

    @a2.map {|doc| parse_job(doc) }.compact

  end

  # Builds the result Hash for one job card.
  #
  # doc - object responding to #element(xpath), e.g. a Rexle document
  #
  # Returns nil when the card container or its main content cell is
  # missing. Individual fields that cannot be found are returned as nil.
  #
  def parse_job(doc)

    card = doc.element(CARD_XPATH)
    main = card&.element(MAIN_XPATH)

    unless main
      puts 'skipping result: job card not found' if @debug
      return nil
    end

    # job title (e.g. Software Developer)
    jobtitle = text_at(main, TITLE_XPATH)
    puts 'jobtitle: ' + jobtitle.inspect if @debug

    # salary is not present on every card
    salary = text_at(main, SALARY_XPATH)
    puts 'salary: ' + salary.inspect if @debug

    company = main.element(COMPANY_XPATH)
    footer = card.element(SHELF_XPATH)&.element(FOOTER_XPATH)

    {
      title: jobtitle,
      salary: salary,
      # company name (e.g. Coda Octopus Products Ltd)
      company: text_at(company, COMPANY_NAME_XPATH),
      # company location (e.g. Edinburgh)
      location: text_at(company, COMPANY_LOCATION_XPATH),
      # job (e.g. Our products are primarily written in C#, using...)
      jobsnippet: text_at(footer, SNIPPET_XPATH),
      # shown on the page as e.g. "Posted 14 days ago"
      date: posted_date(footer&.element(DATE_XPATH)&.texts)
    }

  end

  # Returns the text of the element at xpath below node, or nil when
  # either node or the element is missing.
  #
  def text_at(node, xpath)
    node&.element(xpath)&.text
  end

  # Converts the text of the "posted" label into an ISO 8601 date string.
  #
  # texts - array of strings (e.g. ["14 days ago"]), or nil when the
  #         date element was not found
  #
  # The first run of digits anywhere in the label is taken as the number
  # of days ago; a label without digits (e.g. "Just posted") is treated
  # as 0 days ago. Returns nil when texts is nil.
  #
  def posted_date(texts)

    return nil if texts.nil?

    days_ago = Array(texts).join(' ')[/\d+/].to_i
    (Date.today - days_ago).to_s

  end
end
data.tar.gz.sig ADDED
@@ -0,0 +1,3 @@
1
+ o����(-i}
2
+ ?*�i��b��S�='IS�x�$`�Wo��LBtM���sK�"����r]O�d��$Y�7[HY(x[OLL&Rc:��惆>q�ڶ�����e4�9��N�v�5g��%��YD.� �۷�@�m� &�IK B_i�ٽt�s��L����\�!���n�]vR�>�)��� MB�ML%�����<#8�_��=]�V� ��>~X$�<Lt����e�-���SKU0d�)�%��&�8���
3
+ (]�d��3=�@����I 3�(�dd��<�
metadata ADDED
@@ -0,0 +1,111 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: indeed_scraper2022
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMTI0MTkxMjI1WhcN
15
+ MjMwMTI0MTkxMjI1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDExD/K
17
+ ccgeJeTOPpYSQXUzgyjX9z9wdK/Y/a6hZw8DIaYRac/4nWKEeCtFFOKJ1DK9hpba
18
+ jVnY5H4ZRtbp/S4ZBimUTgRwlUVbEz/93Nfnf0ZKn7yyCFyfJCedRW/lqU5ACVR7
19
+ DkdsGSXxQAeq87GAuW/ofCIWBB/RCrWsj7JYpDnIg+oLMOm1nBQxmiPPkW1H2zpg
20
+ OvWY6wItQuOxxdk5VBioecASXmQsanLyDyNSycvos24cS0msbbZfs1PuU42HxTS+
21
+ OjfOLRk1F02VA/lTVJCxdJx4u4jcDVJhqFP61suUN/oUFDob/rQhIT8EUITkPLY5
22
+ zhxDzCSZs1q5WlkTQ7MIGB1rgl8YwSfRgOhZtjkSPddK+iSRsME1pen0R6sNctRC
23
+ hsDNFaKAT41ZV/krNSD2lP0p7BUlKZQ6c2Whcw8NlqZkLO85CYrp5lov9EgAA+6M
24
+ bc4YR2Rg9lOwYFKxKVkMNGpymQANedbGJmOHDgA2O+nL9vi6uFD+HaylJx0CAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUC7f4K/R/
26
+ u34HepeoZ/BBZuOx2nQwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEAweTlInWOWdP/OB0U1R5fdHeBOs9I2cn9OqH1I8AZ
29
+ 3VGthsmza7nrrc94hCE1lQWSKwSahFPLY8AQoGibnPknCAuD5MvKSCRn3n8I7som
30
+ fCb2z05iv1ps49aUSLOp9nvQd2ghqIwZ2u7d7vnUvUh3FiG49KttLrnnJh4hrgpx
31
+ oMlT4cog/BtPs3pawgt3nbbTDamb3MTQyIMuo3uSjL867+J4I6vcDpPgCd0lPiS8
32
+ 2ZZuRklW3qzm2Vhi7Uhxtm+POst/W6gix+3Acsx3jk+LQTSRLjvCaOCmY0ooi8D+
33
+ NiLKNdKO0oC8h9MyC+5EOq8Msfti3Vjr1kuFa7bcxzwSIQPFpoo+30V/uAEc0OYQ
34
+ qXLj/YPWi7B+J84HaWwSktqO8O0VYonAywpXTz0TcAv+6gsNF9HorTXPm/KyB6c6
35
+ YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
36
+ SW/2zInu2bkj/meWm5eBoWHT
37
+ -----END CERTIFICATE-----
38
+ date: 2022-01-24 00:00:00.000000000 Z
39
+ dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: nokorexi
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0.5'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 0.5.5
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - "~>"
55
+ - !ruby/object:Gem::Version
56
+ version: '0.5'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 0.5.5
60
+ - !ruby/object:Gem::Dependency
61
+ name: mechanize
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '2.8'
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: 2.8.4
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '2.8'
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: 2.8.4
80
+ description:
81
+ email: james@jamesrobertson.eu
82
+ executables: []
83
+ extensions: []
84
+ extra_rdoc_files: []
85
+ files:
86
+ - lib/indeed_scraper2022.rb
87
+ homepage: https://github.com/jrobertson/indeed_scraper2022
88
+ licenses:
89
+ - MIT
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.7.10
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: Attempts to scrape the indeed.com jobsearch results (1 page).
111
+ test_files: []
metadata.gz.sig ADDED
Binary file