indeed_scraper2022 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0455b8d92dab8642f01c53ce43afc11eb24fbff801cce8df3306fd31eebeb223
4
- data.tar.gz: eb8d24662b095fb0049209534ddfb1f0805454787105e091dc15eb7ca363d1b1
3
+ metadata.gz: 50a484cf1a272522091413129241620336f12ca94d795b7ab132dd6911802d1c
4
+ data.tar.gz: 06dffee1253aa5076da9b6897bc48009687e8df86aa1a3629ee1d8a4432fdd13
5
5
  SHA512:
6
- metadata.gz: 342f6534ef851e12e78bf8e190ea98b437a6503c26510938dcb0021a30033c6be5ae819560a46ab0c11a4b42f833238517bbf9779c35c11e671d3a81ed8eedc9
7
- data.tar.gz: 74be894713e2572bef1777cfafe099cc875d258fad6d741703a7cf00d22863c44cf02a877aa5106d73182d17408a16fc16f6b9538f4507c602e5588062a12334
6
+ metadata.gz: 7327fc5bf9668c4f292eabf673574bfd7ca9fbf180133896c559ac1b7415d4ee880365302d24550aba4b83ee2a709a77bdb059145310e4a5de21840fe11a5058
7
+ data.tar.gz: a84b587275793166a7ac40d63607c8ea2f8dd340a3197ce782f901ca8c27de27b8eeef7f36fc9399aba286ddc399da109b2d14cd9db99b3c6a4a545e6ad9f21c
checksums.yaml.gz.sig CHANGED
Binary file
@@ -25,17 +25,74 @@ class IndeedScraper2022
25
25
  @results
26
26
  end
27
27
 
28
- def page()
29
- end
30
-
31
- # used for debugging
32
- #
33
- def a2()
34
- @a2
28
+ def page(n)
29
+ url = @results[n-1][:link]
30
+ fetchjob(url)
35
31
  end
36
32
 
37
33
  private
38
34
 
35
+ def fetchjob(url)
36
+
37
+ doc = Nokorexi.new(url).to_doc
38
+ e0 = doc.element("html/body/div/div/div/div/div/div/div/div")
39
+
40
+ #div = e0.element("//div[@class='jobsearch-JobComponent']")
41
+ div1 = e0.element("//div[@class='jobsearch-DesktopStickyContainer']")
42
+ div2 = div1.element("div")
43
+
44
+ # jobsearch (e.g. Full Stack Website Developer (Wordpress))
45
+ jobtitle = div2.element("div[@class='jobsearch-JobInfoHead" \
46
+ "er-title-container']/h1[@class='jobsearch-JobInfoHead" \
47
+ "er-title']").text
48
+
49
+ div3 = div2.element("div[@class='jobsearch-CompanyInfoCon" \
50
+ "tainer']/div[@class='jobsearch-CompanyInfoWithoutHead" \
51
+ "erImage']/div/div[@class='jobsearch-DesktopStickyCont" \
52
+ "ainer-subtitle']")
53
+
54
+ # icl (e.g. Lyles Sutherland)
55
+ cname = div3.xpath("div[@class='jobsearch-DesktopSt" \
56
+ "ickyContainer-companyrating']/div/div[@class='icl-u-x" \
57
+ "s-mr--xs']")[1]
58
+ clink = div3.element('//a')
59
+ company = cname ? cname.text : clink.text
60
+ companylink = clink.attributes[:href] if clink
61
+
62
+ div5 = div3.xpath("div/div")
63
+ location, worklocation = div5.map(&:text).compact
64
+
65
+ # icl (e.g. Full-time, Permanent)
66
+ jobtype = div1.element("div/div/div[@class='jobsearch-J" \
67
+ "obMetadataHeader-item']/span[@class='icl-u-xs-mt--xs']")
68
+ jobtype = jobtype.texts.join if jobtype
69
+
70
+ # jobsearch (e.g. Urgently needed)
71
+ jobnote1 = e0.element("//div[@class='jobsearch-DesktopTag" \
72
+ "']/div[@class='urgently-hiring']/div[@class='jobsearc" \
73
+ "h-DesktopTag-text']")
74
+ jobnote1 = jobnote1.text if jobnote1
75
+
76
+ # jobsearch (e.g. 10 days ago)
77
+ datepost = e0.element("//div[@class='jobsearch-JobTab-con" \
78
+ "tent']/div[@class='jobsearch-JobMetadataFooter']/div").text
79
+
80
+ jobdesc = e0.element("//div[@class='icl-u-xs-mt--md']/div[@cl" \
81
+ "ass='jobsearch-jobDescriptionText']")
82
+
83
+ {
84
+ title: jobtitle,
85
+ company: company,
86
+ companylink: companylink,
87
+ location: location,
88
+ worklocation: worklocation,
89
+ note: jobnote1,
90
+ date: (Date.today - datepost.to_i).to_s,
91
+ desc: jobdesc
92
+ }
93
+
94
+ end
95
+
39
96
  def search(q='', location='')
40
97
 
41
98
  a = Mechanize.new
@@ -47,6 +104,7 @@ class IndeedScraper2022
47
104
  pg = form.submit
48
105
 
49
106
  doc2 = Nokogiri::XML(pg.body)
107
+
50
108
  a2 = doc2.xpath "//a[div/div/div/div/table/tbody/tr/td/div]"
51
109
  puts 'a2: ' + a2.length.inspect if @debug
52
110
 
@@ -90,6 +148,8 @@ class IndeedScraper2022
90
148
  date = (Date.today - dateposted.first.to_i).to_s
91
149
 
92
150
  {
151
+ link: @url_base.sub(/\/[^\/]+$/,'') \
152
+ + doc.root.attributes[:href].gsub(/&amp;/,'&'),
93
153
  title: jobtitle,
94
154
  salary: salary,
95
155
  company: company_name,
@@ -101,3 +161,5 @@ class IndeedScraper2022
101
161
  end
102
162
  end
103
163
  end
164
+
165
+
data.tar.gz.sig CHANGED
@@ -1,3 +1,5 @@
1
- o����(-i}
2
- ?*�i��b��S�='IS�x�$`�Wo��LBtM���sK�"����r]O�d��$Y�7[HY(x[OLL&Rc:��惆>q�ڶ�����e4�9��N�v�5g��%��YD.� �۷�@�m� &�IK B_i�ٽt�s��L����\�!���n�]vR�>�)��� MB�ML%�����<#8�_��=]�V� ��>~X$�<Lt����e�-���SKU0d�)�%��&�8���
3
- (]�d��3=�@����I 3�(�dd��<�
1
+ �u �qfT���t��\!a�?�-�� x���0���}��Ɉ��[Ъ
2
+ ��c�.D���r
3
+ c�Θ�r1���$Pp������0aQ�d��ږ�x.��]~M�|Х�Ex�S1���ͼ�e��2:\)Kj7s��J
4
+ d&�ݙ�堟;[�I�A�v��P'�q~Ub�_�k=>42�i�4�
5
+ �q���W�?׾
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indeed_scraper2022
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
36
36
  SW/2zInu2bkj/meWm5eBoWHT
37
37
  -----END CERTIFICATE-----
38
- date: 2022-01-24 00:00:00.000000000 Z
38
+ date: 2022-01-25 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: nokorexi
metadata.gz.sig CHANGED
Binary file