indeed_scraper2022 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0455b8d92dab8642f01c53ce43afc11eb24fbff801cce8df3306fd31eebeb223
4
- data.tar.gz: eb8d24662b095fb0049209534ddfb1f0805454787105e091dc15eb7ca363d1b1
3
+ metadata.gz: 50a484cf1a272522091413129241620336f12ca94d795b7ab132dd6911802d1c
4
+ data.tar.gz: 06dffee1253aa5076da9b6897bc48009687e8df86aa1a3629ee1d8a4432fdd13
5
5
  SHA512:
6
- metadata.gz: 342f6534ef851e12e78bf8e190ea98b437a6503c26510938dcb0021a30033c6be5ae819560a46ab0c11a4b42f833238517bbf9779c35c11e671d3a81ed8eedc9
7
- data.tar.gz: 74be894713e2572bef1777cfafe099cc875d258fad6d741703a7cf00d22863c44cf02a877aa5106d73182d17408a16fc16f6b9538f4507c602e5588062a12334
6
+ metadata.gz: 7327fc5bf9668c4f292eabf673574bfd7ca9fbf180133896c559ac1b7415d4ee880365302d24550aba4b83ee2a709a77bdb059145310e4a5de21840fe11a5058
7
+ data.tar.gz: a84b587275793166a7ac40d63607c8ea2f8dd340a3197ce782f901ca8c27de27b8eeef7f36fc9399aba286ddc399da109b2d14cd9db99b3c6a4a545e6ad9f21c
checksums.yaml.gz.sig CHANGED
Binary file
@@ -25,17 +25,74 @@ class IndeedScraper2022
25
25
  @results
26
26
  end
27
27
 
28
- def page()
29
- end
30
-
31
- # used for debugging
32
- #
33
- def a2()
34
- @a2
28
+ def page(n)
29
+ url = @results[n-1][:link]
30
+ fetchjob(url)
35
31
  end
36
32
 
37
33
  private
38
34
 
35
+ def fetchjob(url)
36
+
37
+ doc = Nokorexi.new(url).to_doc
38
+ e0 = doc.element("html/body/div/div/div/div/div/div/div/div")
39
+
40
+ #div = e0.element("//div[@class='jobsearch-JobComponent']")
41
+ div1 = e0.element("//div[@class='jobsearch-DesktopStickyContainer']")
42
+ div2 = div1.element("div")
43
+
44
+ # jobsearch (e.g. Full Stack Website Developer (Wordpress))
45
+ jobtitle = div2.element("div[@class='jobsearch-JobInfoHead" \
46
+ "er-title-container']/h1[@class='jobsearch-JobInfoHead" \
47
+ "er-title']").text
48
+
49
+ div3 = div2.element("div[@class='jobsearch-CompanyInfoCon" \
50
+ "tainer']/div[@class='jobsearch-CompanyInfoWithoutHead" \
51
+ "erImage']/div/div[@class='jobsearch-DesktopStickyCont" \
52
+ "ainer-subtitle']")
53
+
54
+ # icl (e.g. Lyles Sutherland)
55
+ cname = div3.xpath("div[@class='jobsearch-DesktopSt" \
56
+ "ickyContainer-companyrating']/div/div[@class='icl-u-x" \
57
+ "s-mr--xs']")[1]
58
+ clink = div3.element('//a')
59
+ company = cname ? cname.text : clink.text
60
+ companylink = clink.attributes[:href] if clink
61
+
62
+ div5 = div3.xpath("div/div")
63
+ location, worklocation = div5.map(&:text).compact
64
+
65
+ # icl (e.g. Full-time, Permanent)
66
+ jobtype = div1.element("div/div/div[@class='jobsearch-J" \
67
+ "obMetadataHeader-item']/span[@class='icl-u-xs-mt--xs']")
68
+ jobtype = jobtype.texts.join if jobtype
69
+
70
+ # jobsearch (e.g. Urgently needed)
71
+ jobnote1 = e0.element("//div[@class='jobsearch-DesktopTag" \
72
+ "']/div[@class='urgently-hiring']/div[@class='jobsearc" \
73
+ "h-DesktopTag-text']")
74
+ jobnote1 = jobnote1.text if jobnote1
75
+
76
+ # jobsearch (e.g. 10 days ago)
77
+ datepost = e0.element("//div[@class='jobsearch-JobTab-con" \
78
+ "tent']/div[@class='jobsearch-JobMetadataFooter']/div").text
79
+
80
+ jobdesc = e0.element("//div[@class='icl-u-xs-mt--md']/div[@cl" \
81
+ "ass='jobsearch-jobDescriptionText']")
82
+
83
+ {
84
+ title: jobtitle,
85
+ company: company,
86
+ companylink: companylink,
87
+ location: location,
88
+ worklocation: worklocation,
89
+ note: jobnote1,
90
+ date: (Date.today - datepost.to_i).to_s,
91
+ desc: jobdesc
92
+ }
93
+
94
+ end
95
+
39
96
  def search(q='', location='')
40
97
 
41
98
  a = Mechanize.new
@@ -47,6 +104,7 @@ class IndeedScraper2022
47
104
  pg = form.submit
48
105
 
49
106
  doc2 = Nokogiri::XML(pg.body)
107
+
50
108
  a2 = doc2.xpath "//a[div/div/div/div/table/tbody/tr/td/div]"
51
109
  puts 'a2: ' + a2.length.inspect if @debug
52
110
 
@@ -90,6 +148,8 @@ class IndeedScraper2022
90
148
  date = (Date.today - dateposted.first.to_i).to_s
91
149
 
92
150
  {
151
+ link: @url_base.sub(/\/[^\/]+$/,'') \
152
+ doc.root.attributes[:href].gsub(/&amp;/,'&'),
93
153
  title: jobtitle,
94
154
  salary: salary,
95
155
  company: company_name,
@@ -101,3 +161,5 @@ class IndeedScraper2022
101
161
  end
102
162
  end
103
163
  end
164
+
165
+
data.tar.gz.sig CHANGED
@@ -1,3 +1,5 @@
1
- o����(-i}
2
- ?*�i��b��S�='IS�x�$`�Wo��LBtM���sK�"����r]O�d��$Y�7[HY(x[OLL&Rc:��惆>q�ڶ�����e4�9��N�v�5g��%��YD.� �۷�@�m� &�IK B_i�ٽt�s��L����\�!���n�]vR�>�)��� MB�ML%�����<#8�_��=]�V� ��>~X$�<Lt����e�-���SKU0d�)�%��&�8���
3
- (]�d��3=�@����I 3�(�dd��<�
1
+ �u �qfT���t��\!a�?�-�� x���0���}��Ɉ��[Ъ
2
+ ��c�.D���r
3
+ c�Θ�r1���$Pp������0aQ�d��ږ�x.��]~M�|Х�Ex�S1���ͼ�e��2:\)Kj7s��J
4
+ d&�ݙ�堟;[�I�A�v��P'�q~Ub�_�k=>42�i�4�
5
+ �q���W�?׾
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indeed_scraper2022
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
36
36
  SW/2zInu2bkj/meWm5eBoWHT
37
37
  -----END CERTIFICATE-----
38
- date: 2022-01-24 00:00:00.000000000 Z
38
+ date: 2022-01-25 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: nokorexi
metadata.gz.sig CHANGED
Binary file