indeed_scraper2022 0.2.1 → 0.3.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5e33dfd54667ecc9f8b7985aa07af403be8d95729ce68e8a40d8c985d57bd4e1
4
- data.tar.gz: a2c041ec8103b6afac3a422e7b73bc82c89fd7f8d955240439a29ec0347c8a5f
3
+ metadata.gz: 972f811430fae59121e39c9c4752b64fc43b37165a52dcec8c3eac42cf1e4555
4
+ data.tar.gz: 85e987eb264b098b4c892e2e05d2ab082e3b8968fff2bdd519552e889f014f9d
5
5
  SHA512:
6
- metadata.gz: 8e640cb8262a057bb588b501ee1122a59e6e239e2a5988dd0566ffffb814a2fef763c36fdeae1ba5dc4e6f819ca145374058bd62373ce776df1e393057a49fc0
7
- data.tar.gz: 0a6bfe0ef2b685d5711a95704cee3fa67d58eb7c9d0f149c872f9c23b0cc489382ab1327b101f7accf982f7ea1f1a6d56dc20ae528ef1fd4d6105c9ef93067da
6
+ metadata.gz: e7d3c2a13e315383248c557806dd0184d8831f46a2e314816395f62fcb886ba2e38a3e1f2deb180ceb33b614cf0b7be8a13379028fc56982e388948575bdb02c
7
+ data.tar.gz: 6ca4792d260c43b22fcee5ead8928525df17f1db112c49ac4cc7d7a5c9b29a8f483b94c18cd39bace1d4a4ee553ff83254a7fb445d76abf06c1705d19bac455c
checksums.yaml.gz.sig CHANGED
Binary file
@@ -4,11 +4,13 @@
4
4
 
5
5
  require 'ferrumwizard'
6
6
  require 'nokorexi'
7
+ require 'yaml'
7
8
 
8
9
  # Given the nature of changes to jobsearch websites,
9
10
  # don't rely upon this gem working in the near future.
10
11
 
11
12
 
13
+
12
14
  class IndeedScraper2022Err < Exception
13
15
  end
14
16
 
@@ -218,14 +220,53 @@ class IS22Plus < IndeedScraper2022
218
220
  debug: debug)
219
221
  end
220
222
 
221
- def archive()
223
+ def archive(filepath='/tmp/indeed')
222
224
 
223
225
  return unless @results
224
226
 
225
- 1.upto(@results.length).each do |n|
226
- page(n)
227
+ FileUtils.mkdir_p filepath
228
+
229
+ idxfile = File.join(filepath, 'index.yml')
230
+
231
+ index = if File.exists? idxfile then
232
+ YAML.load(File.read(idxfile))
233
+ else
234
+ {}
227
235
  end
228
236
 
237
+ @results.each.with_index do |item, i|
238
+
239
+ puts 'saving ' + item[:title] if @debug
240
+ puts 'link: ' + item[:link].inspect
241
+ links = RXFReader.reveal(item[:link])
242
+ puts 'links: ' + links.inspect
243
+
244
+ url = links.last
245
+ id = url[/(?<=\?jk=)[^&]+/]
246
+
247
+ if index[id.to_sym] then
248
+ next
249
+ else
250
+
251
+ File.write File.join(filepath, 'j' + id + '.txt'), page(i+1)
252
+
253
+ h = {
254
+ link: url[/^[^&]+/],
255
+ title: item[:title].to_s,
256
+ salary: item[:salary].to_s,
257
+ company: item[:company].to_s.strip,
258
+ location: item[:location].to_s,
259
+ jobsnippet: item[:jobsnippet],
260
+ date: item[:date]
261
+ }
262
+
263
+ index[id.to_sym] = h
264
+ end
265
+
266
+ end
267
+
268
+ File.write idxfile, index.to_yaml
269
+
229
270
  end
230
271
 
231
272
  def list()
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indeed_scraper2022
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
36
36
  SW/2zInu2bkj/meWm5eBoWHT
37
37
  -----END CERTIFICATE-----
38
- date: 2022-03-30 00:00:00.000000000 Z
38
+ date: 2022-04-01 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: nokorexi
metadata.gz.sig CHANGED
Binary file