indeed_scraper2022 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5e33dfd54667ecc9f8b7985aa07af403be8d95729ce68e8a40d8c985d57bd4e1
4
- data.tar.gz: a2c041ec8103b6afac3a422e7b73bc82c89fd7f8d955240439a29ec0347c8a5f
3
+ metadata.gz: 972f811430fae59121e39c9c4752b64fc43b37165a52dcec8c3eac42cf1e4555
4
+ data.tar.gz: 85e987eb264b098b4c892e2e05d2ab082e3b8968fff2bdd519552e889f014f9d
5
5
  SHA512:
6
- metadata.gz: 8e640cb8262a057bb588b501ee1122a59e6e239e2a5988dd0566ffffb814a2fef763c36fdeae1ba5dc4e6f819ca145374058bd62373ce776df1e393057a49fc0
7
- data.tar.gz: 0a6bfe0ef2b685d5711a95704cee3fa67d58eb7c9d0f149c872f9c23b0cc489382ab1327b101f7accf982f7ea1f1a6d56dc20ae528ef1fd4d6105c9ef93067da
6
+ metadata.gz: e7d3c2a13e315383248c557806dd0184d8831f46a2e314816395f62fcb886ba2e38a3e1f2deb180ceb33b614cf0b7be8a13379028fc56982e388948575bdb02c
7
+ data.tar.gz: 6ca4792d260c43b22fcee5ead8928525df17f1db112c49ac4cc7d7a5c9b29a8f483b94c18cd39bace1d4a4ee553ff83254a7fb445d76abf06c1705d19bac455c
checksums.yaml.gz.sig CHANGED
Binary file
@@ -4,11 +4,13 @@
4
4
 
5
5
  require 'ferrumwizard'
6
6
  require 'nokorexi'
7
+ require 'yaml'
7
8
 
8
9
  # Given the nature of changes to jobsearch websites,
9
10
  # don't rely upon this gem working in the near future.
10
11
 
11
12
 
13
+
12
14
  class IndeedScraper2022Err < Exception
13
15
  end
14
16
 
@@ -218,14 +220,53 @@ class IS22Plus < IndeedScraper2022
218
220
  debug: debug)
219
221
  end
220
222
 
221
- def archive()
223
+ def archive(filepath='/tmp/indeed')
222
224
 
223
225
  return unless @results
224
226
 
225
- 1.upto(@results.length).each do |n|
226
- page(n)
227
+ FileUtils.mkdir_p filepath
228
+
229
+ idxfile = File.join(filepath, 'index.yml')
230
+
231
+ index = if File.exists? idxfile then
232
+ YAML.load(File.read(idxfile))
233
+ else
234
+ {}
227
235
  end
228
236
 
237
+ @results.each.with_index do |item, i|
238
+
239
+ puts 'saving ' + item[:title] if @debug
240
+ puts 'link: ' + item[:link].inspect
241
+ links = RXFReader.reveal(item[:link])
242
+ puts 'links: ' + links.inspect
243
+
244
+ url = links.last
245
+ id = url[/(?<=\?jk=)[^&]+/]
246
+
247
+ if index[id.to_sym] then
248
+ next
249
+ else
250
+
251
+ File.write File.join(filepath, 'j' + id + '.txt'), page(i+1)
252
+
253
+ h = {
254
+ link: url[/^[^&]+/],
255
+ title: item[:title].to_s,
256
+ salary: item[:salary].to_s,
257
+ company: item[:company].to_s.strip,
258
+ location: item[:location].to_s,
259
+ jobsnippet: item[:jobsnippet],
260
+ date: item[:date]
261
+ }
262
+
263
+ index[id.to_sym] = h
264
+ end
265
+
266
+ end
267
+
268
+ File.write idxfile, index.to_yaml
269
+
229
270
  end
230
271
 
231
272
  def list()
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indeed_scraper2022
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
36
36
  SW/2zInu2bkj/meWm5eBoWHT
37
37
  -----END CERTIFICATE-----
38
- date: 2022-03-30 00:00:00.000000000 Z
38
+ date: 2022-04-01 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: nokorexi
metadata.gz.sig CHANGED
Binary file