indeed_scraper2022 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/indeed_scraper2022.rb +68 -4
- data.tar.gz.sig +0 -0
- metadata +23 -3
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1cf994e7ea8c7fd89ad96e55d2d3f2d3908b8c565e3f07bad5056e4d63b90b65
|
4
|
+
data.tar.gz: eff54d2290a32a6a78cd31646ceef2736d329b40ea327f72f7a5eb4594dd3fe2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4da62ce0099ee02985981f8f166e371a4b05388998c94b5b5d3e33637e7dc649cc7aefbbeb19e002d777634b0bef9222563aad552c89d575757633c7764b5eb9
|
7
|
+
data.tar.gz: dc4a0bcdb86bef4925880c95c5b558de5e1c4cff0756e134f8502be97e0dc2a16e4e38fd7217db6b9994a02c2384956c90ec7a66913b8baaf5fbdfe92d46b322
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/indeed_scraper2022.rb
CHANGED
@@ -262,7 +262,7 @@ class IS22Plus < IndeedScraper2022
|
|
262
262
|
|
263
263
|
def initialize(q: '', location: '', headless: true, cookies: nil, debug: false)
|
264
264
|
super(q: q, location: location, headless: headless, cookies: cookies,
|
265
|
-
debug:
|
265
|
+
debug: debug)
|
266
266
|
end
|
267
267
|
|
268
268
|
# note: The most efficient method to accumulate vacancy articles is to
|
@@ -342,16 +342,17 @@ end
|
|
342
342
|
|
343
343
|
|
344
344
|
class IS22Archive
|
345
|
+
include RXFReadWriteModule
|
345
346
|
|
346
347
|
attr_reader :index
|
347
348
|
|
348
349
|
def initialize(filepath='/tmp/indeed', debug: false)
|
349
350
|
|
350
|
-
|
351
|
+
FileX.mkdir_p filepath
|
351
352
|
@idxfile = File.join(filepath, 'index.yml')
|
352
353
|
|
353
|
-
@index = if
|
354
|
-
YAML.load(
|
354
|
+
@index = if FileX.exists? @idxfile then
|
355
|
+
YAML.load(FileX.read(@idxfile))
|
355
356
|
else
|
356
357
|
{}
|
357
358
|
end
|
@@ -372,4 +373,67 @@ class IS22Archive
|
|
372
373
|
|
373
374
|
end
|
374
375
|
|
376
|
+
def to_html()
|
377
|
+
|
378
|
+
rows = latest().map do |h|
|
379
|
+
|
380
|
+
puts 'h: ' + h.inspect if @debug
|
381
|
+
co = h[:company].length > 1 ? " (%s)" % h[:company] : ''
|
382
|
+
"* %s: [%s](%s)%s" % [h[:added].strftime("%d %b"), h[:title], h[:link], co]
|
383
|
+
|
384
|
+
end.join("\n")
|
385
|
+
|
386
|
+
|
387
|
+
md = '# Indeed.com: Latest jobs
|
388
|
+
|
389
|
+
' + rows
|
390
|
+
|
391
|
+
RDiscount.new(md).to_html
|
392
|
+
|
393
|
+
end
|
394
|
+
|
395
|
+
def to_form(action: '')
|
396
|
+
|
397
|
+
rows = latest().map.with_index do |h, i|
|
398
|
+
|
399
|
+
co = h[:company].length > 1 ? " (%s)" % h[:company] : ''
|
400
|
+
|
401
|
+
"<input type='checkbox' id='#{h[:jobid]}' name='#{h[:jobid]}' value='#{h[:title]}'/>
|
402
|
+
<label for='j#{i}'>#{h[:added].strftime("%d %b")}: #{h[:title] + ' ' + co}</label><br/>
|
403
|
+
"
|
404
|
+
|
405
|
+
end.join("\n")
|
406
|
+
|
407
|
+
|
408
|
+
return "<form action='#{action}'>#{rows}" +
|
409
|
+
"<input type='submit' value='submit'/></form>"
|
410
|
+
|
411
|
+
end
|
412
|
+
|
413
|
+
def filter(a)
|
414
|
+
|
415
|
+
dx = Dynarex.new
|
416
|
+
a2 = latest().select {|h| a.include? h[:jobid] }
|
417
|
+
dx.import a2
|
418
|
+
|
419
|
+
return dx
|
420
|
+
end
|
421
|
+
|
422
|
+
private
|
423
|
+
|
424
|
+
def latest()
|
425
|
+
|
426
|
+
a = @index.to_a.map do |id, h|
|
427
|
+
h[:jobid] = id
|
428
|
+
h[:added] = Date.parse(h[:added]) if h[:added].is_a? String
|
429
|
+
h
|
430
|
+
end
|
431
|
+
|
432
|
+
a.select do |x|
|
433
|
+
x[:added] >= (Date.today - 7)
|
434
|
+
end.reverse
|
435
|
+
|
436
|
+
end
|
437
|
+
|
375
438
|
end
|
439
|
+
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: indeed_scraper2022
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
|
36
36
|
SW/2zInu2bkj/meWm5eBoWHT
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2022-05-
|
38
|
+
date: 2022-05-25 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: nokorexi
|
@@ -78,7 +78,7 @@ dependencies:
|
|
78
78
|
- !ruby/object:Gem::Version
|
79
79
|
version: 0.3.1
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
|
-
name:
|
81
|
+
name: url_reveal22
|
82
82
|
requirement: !ruby/object:Gem::Requirement
|
83
83
|
requirements:
|
84
84
|
- - "~>"
|
@@ -97,6 +97,26 @@ dependencies:
|
|
97
97
|
- - ">="
|
98
98
|
- !ruby/object:Gem::Version
|
99
99
|
version: 0.1.0
|
100
|
+
- !ruby/object:Gem::Dependency
|
101
|
+
name: dynarex
|
102
|
+
requirement: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - "~>"
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '1.9'
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.9.11
|
110
|
+
type: :runtime
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - "~>"
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '1.9'
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: 1.9.11
|
100
120
|
description:
|
101
121
|
email: digital.robertson@gmail.com
|
102
122
|
executables: []
|
metadata.gz.sig
CHANGED
Binary file
|