indeed_scraper2022 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/indeed_scraper2022.rb +68 -4
- data.tar.gz.sig +0 -0
- metadata +23 -3
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1cf994e7ea8c7fd89ad96e55d2d3f2d3908b8c565e3f07bad5056e4d63b90b65
|
4
|
+
data.tar.gz: eff54d2290a32a6a78cd31646ceef2736d329b40ea327f72f7a5eb4594dd3fe2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4da62ce0099ee02985981f8f166e371a4b05388998c94b5b5d3e33637e7dc649cc7aefbbeb19e002d777634b0bef9222563aad552c89d575757633c7764b5eb9
|
7
|
+
data.tar.gz: dc4a0bcdb86bef4925880c95c5b558de5e1c4cff0756e134f8502be97e0dc2a16e4e38fd7217db6b9994a02c2384956c90ec7a66913b8baaf5fbdfe92d46b322
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/indeed_scraper2022.rb
CHANGED
@@ -262,7 +262,7 @@ class IS22Plus < IndeedScraper2022
|
|
262
262
|
|
263
263
|
def initialize(q: '', location: '', headless: true, cookies: nil, debug: false)
|
264
264
|
super(q: q, location: location, headless: headless, cookies: cookies,
|
265
|
-
debug:
|
265
|
+
debug: debug)
|
266
266
|
end
|
267
267
|
|
268
268
|
# note: The most efficient method to accumulate vacancy articles is to
|
@@ -342,16 +342,17 @@ end
|
|
342
342
|
|
343
343
|
|
344
344
|
class IS22Archive
|
345
|
+
include RXFReadWriteModule
|
345
346
|
|
346
347
|
attr_reader :index
|
347
348
|
|
348
349
|
def initialize(filepath='/tmp/indeed', debug: false)
|
349
350
|
|
350
|
-
|
351
|
+
FileX.mkdir_p filepath
|
351
352
|
@idxfile = File.join(filepath, 'index.yml')
|
352
353
|
|
353
|
-
@index = if
|
354
|
-
YAML.load(
|
354
|
+
@index = if FileX.exists? @idxfile then
|
355
|
+
YAML.load(FileX.read(@idxfile))
|
355
356
|
else
|
356
357
|
{}
|
357
358
|
end
|
@@ -372,4 +373,67 @@ class IS22Archive
|
|
372
373
|
|
373
374
|
end
|
374
375
|
|
376
|
+
def to_html()
|
377
|
+
|
378
|
+
rows = latest().map do |h|
|
379
|
+
|
380
|
+
puts 'h: ' + h.inspect if @debug
|
381
|
+
co = h[:company].length > 1 ? " (%s)" % h[:company] : ''
|
382
|
+
"* %s: [%s](%s)%s" % [h[:added].strftime("%d %b"), h[:title], h[:link], co]
|
383
|
+
|
384
|
+
end.join("\n")
|
385
|
+
|
386
|
+
|
387
|
+
md = '# Indeed.com: Latest jobs
|
388
|
+
|
389
|
+
' + rows
|
390
|
+
|
391
|
+
RDiscount.new(md).to_html
|
392
|
+
|
393
|
+
end
|
394
|
+
|
395
|
+
def to_form(action: '')
|
396
|
+
|
397
|
+
rows = latest().map.with_index do |h, i|
|
398
|
+
|
399
|
+
co = h[:company].length > 1 ? " (%s)" % h[:company] : ''
|
400
|
+
|
401
|
+
"<input type='checkbox' id='#{h[:jobid]}' name='#{h[:jobid]}' value='#{h[:title]}'/>
|
402
|
+
<label for='j#{i}'>#{h[:added].strftime("%d %b")}: #{h[:title] + ' ' + co}</label><br/>
|
403
|
+
"
|
404
|
+
|
405
|
+
end.join("\n")
|
406
|
+
|
407
|
+
|
408
|
+
return "<form action='#{action}'>#{rows}" +
|
409
|
+
"<input type='submit' value='submit'/></form>"
|
410
|
+
|
411
|
+
end
|
412
|
+
|
413
|
+
def filter(a)
|
414
|
+
|
415
|
+
dx = Dynarex.new
|
416
|
+
a2 = latest().select {|h| a.include? h[:jobid] }
|
417
|
+
dx.import a2
|
418
|
+
|
419
|
+
return dx
|
420
|
+
end
|
421
|
+
|
422
|
+
private
|
423
|
+
|
424
|
+
def latest()
|
425
|
+
|
426
|
+
a = @index.to_a.map do |id, h|
|
427
|
+
h[:jobid] = id
|
428
|
+
h[:added] = Date.parse(h[:added]) if h[:added].is_a? String
|
429
|
+
h
|
430
|
+
end
|
431
|
+
|
432
|
+
a.select do |x|
|
433
|
+
x[:added] >= (Date.today - 7)
|
434
|
+
end.reverse
|
435
|
+
|
436
|
+
end
|
437
|
+
|
375
438
|
end
|
439
|
+
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: indeed_scraper2022
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
|
36
36
|
SW/2zInu2bkj/meWm5eBoWHT
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2022-05-
|
38
|
+
date: 2022-05-25 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: nokorexi
|
@@ -78,7 +78,7 @@ dependencies:
|
|
78
78
|
- !ruby/object:Gem::Version
|
79
79
|
version: 0.3.1
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
|
-
name:
|
81
|
+
name: url_reveal22
|
82
82
|
requirement: !ruby/object:Gem::Requirement
|
83
83
|
requirements:
|
84
84
|
- - "~>"
|
@@ -97,6 +97,26 @@ dependencies:
|
|
97
97
|
- - ">="
|
98
98
|
- !ruby/object:Gem::Version
|
99
99
|
version: 0.1.0
|
100
|
+
- !ruby/object:Gem::Dependency
|
101
|
+
name: dynarex
|
102
|
+
requirement: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - "~>"
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '1.9'
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.9.11
|
110
|
+
type: :runtime
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - "~>"
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '1.9'
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: 1.9.11
|
100
120
|
description:
|
101
121
|
email: digital.robertson@gmail.com
|
102
122
|
executables: []
|
metadata.gz.sig
CHANGED
Binary file
|