jobs_crawler 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/jobs_crawler/indexers/atoo_job.rb +13 -0
- data/lib/jobs_crawler/robots/atoo_job.rb +39 -0
- data/lib/jobs_crawler/robots/emploi_senegal.rb +13 -0
- data/lib/jobs_crawler/robots/pages_jaunes_senegal.rb +13 -0
- data/lib/jobs_crawler/robots/senjob.rb +9 -0
- data/lib/jobs_crawler/robots.rb +12 -0
- data/lib/jobs_crawler/version.rb +1 -1
- data/lib/jobs_crawler.rb +1 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 07b12ecf9c7a3bbaa3b6a64a7df48ed4a0ea5d73b6500e75a6324ed35a17cafc
|
4
|
+
data.tar.gz: ce32b52ec4c6cb59ec21383b85c9d9dbdd7a7b4f81f9834b2db309fd71a0c7ae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f1981134e58e19aef91430542eb611bf4267e01b4b9ce3f6cf9b1dedde907df0692b45a1d7aa1951dfe0cd3fc097368b550d9864d3d480db34deafa51b8453fa
|
7
|
+
data.tar.gz: cba1310909ee0b858c530894e6a7dfa4b643a952ca9ef878f9dd11f4e71a1d69681ac696cccdaf77e13e4546ca69a5332661a3609e8cea60710062c352012ac0
|
data/Gemfile.lock
CHANGED
data/lib/jobs_crawler/indexers/atoo_job.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
module JobsCrawler
  # Wombat crawler declaration for the page at https://www.atoojob.com/jobs.
  #
  # Declares a single `links` iterator over the nodes matched by
  # `.cs-post-title h5 a`. For each matched node two properties are read
  # relative to that node:
  #   * `url`   - the value selected by `.//@href`
  #   * `titre` - the value selected by `.//text()`
  class Indexers::AtooJob
    include Wombat::Crawler

    base_url 'https://www.atoojob.com'
    path '/jobs'

    links('css=.cs-post-title h5 a', :iterator) do
      url xpath: './/@href'
      titre xpath: './/text()'
    end
  end
end
|
data/lib/jobs_crawler/robots/atoo_job.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
module JobsCrawler
  # Fetches one page with Mechanize, parses it with Nokogiri and extracts two
  # text fields (publication date and description) located by fixed CSS paths.
  class Robots::AtooJob
    # CSS path of the node(s) whose text is returned as the description.
    DESCRIPTION_SELECTOR = '#main > article > div.main-section.jobs-detail-3 > div:nth-child(2) > div > div > div.section-content.col-lg-8.col-md-8.col-sm-12.col-xs-12 > div.row > div.col-lg-8.col-md-8.col-sm-12.col-xs-12.col-xs-12 > div'.freeze

    # CSS path of the node(s) whose text is returned as the publication date.
    DATE_PUBLICATION_SELECTOR = '#main > article > div.main-section.jobs-detail-3 > div:nth-child(1) > div > div > div > div > div > div > div.cs-text > ul > li:nth-child(1) > span'.freeze

    attr_reader :url

    # @param url address of the page to fetch; handed unchanged to
    #   `Mechanize#get` (presumably a String or URI - confirm against callers)
    def initialize(url)
      @url = url
      @engine = Mechanize.new
    end

    # Downloads and parses the page at +url+, then extracts its fields.
    # Every call performs a new HTTP request.
    #
    # @return [Hash] with the keys +:date_de_publication+ and +:description+,
    #   each holding the text of the nodes matched by the corresponding selector
    #   (an empty string when nothing matches)
    def crawl
      set_html
      attributes
    end

    private

    # Parses the freshly downloaded body and keeps the document for the
    # extraction helpers below.
    def set_html
      @html = Nokogiri::HTML(body)
    end

    # Builds the Hash returned by #crawl.
    #
    # This helper used to be called `to_json`, although it returns a Hash and
    # not a JSON string. As a private method it also hid the public
    # `Object#to_json` that the json library defines, so serializing an
    # instance from outside would raise NoMethodError once json is loaded.
    def attributes
      {
        date_de_publication: date_publication,
        description: description
      }
    end

    # Text of the description node(s).
    def description
      @html.css(DESCRIPTION_SELECTOR).text
    end

    # Text of the publication date node(s).
    def date_publication
      @html.css(DATE_PUBLICATION_SELECTOR).text
    end

    # Raw response body of a GET request on +url+.
    def body
      @engine.get(url).body
    end
  end
end
|
data/lib/jobs_crawler/robots/emploi_senegal.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
module JobsCrawler
  # Wombat crawler declaration for the page at
  # https://www.emploisenegal.com/recherche-jobs-senegal.
  #
  # Declares a single `links` iterator over the nodes matched by
  # `.search-results .job-description-wrapper`. For each matched node two
  # properties are read relative to that node:
  #   * `url`   - the value selected by `.//@data-href`
  #   * `titre` - the value selected by `.//text()`
  class Robots::EmploiSenegal
    include Wombat::Crawler

    base_url 'https://www.emploisenegal.com'
    path '/recherche-jobs-senegal'

    links('css=.search-results .job-description-wrapper', :iterator) do
      url xpath: './/@data-href'
      titre xpath: './/text()'
    end
  end
end
|
data/lib/jobs_crawler/robots/pages_jaunes_senegal.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
module JobsCrawler
  # Wombat crawler declaration with a single `links` iterator that reads a
  # `url` (`.//@data-href`) and a `titre` (`.//text()`) from every node
  # matched by `.search-results .job-description-wrapper`.
  #
  # NOTE(review): despite the class name, base_url, path and all selectors are
  # identical to those of Robots::EmploiSenegal and point at
  # www.emploisenegal.com. This looks like an unfinished copy - confirm the
  # intended site and selectors before relying on this robot.
  class Robots::PagesJaunesSenegal
    include Wombat::Crawler

    base_url 'https://www.emploisenegal.com'
    path '/recherche-jobs-senegal'

    links('css=.search-results .job-description-wrapper', :iterator) do
      url xpath: './/@data-href'
      titre xpath: './/text()'
    end
  end
end
|
data/lib/jobs_crawler/robots/senjob.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
module JobsCrawler::Robots
  # Wombat crawler declaration exposing three properties: `description`,
  # `reference` and `deadline`.
  #
  # No base_url or path is declared in this class.
  #
  # NOTE(review): `reference` and `deadline` are located purely by position
  # (`div[3]` / `div:nth-child(16)`, fifth row, second cell) and both include
  # an explicit `tbody` step - verify them against the markup actually served.
  class Senjob
    include Wombat::Crawler

    description 'css=#articlebi .preview'
    reference 'xpath=//html/body/div[3]/table/tbody/tr[5]/td[2]'
    deadline 'css=body > div:nth-child(16) > table > tbody > tr:nth-child(5) > td:nth-child(2)'
  end
end
|
data/lib/jobs_crawler/robots.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Namespace for the per-site robots, plus a convenience entry point.
module JobsCrawler::Robots
  # The requires stay inside the module body on purpose: the robot files
  # declare their classes as `Robots::...` under `module JobsCrawler`, so the
  # Robots constant has to exist before they are loaded.
  require 'jobs_crawler/robots/senjob'
  require 'jobs_crawler/robots/atoo_job'
  require 'jobs_crawler/robots/emploi_senegal'
  require 'jobs_crawler/robots/pages_jaunes_senegal'

  # Builds an AtooJob robot for +url+ and returns the result of its #crawl.
  def self.crawl_atoo_job(url)
    robot = JobsCrawler::Robots::AtooJob.new(url)
    robot.crawl
  end
end
|
data/lib/jobs_crawler/version.rb
CHANGED
data/lib/jobs_crawler.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jobs_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pathe
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-04-
|
11
|
+
date: 2019-04-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -88,9 +88,15 @@ files:
|
|
88
88
|
- jobs_crawler.gemspec
|
89
89
|
- lib/jobs_crawler.rb
|
90
90
|
- lib/jobs_crawler/indexers.rb
|
91
|
+
- lib/jobs_crawler/indexers/atoo_job.rb
|
91
92
|
- lib/jobs_crawler/indexers/emploi_senegal.rb
|
92
93
|
- lib/jobs_crawler/indexers/pages_jaunes_senegal.rb
|
93
94
|
- lib/jobs_crawler/indexers/senjob.rb
|
95
|
+
- lib/jobs_crawler/robots.rb
|
96
|
+
- lib/jobs_crawler/robots/atoo_job.rb
|
97
|
+
- lib/jobs_crawler/robots/emploi_senegal.rb
|
98
|
+
- lib/jobs_crawler/robots/pages_jaunes_senegal.rb
|
99
|
+
- lib/jobs_crawler/robots/senjob.rb
|
94
100
|
- lib/jobs_crawler/version.rb
|
95
101
|
homepage: http://taag-at.com
|
96
102
|
licenses:
|