jobs_crawler 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/jobs_crawler/robots/atoo_job.rb +5 -24
- data/lib/jobs_crawler/robots/base.rb +33 -0
- data/lib/jobs_crawler/robots/emploi_senegal.rb +19 -10
- data/lib/jobs_crawler/robots/pages_jaunes_senegal.rb +16 -9
- data/lib/jobs_crawler/robots/senjob.rb +16 -5
- data/lib/jobs_crawler/robots.rb +9 -0
- data/lib/jobs_crawler/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0911fc283f16a8ffac616ba37ab85b8cc6dc546db997923e8c8389ace1217b6f'
|
4
|
+
data.tar.gz: 2a7479d3999a5f61b957c2b2295c5d2b60f625ba67eba8a1c048773cda515d20
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80aa00dccd2f024104ea007f3ebc132ea3d25d42684634e3a0876fa2c95434988c426ba9a0704992cda5f99d4e812c7a2f3fa42e5baa0094543e3f9ad0562807
|
7
|
+
data.tar.gz: 8586fcfc2dc87ac888daeeb44f89d7b95db79ed80357cc5b2e75d2225213bc8cea217e8cd62aa740110b51fcef8d7a969c03a30a4ed451279742e193ad8f4c6f
|
data/Gemfile.lock
CHANGED
data/lib/jobs_crawler/robots/atoo_job.rb
CHANGED
@@ -1,29 +1,14 @@
|
|
1
|
-
module JobsCrawler
|
2
|
-
class
|
3
|
-
attr_reader :url
|
4
|
-
|
5
|
-
def initialize(url)
|
6
|
-
@url = url
|
7
|
-
@engine = Mechanize.new
|
8
|
-
end
|
9
|
-
|
10
|
-
def crawl
|
11
|
-
set_html
|
12
|
-
to_json
|
13
|
-
end
|
14
|
-
|
15
|
-
private
|
16
|
-
|
17
|
-
def set_html
|
18
|
-
@html = Nokogiri::HTML(body)
|
19
|
-
end
|
20
|
-
|
1
|
+
module JobsCrawler::Robots
|
2
|
+
class AtooJob < Base
|
21
3
|
def to_json
|
22
4
|
{
|
23
5
|
date_de_publication: date_publication,
|
24
6
|
description: description,
|
25
7
|
}
|
26
8
|
end
|
9
|
+
|
10
|
+
private
|
11
|
+
|
27
12
|
def description
|
28
13
|
@html.css('#main > article > div.main-section.jobs-detail-3 > div:nth-child(2) > div > div > div.section-content.col-lg-8.col-md-8.col-sm-12.col-xs-12 > div.row > div.col-lg-8.col-md-8.col-sm-12.col-xs-12.col-xs-12 > div').text
|
29
14
|
end
|
@@ -31,9 +16,5 @@ module JobsCrawler
|
|
31
16
|
def date_publication
|
32
17
|
@html.css('#main > article > div.main-section.jobs-detail-3 > div:nth-child(1) > div > div > div > div > div > div > div.cs-text > ul > li:nth-child(1) > span').text
|
33
18
|
end
|
34
|
-
|
35
|
-
def body
|
36
|
-
@engine.get(url).body
|
37
|
-
end
|
38
19
|
end
|
39
20
|
end
|
data/lib/jobs_crawler/robots/base.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
module JobsCrawler::Robots
|
2
|
+
class Base
|
3
|
+
attr_reader :url
|
4
|
+
|
5
|
+
def initialize(url)
|
6
|
+
@url = url
|
7
|
+
@engine = Mechanize.new
|
8
|
+
end
|
9
|
+
|
10
|
+
def crawl
|
11
|
+
set_html
|
12
|
+
to_json
|
13
|
+
end
|
14
|
+
|
15
|
+
def extract_content(css_selector)
|
16
|
+
@html.css(css_selector).text
|
17
|
+
end
|
18
|
+
|
19
|
+
def to_json
|
20
|
+
raise NotImplemetedError, 'You need to provide a concrete implemetatioen'
|
21
|
+
end
|
22
|
+
|
23
|
+
def set_html
|
24
|
+
@html = Nokogiri::HTML(body)
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
def body
|
30
|
+
@engine.get(url).body
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
data/lib/jobs_crawler/robots/emploi_senegal.rb
CHANGED
@@ -1,13 +1,22 @@
|
|
1
|
-
module JobsCrawler
|
2
|
-
class
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
1
|
+
module JobsCrawler::Robots
|
2
|
+
class EmploiSenegal < Base
|
3
|
+
|
4
|
+
def to_json
|
5
|
+
{
|
6
|
+
date_de_publication: date_de_publication,
|
7
|
+
description: description,
|
8
|
+
}
|
9
|
+
end
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def description
|
14
|
+
extract_content('#job-ad-details-261761 > div > div')
|
11
15
|
end
|
16
|
+
|
17
|
+
def date_de_publication
|
18
|
+
extract_content('#node-261761 > div > div.job-ad-publication-date')
|
19
|
+
end
|
20
|
+
|
12
21
|
end
|
13
22
|
end
|
data/lib/jobs_crawler/robots/pages_jaunes_senegal.rb
CHANGED
@@ -1,13 +1,20 @@
|
|
1
|
-
module JobsCrawler
|
2
|
-
class
|
3
|
-
|
1
|
+
module JobsCrawler::Robots
|
2
|
+
class PagesJaunesSenegal < Base
|
3
|
+
def to_json
|
4
|
+
{
|
5
|
+
date_de_publication: date_de_publication,
|
6
|
+
description: description,
|
7
|
+
}
|
8
|
+
end
|
9
|
+
|
10
|
+
private
|
11
|
+
|
12
|
+
def date_de_publication
|
13
|
+
I18n.l Date.today, format: :long
|
14
|
+
end
|
4
15
|
|
5
|
-
|
6
|
-
|
7
|
-
¬
|
8
|
-
links 'css=.search-results .job-description-wrapper', :iterator do
|
9
|
-
url({ xpath: ".//@data-href" })
|
10
|
-
titre({ xpath: ".//text()" })
|
16
|
+
def description
|
17
|
+
extract_content('#contenu > div:nth-child(1) > table')
|
11
18
|
end
|
12
19
|
end
|
13
20
|
end
|
data/lib/jobs_crawler/robots/senjob.rb
CHANGED
@@ -1,9 +1,20 @@
|
|
1
1
|
module JobsCrawler::Robots
|
2
|
-
class Senjob
|
3
|
-
|
2
|
+
class Senjob < Base
|
3
|
+
def to_json
|
4
|
+
{
|
5
|
+
date_de_publication: date_de_publication,
|
6
|
+
description: description
|
7
|
+
}
|
8
|
+
end
|
4
9
|
|
5
|
-
|
6
|
-
|
7
|
-
|
10
|
+
private
|
11
|
+
|
12
|
+
def date_de_publication
|
13
|
+
@html.xpath('//*[@id="tablesOffres"]/tbody/tr[1]/td').text
|
14
|
+
end
|
15
|
+
|
16
|
+
def description
|
17
|
+
extract_content('#tablesOffres > tbody > tr:nth-child(1) > td')
|
18
|
+
end
|
8
19
|
end
|
9
20
|
end
|
data/lib/jobs_crawler/robots.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
module JobsCrawler::Robots
|
2
|
+
require 'jobs_crawler/robots/base'
|
2
3
|
require 'jobs_crawler/robots/senjob'
|
3
4
|
require 'jobs_crawler/robots/atoo_job'
|
4
5
|
require 'jobs_crawler/robots/emploi_senegal'
|
@@ -8,5 +9,13 @@ module JobsCrawler::Robots
|
|
8
9
|
def crawl_atoo_job(url)
|
9
10
|
JobsCrawler::Robots::AtooJob.new(url).crawl
|
10
11
|
end
|
12
|
+
|
13
|
+
def crawl_emploi_senegal(url)
|
14
|
+
JobsCrawler::Robots::EmploiSenegal.new(url).crawl
|
15
|
+
end
|
16
|
+
|
17
|
+
def crawl_pages_jaunes_senegal(url)
|
18
|
+
JobsCrawler::Robots::EmploiSenegal.new(url).crawl
|
19
|
+
end
|
11
20
|
end
|
12
21
|
end
|
data/lib/jobs_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jobs_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pathe
|
@@ -94,6 +94,7 @@ files:
|
|
94
94
|
- lib/jobs_crawler/indexers/senjob.rb
|
95
95
|
- lib/jobs_crawler/robots.rb
|
96
96
|
- lib/jobs_crawler/robots/atoo_job.rb
|
97
|
+
- lib/jobs_crawler/robots/base.rb
|
97
98
|
- lib/jobs_crawler/robots/emploi_senegal.rb
|
98
99
|
- lib/jobs_crawler/robots/pages_jaunes_senegal.rb
|
99
100
|
- lib/jobs_crawler/robots/senjob.rb
|