jobs_crawler 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/jobs_crawler/robots/atoo_job.rb +5 -24
- data/lib/jobs_crawler/robots/base.rb +33 -0
- data/lib/jobs_crawler/robots/emploi_senegal.rb +19 -10
- data/lib/jobs_crawler/robots/pages_jaunes_senegal.rb +16 -9
- data/lib/jobs_crawler/robots/senjob.rb +16 -5
- data/lib/jobs_crawler/robots.rb +9 -0
- data/lib/jobs_crawler/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '0911fc283f16a8ffac616ba37ab85b8cc6dc546db997923e8c8389ace1217b6f'
|
|
4
|
+
data.tar.gz: 2a7479d3999a5f61b957c2b2295c5d2b60f625ba67eba8a1c048773cda515d20
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 80aa00dccd2f024104ea007f3ebc132ea3d25d42684634e3a0876fa2c95434988c426ba9a0704992cda5f99d4e812c7a2f3fa42e5baa0094543e3f9ad0562807
|
|
7
|
+
data.tar.gz: 8586fcfc2dc87ac888daeeb44f89d7b95db79ed80357cc5b2e75d2225213bc8cea217e8cd62aa740110b51fcef8d7a969c03a30a4ed451279742e193ad8f4c6f
|
data/Gemfile.lock
CHANGED
|
@@ -1,29 +1,14 @@
|
|
|
1
|
-
module JobsCrawler
|
|
2
|
-
class
|
|
3
|
-
attr_reader :url
|
|
4
|
-
|
|
5
|
-
def initialize(url)
|
|
6
|
-
@url = url
|
|
7
|
-
@engine = Mechanize.new
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
def crawl
|
|
11
|
-
set_html
|
|
12
|
-
to_json
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
private
|
|
16
|
-
|
|
17
|
-
def set_html
|
|
18
|
-
@html = Nokogiri::HTML(body)
|
|
19
|
-
end
|
|
20
|
-
|
|
1
|
+
module JobsCrawler::Robots
|
|
2
|
+
class AtooJob < Base
|
|
21
3
|
def to_json
|
|
22
4
|
{
|
|
23
5
|
date_de_publication: date_publication,
|
|
24
6
|
description: description,
|
|
25
7
|
}
|
|
26
8
|
end
|
|
9
|
+
|
|
10
|
+
private
|
|
11
|
+
|
|
27
12
|
def description
|
|
28
13
|
@html.css('#main > article > div.main-section.jobs-detail-3 > div:nth-child(2) > div > div > div.section-content.col-lg-8.col-md-8.col-sm-12.col-xs-12 > div.row > div.col-lg-8.col-md-8.col-sm-12.col-xs-12.col-xs-12 > div').text
|
|
29
14
|
end
|
|
@@ -31,9 +16,5 @@ module JobsCrawler
|
|
|
31
16
|
def date_publication
|
|
32
17
|
@html.css('#main > article > div.main-section.jobs-detail-3 > div:nth-child(1) > div > div > div > div > div > div > div.cs-text > ul > li:nth-child(1) > span').text
|
|
33
18
|
end
|
|
34
|
-
|
|
35
|
-
def body
|
|
36
|
-
@engine.get(url).body
|
|
37
|
-
end
|
|
38
19
|
end
|
|
39
20
|
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
module JobsCrawler::Robots
|
|
2
|
+
class Base
|
|
3
|
+
attr_reader :url
|
|
4
|
+
|
|
5
|
+
def initialize(url)
|
|
6
|
+
@url = url
|
|
7
|
+
@engine = Mechanize.new
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def crawl
|
|
11
|
+
set_html
|
|
12
|
+
to_json
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def extract_content(css_selector)
|
|
16
|
+
@html.css(css_selector).text
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def to_json
|
|
20
|
+
raise NotImplemetedError, 'You need to provide a concrete implemetatioen'
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def set_html
|
|
24
|
+
@html = Nokogiri::HTML(body)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
private
|
|
28
|
+
|
|
29
|
+
def body
|
|
30
|
+
@engine.get(url).body
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -1,13 +1,22 @@
|
|
|
1
|
-
module JobsCrawler
|
|
2
|
-
class
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
1
|
+
module JobsCrawler::Robots
|
|
2
|
+
class EmploiSenegal < Base
|
|
3
|
+
|
|
4
|
+
def to_json
|
|
5
|
+
{
|
|
6
|
+
date_de_publication: date_de_publication,
|
|
7
|
+
description: description,
|
|
8
|
+
}
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
private
|
|
12
|
+
|
|
13
|
+
def description
|
|
14
|
+
extract_content('#job-ad-details-261761 > div > div')
|
|
11
15
|
end
|
|
16
|
+
|
|
17
|
+
def date_de_publication
|
|
18
|
+
extract_content('#node-261761 > div > div.job-ad-publication-date')
|
|
19
|
+
end
|
|
20
|
+
|
|
12
21
|
end
|
|
13
22
|
end
|
|
@@ -1,13 +1,20 @@
|
|
|
1
|
-
module JobsCrawler
|
|
2
|
-
class
|
|
3
|
-
|
|
1
|
+
module JobsCrawler::Robots
|
|
2
|
+
class PagesJaunesSenegal < Base
|
|
3
|
+
def to_json
|
|
4
|
+
{
|
|
5
|
+
date_de_publication: date_de_publication,
|
|
6
|
+
description: description,
|
|
7
|
+
}
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
private
|
|
11
|
+
|
|
12
|
+
def date_de_publication
|
|
13
|
+
I18n.l Date.today, format: :long
|
|
14
|
+
end
|
|
4
15
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
¬
|
|
8
|
-
links 'css=.search-results .job-description-wrapper', :iterator do
|
|
9
|
-
url({ xpath: ".//@data-href" })
|
|
10
|
-
titre({ xpath: ".//text()" })
|
|
16
|
+
def description
|
|
17
|
+
extract_content('#contenu > div:nth-child(1) > table')
|
|
11
18
|
end
|
|
12
19
|
end
|
|
13
20
|
end
|
|
@@ -1,9 +1,20 @@
|
|
|
1
1
|
module JobsCrawler::Robots
|
|
2
|
-
class Senjob
|
|
3
|
-
|
|
2
|
+
class Senjob < Base
|
|
3
|
+
def to_json
|
|
4
|
+
{
|
|
5
|
+
date_de_publication: date_de_publication,
|
|
6
|
+
description: description
|
|
7
|
+
}
|
|
8
|
+
end
|
|
4
9
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
10
|
+
private
|
|
11
|
+
|
|
12
|
+
def date_de_publication
|
|
13
|
+
@html.xpath('//*[@id="tablesOffres"]/tbody/tr[1]/td').text
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def description
|
|
17
|
+
extract_content('#tablesOffres > tbody > tr:nth-child(1) > td')
|
|
18
|
+
end
|
|
8
19
|
end
|
|
9
20
|
end
|
data/lib/jobs_crawler/robots.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
module JobsCrawler::Robots
|
|
2
|
+
require 'jobs_crawler/robots/base'
|
|
2
3
|
require 'jobs_crawler/robots/senjob'
|
|
3
4
|
require 'jobs_crawler/robots/atoo_job'
|
|
4
5
|
require 'jobs_crawler/robots/emploi_senegal'
|
|
@@ -8,5 +9,13 @@ module JobsCrawler::Robots
|
|
|
8
9
|
def crawl_atoo_job(url)
|
|
9
10
|
JobsCrawler::Robots::AtooJob.new(url).crawl
|
|
10
11
|
end
|
|
12
|
+
|
|
13
|
+
def crawl_emploi_senegal(url)
|
|
14
|
+
JobsCrawler::Robots::EmploiSenegal.new(url).crawl
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def crawl_pages_jaunes_senegal(url)
|
|
18
|
+
JobsCrawler::Robots::EmploiSenegal.new(url).crawl
|
|
19
|
+
end
|
|
11
20
|
end
|
|
12
21
|
end
|
data/lib/jobs_crawler/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: jobs_crawler
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.5
|
|
4
|
+
version: 0.1.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Pathe
|
|
@@ -94,6 +94,7 @@ files:
|
|
|
94
94
|
- lib/jobs_crawler/indexers/senjob.rb
|
|
95
95
|
- lib/jobs_crawler/robots.rb
|
|
96
96
|
- lib/jobs_crawler/robots/atoo_job.rb
|
|
97
|
+
- lib/jobs_crawler/robots/base.rb
|
|
97
98
|
- lib/jobs_crawler/robots/emploi_senegal.rb
|
|
98
99
|
- lib/jobs_crawler/robots/pages_jaunes_senegal.rb
|
|
99
100
|
- lib/jobs_crawler/robots/senjob.rb
|