jobs_crawler 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4bf276967ec890fe47ae3556b931865531dce5eb427a78387d804745939d88c7
4
- data.tar.gz: 5ff554b07156269e2c09663f98f39dbb1fd91cce060e7fa153bde29686a9c329
3
+ metadata.gz: '0911fc283f16a8ffac616ba37ab85b8cc6dc546db997923e8c8389ace1217b6f'
4
+ data.tar.gz: 2a7479d3999a5f61b957c2b2295c5d2b60f625ba67eba8a1c048773cda515d20
5
5
  SHA512:
6
- metadata.gz: 1dd42c07d06b1a2c8132da264974ac55682be7d2c0e342987786f8a6a22952a0e3545e05719a0bf6ced18ece7ee1cbf61ec178e96d5937207f5e331acc812dcc
7
- data.tar.gz: 193672e516faa7680bce476cd614b7d0c6f2020b15abf63cb154af0b5aab7383fa3a5bd6333485474277a28ae55f7605760ea8ad251d806de9cd4931b7a37720
6
+ metadata.gz: 80aa00dccd2f024104ea007f3ebc132ea3d25d42684634e3a0876fa2c95434988c426ba9a0704992cda5f99d4e812c7a2f3fa42e5baa0094543e3f9ad0562807
7
+ data.tar.gz: 8586fcfc2dc87ac888daeeb44f89d7b95db79ed80357cc5b2e75d2225213bc8cea217e8cd62aa740110b51fcef8d7a969c03a30a4ed451279742e193ad8f4c6f
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- jobs_crawler (0.1.5)
4
+ jobs_crawler (0.1.6)
5
5
  wombat
6
6
 
7
7
  GEM
@@ -1,29 +1,14 @@
1
- module JobsCrawler
2
- class Robots::AtooJob
3
- attr_reader :url
4
-
5
- def initialize(url)
6
- @url = url
7
- @engine = Mechanize.new
8
- end
9
-
10
- def crawl
11
- set_html
12
- to_json
13
- end
14
-
15
- private
16
-
17
- def set_html
18
- @html = Nokogiri::HTML(body)
19
- end
20
-
1
+ module JobsCrawler::Robots
2
+ class AtooJob < Base
21
3
  def to_json
22
4
  {
23
5
  date_de_publication: date_publication,
24
6
  description: description,
25
7
  }
26
8
  end
9
+
10
+ private
11
+
27
12
  def description
28
13
  @html.css('#main > article > div.main-section.jobs-detail-3 > div:nth-child(2) > div > div > div.section-content.col-lg-8.col-md-8.col-sm-12.col-xs-12 > div.row > div.col-lg-8.col-md-8.col-sm-12.col-xs-12.col-xs-12 > div').text
29
14
  end
@@ -31,9 +16,5 @@ module JobsCrawler
31
16
  def date_publication
32
17
  @html.css('#main > article > div.main-section.jobs-detail-3 > div:nth-child(1) > div > div > div > div > div > div > div.cs-text > ul > li:nth-child(1) > span').text
33
18
  end
34
-
35
- def body
36
- @engine.get(url).body
37
- end
38
19
  end
39
20
  end
@@ -0,0 +1,33 @@
1
+ module JobsCrawler::Robots
2
+ class Base
3
+ attr_reader :url
4
+
5
+ def initialize(url)
6
+ @url = url
7
+ @engine = Mechanize.new
8
+ end
9
+
10
+ def crawl
11
+ set_html
12
+ to_json
13
+ end
14
+
15
+ def extract_content(css_selector)
16
+ @html.css(css_selector).text
17
+ end
18
+
19
+ def to_json
20
+ raise NotImplemetedError, 'You need to provide a concrete implemetatioen'
21
+ end
22
+
23
+ def set_html
24
+ @html = Nokogiri::HTML(body)
25
+ end
26
+
27
+ private
28
+
29
+ def body
30
+ @engine.get(url).body
31
+ end
32
+ end
33
+ end
@@ -1,13 +1,22 @@
1
- module JobsCrawler
2
- class Robots::EmploiSenegal
3
- include Wombat::Crawler
4
-
5
- base_url 'https://www.emploisenegal.com'
6
- path '/recherche-jobs-senegal'
7
- ¬
8
- links 'css=.search-results .job-description-wrapper', :iterator do
9
- url({ xpath: ".//@data-href" })
10
- titre({ xpath: ".//text()" })
1
+ module JobsCrawler::Robots
2
+ class EmploiSenegal < Base
3
+
4
+ def to_json
5
+ {
6
+ date_de_publication: date_de_publication,
7
+ description: description,
8
+ }
9
+ end
10
+
11
+ private
12
+
13
+ def description
14
+ extract_content('#job-ad-details-261761 > div > div')
11
15
  end
16
+
17
+ def date_de_publication
18
+ extract_content('#node-261761 > div > div.job-ad-publication-date')
19
+ end
20
+
12
21
  end
13
22
  end
@@ -1,13 +1,20 @@
1
- module JobsCrawler
2
- class Robots::PagesJaunesSenegal
3
- include Wombat::Crawler
1
+ module JobsCrawler::Robots
2
+ class PagesJaunesSenegal < Base
3
+ def to_json
4
+ {
5
+ date_de_publication: date_de_publication,
6
+ description: description,
7
+ }
8
+ end
9
+
10
+ private
11
+
12
+ def date_de_publication
13
+ I18n.l Date.today, format: :long
14
+ end
4
15
 
5
- base_url 'https://www.emploisenegal.com'
6
- path '/recherche-jobs-senegal'
7
- ¬
8
- links 'css=.search-results .job-description-wrapper', :iterator do
9
- url({ xpath: ".//@data-href" })
10
- titre({ xpath: ".//text()" })
16
+ def description
17
+ extract_content('#contenu > div:nth-child(1) > table')
11
18
  end
12
19
  end
13
20
  end
@@ -1,9 +1,20 @@
1
1
  module JobsCrawler::Robots
2
- class Senjob
3
- include Wombat::Crawler
2
+ class Senjob < Base
3
+ def to_json
4
+ {
5
+ date_de_publication: date_de_publication,
6
+ description: description
7
+ }
8
+ end
4
9
 
5
- description "css=#articlebi .preview"
6
- reference "xpath=//html/body/div[3]/table/tbody/tr[5]/td[2]"
7
- deadline "css=body > div:nth-child(16) > table > tbody > tr:nth-child(5) > td:nth-child(2)"
10
+ private
11
+
12
+ def date_de_publication
13
+ @html.xpath('//*[@id="tablesOffres"]/tbody/tr[1]/td').text
14
+ end
15
+
16
+ def description
17
+ extract_content('#tablesOffres > tbody > tr:nth-child(1) > td')
18
+ end
8
19
  end
9
20
  end
@@ -1,4 +1,5 @@
1
1
  module JobsCrawler::Robots
2
+ require 'jobs_crawler/robots/base'
2
3
  require 'jobs_crawler/robots/senjob'
3
4
  require 'jobs_crawler/robots/atoo_job'
4
5
  require 'jobs_crawler/robots/emploi_senegal'
@@ -8,5 +9,13 @@ module JobsCrawler::Robots
8
9
  def crawl_atoo_job(url)
9
10
  JobsCrawler::Robots::AtooJob.new(url).crawl
10
11
  end
12
+
13
+ def crawl_emploi_senegal(url)
14
+ JobsCrawler::Robots::EmploiSenegal.new(url).crawl
15
+ end
16
+
17
+ def crawl_pages_jaunes_senegal(url)
18
+ JobsCrawler::Robots::EmploiSenegal.new(url).crawl
19
+ end
11
20
  end
12
21
  end
@@ -1,3 +1,3 @@
1
1
  module JobsCrawler
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jobs_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pathe
@@ -94,6 +94,7 @@ files:
94
94
  - lib/jobs_crawler/indexers/senjob.rb
95
95
  - lib/jobs_crawler/robots.rb
96
96
  - lib/jobs_crawler/robots/atoo_job.rb
97
+ - lib/jobs_crawler/robots/base.rb
97
98
  - lib/jobs_crawler/robots/emploi_senegal.rb
98
99
  - lib/jobs_crawler/robots/pages_jaunes_senegal.rb
99
100
  - lib/jobs_crawler/robots/senjob.rb