jobs_crawler 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4bf276967ec890fe47ae3556b931865531dce5eb427a78387d804745939d88c7
4
- data.tar.gz: 5ff554b07156269e2c09663f98f39dbb1fd91cce060e7fa153bde29686a9c329
3
+ metadata.gz: '0911fc283f16a8ffac616ba37ab85b8cc6dc546db997923e8c8389ace1217b6f'
4
+ data.tar.gz: 2a7479d3999a5f61b957c2b2295c5d2b60f625ba67eba8a1c048773cda515d20
5
5
  SHA512:
6
- metadata.gz: 1dd42c07d06b1a2c8132da264974ac55682be7d2c0e342987786f8a6a22952a0e3545e05719a0bf6ced18ece7ee1cbf61ec178e96d5937207f5e331acc812dcc
7
- data.tar.gz: 193672e516faa7680bce476cd614b7d0c6f2020b15abf63cb154af0b5aab7383fa3a5bd6333485474277a28ae55f7605760ea8ad251d806de9cd4931b7a37720
6
+ metadata.gz: 80aa00dccd2f024104ea007f3ebc132ea3d25d42684634e3a0876fa2c95434988c426ba9a0704992cda5f99d4e812c7a2f3fa42e5baa0094543e3f9ad0562807
7
+ data.tar.gz: 8586fcfc2dc87ac888daeeb44f89d7b95db79ed80357cc5b2e75d2225213bc8cea217e8cd62aa740110b51fcef8d7a969c03a30a4ed451279742e193ad8f4c6f
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- jobs_crawler (0.1.5)
4
+ jobs_crawler (0.1.6)
5
5
  wombat
6
6
 
7
7
  GEM
@@ -1,29 +1,14 @@
1
- module JobsCrawler
2
- class Robots::AtooJob
3
- attr_reader :url
4
-
5
- def initialize(url)
6
- @url = url
7
- @engine = Mechanize.new
8
- end
9
-
10
- def crawl
11
- set_html
12
- to_json
13
- end
14
-
15
- private
16
-
17
- def set_html
18
- @html = Nokogiri::HTML(body)
19
- end
20
-
1
+ module JobsCrawler::Robots
2
+ class AtooJob < Base
21
3
  def to_json
22
4
  {
23
5
  date_de_publication: date_publication,
24
6
  description: description,
25
7
  }
26
8
  end
9
+
10
+ private
11
+
27
12
  def description
28
13
  @html.css('#main > article > div.main-section.jobs-detail-3 > div:nth-child(2) > div > div > div.section-content.col-lg-8.col-md-8.col-sm-12.col-xs-12 > div.row > div.col-lg-8.col-md-8.col-sm-12.col-xs-12.col-xs-12 > div').text
29
14
  end
@@ -31,9 +16,5 @@ module JobsCrawler
31
16
  def date_publication
32
17
  @html.css('#main > article > div.main-section.jobs-detail-3 > div:nth-child(1) > div > div > div > div > div > div > div.cs-text > ul > li:nth-child(1) > span').text
33
18
  end
34
-
35
- def body
36
- @engine.get(url).body
37
- end
38
19
  end
39
20
  end
@@ -0,0 +1,33 @@
1
+ module JobsCrawler::Robots
2
+ class Base
3
+ attr_reader :url
4
+
5
+ def initialize(url)
6
+ @url = url
7
+ @engine = Mechanize.new
8
+ end
9
+
10
+ def crawl
11
+ set_html
12
+ to_json
13
+ end
14
+
15
+ def extract_content(css_selector)
16
+ @html.css(css_selector).text
17
+ end
18
+
19
+ def to_json
20
+ raise NotImplemetedError, 'You need to provide a concrete implemetatioen'
21
+ end
22
+
23
+ def set_html
24
+ @html = Nokogiri::HTML(body)
25
+ end
26
+
27
+ private
28
+
29
+ def body
30
+ @engine.get(url).body
31
+ end
32
+ end
33
+ end
@@ -1,13 +1,22 @@
1
- module JobsCrawler
2
- class Robots::EmploiSenegal
3
- include Wombat::Crawler
4
-
5
- base_url 'https://www.emploisenegal.com'
6
- path '/recherche-jobs-senegal'
7
- ¬
8
- links 'css=.search-results .job-description-wrapper', :iterator do
9
- url({ xpath: ".//@data-href" })
10
- titre({ xpath: ".//text()" })
1
+ module JobsCrawler::Robots
2
+ class EmploiSenegal < Base
3
+
4
+ def to_json
5
+ {
6
+ date_de_publication: date_de_publication,
7
+ description: description,
8
+ }
9
+ end
10
+
11
+ private
12
+
13
+ def description
14
+ extract_content('#job-ad-details-261761 > div > div')
11
15
  end
16
+
17
+ def date_de_publication
18
+ extract_content('#node-261761 > div > div.job-ad-publication-date')
19
+ end
20
+
12
21
  end
13
22
  end
@@ -1,13 +1,20 @@
1
- module JobsCrawler
2
- class Robots::PagesJaunesSenegal
3
- include Wombat::Crawler
1
+ module JobsCrawler::Robots
2
+ class PagesJaunesSenegal < Base
3
+ def to_json
4
+ {
5
+ date_de_publication: date_de_publication,
6
+ description: description,
7
+ }
8
+ end
9
+
10
+ private
11
+
12
+ def date_de_publication
13
+ I18n.l Date.today, format: :long
14
+ end
4
15
 
5
- base_url 'https://www.emploisenegal.com'
6
- path '/recherche-jobs-senegal'
7
- ¬
8
- links 'css=.search-results .job-description-wrapper', :iterator do
9
- url({ xpath: ".//@data-href" })
10
- titre({ xpath: ".//text()" })
16
+ def description
17
+ extract_content('#contenu > div:nth-child(1) > table')
11
18
  end
12
19
  end
13
20
  end
@@ -1,9 +1,20 @@
1
1
  module JobsCrawler::Robots
2
- class Senjob
3
- include Wombat::Crawler
2
+ class Senjob < Base
3
+ def to_json
4
+ {
5
+ date_de_publication: date_de_publication,
6
+ description: description
7
+ }
8
+ end
4
9
 
5
- description "css=#articlebi .preview"
6
- reference "xpath=//html/body/div[3]/table/tbody/tr[5]/td[2]"
7
- deadline "css=body > div:nth-child(16) > table > tbody > tr:nth-child(5) > td:nth-child(2)"
10
+ private
11
+
12
+ def date_de_publication
13
+ @html.xpath('//*[@id="tablesOffres"]/tbody/tr[1]/td').text
14
+ end
15
+
16
+ def description
17
+ extract_content('#tablesOffres > tbody > tr:nth-child(1) > td')
18
+ end
8
19
  end
9
20
  end
@@ -1,4 +1,5 @@
1
1
  module JobsCrawler::Robots
2
+ require 'jobs_crawler/robots/base'
2
3
  require 'jobs_crawler/robots/senjob'
3
4
  require 'jobs_crawler/robots/atoo_job'
4
5
  require 'jobs_crawler/robots/emploi_senegal'
@@ -8,5 +9,13 @@ module JobsCrawler::Robots
8
9
  def crawl_atoo_job(url)
9
10
  JobsCrawler::Robots::AtooJob.new(url).crawl
10
11
  end
12
+
13
+ def crawl_emploi_senegal(url)
14
+ JobsCrawler::Robots::EmploiSenegal.new(url).crawl
15
+ end
16
+
17
+ def crawl_pages_jaunes_senegal(url)
18
+ JobsCrawler::Robots::EmploiSenegal.new(url).crawl
19
+ end
11
20
  end
12
21
  end
@@ -1,3 +1,3 @@
1
1
  module JobsCrawler
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jobs_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pathe
@@ -94,6 +94,7 @@ files:
94
94
  - lib/jobs_crawler/indexers/senjob.rb
95
95
  - lib/jobs_crawler/robots.rb
96
96
  - lib/jobs_crawler/robots/atoo_job.rb
97
+ - lib/jobs_crawler/robots/base.rb
97
98
  - lib/jobs_crawler/robots/emploi_senegal.rb
98
99
  - lib/jobs_crawler/robots/pages_jaunes_senegal.rb
99
100
  - lib/jobs_crawler/robots/senjob.rb