jobs_crawler 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a817c1a6c6757e2183dff2ede4daf2ad39ae140d7c5a4f2d6863cc59de562fc8
4
- data.tar.gz: b807d2ffcd566f6953bf97ef72457ac2657e447694adc394b234b742ba01eda5
3
+ metadata.gz: 07b12ecf9c7a3bbaa3b6a64a7df48ed4a0ea5d73b6500e75a6324ed35a17cafc
4
+ data.tar.gz: ce32b52ec4c6cb59ec21383b85c9d9dbdd7a7b4f81f9834b2db309fd71a0c7ae
5
5
  SHA512:
6
- metadata.gz: 7e19c50037052f0689cba5cf030fe343fe5880b5c074e0d4fdc004cfaa58ec2098a84822f262991a7be68a1c19dc3566d3f8d9a6d835488cc906a472d2561236
7
- data.tar.gz: f55f16430096811e666380e5b7a881f8a10a8dad7556347a9ee50e972f7aaf701589f2a6bbc9215d6b8f13c27cd9140f87829745960a0a58e9928761fb14e471
6
+ metadata.gz: f1981134e58e19aef91430542eb611bf4267e01b4b9ce3f6cf9b1dedde907df0692b45a1d7aa1951dfe0cd3fc097368b550d9864d3d480db34deafa51b8453fa
7
+ data.tar.gz: cba1310909ee0b858c530894e6a7dfa4b643a952ca9ef878f9dd11f4e71a1d69681ac696cccdaf77e13e4546ca69a5332661a3609e8cea60710062c352012ac0
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- jobs_crawler (0.1.2)
4
+ jobs_crawler (0.1.3)
5
5
  wombat
6
6
 
7
7
  GEM
@@ -0,0 +1,13 @@
1
+ module JobsCrawler
2
+ class Indexers::AtooJob
3
+ include Wombat::Crawler
4
+
5
+ base_url 'https://www.atoojob.com'
6
+ path '/jobs'
7
+ ¬
8
+ links 'css=.cs-post-title h5 a', :iterator do
9
+ url({ xpath: ".//@href" })
10
+ titre({ xpath: ".//text()" })
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,39 @@
1
+ module JobsCrawler
2
+ class Robots::AtooJob
3
+ attr_reader :url
4
+
5
+ def initialize(url)
6
+ @url = url
7
+ @engine = Mechanize.new
8
+ end
9
+
10
+ def crawl
11
+ set_html
12
+ to_json
13
+ end
14
+
15
+ private
16
+
17
+ def set_html
18
+ @html = Nokogiri::HTML(body)
19
+ end
20
+
21
+ def to_json
22
+ {
23
+ date_de_publication: date_publication,
24
+ description: description,
25
+ }
26
+ end
27
+ def description
28
+ @html.css('#main > article > div.main-section.jobs-detail-3 > div:nth-child(2) > div > div > div.section-content.col-lg-8.col-md-8.col-sm-12.col-xs-12 > div.row > div.col-lg-8.col-md-8.col-sm-12.col-xs-12.col-xs-12 > div').text
29
+ end
30
+
31
+ def date_publication
32
+ @html.css('#main > article > div.main-section.jobs-detail-3 > div:nth-child(1) > div > div > div > div > div > div > div.cs-text > ul > li:nth-child(1) > span').text
33
+ end
34
+
35
+ def body
36
+ @engine.get(url).body
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,13 @@
1
+ module JobsCrawler
2
+ class Robots::EmploiSenegal
3
+ include Wombat::Crawler
4
+
5
+ base_url 'https://www.emploisenegal.com'
6
+ path '/recherche-jobs-senegal'
7
+ ¬
8
+ links 'css=.search-results .job-description-wrapper', :iterator do
9
+ url({ xpath: ".//@data-href" })
10
+ titre({ xpath: ".//text()" })
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,13 @@
1
+ module JobsCrawler
2
+ class Robots::PagesJaunesSenegal
3
+ include Wombat::Crawler
4
+
5
+ base_url 'https://www.emploisenegal.com'
6
+ path '/recherche-jobs-senegal'
7
+ ¬
8
+ links 'css=.search-results .job-description-wrapper', :iterator do
9
+ url({ xpath: ".//@data-href" })
10
+ titre({ xpath: ".//text()" })
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,9 @@
1
+ module JobsCrawler::Robots
2
+ class Senjob
3
+ include Wombat::Crawler
4
+
5
+ description "css=#articlebi .preview"
6
+ reference "xpath=//html/body/div[3]/table/tbody/tr[5]/td[2]"
7
+ deadline "css=body > div:nth-child(16) > table > tbody > tr:nth-child(5) > td:nth-child(2)"
8
+ end
9
+ end
@@ -0,0 +1,12 @@
1
+ module JobsCrawler::Robots
2
+ require 'jobs_crawler/robots/senjob'
3
+ require 'jobs_crawler/robots/atoo_job'
4
+ require 'jobs_crawler/robots/emploi_senegal'
5
+ require 'jobs_crawler/robots/pages_jaunes_senegal'
6
+
7
+ class << self
8
+ def crawl_atoo_job(url)
9
+ JobsCrawler::Robots::AtooJob.new(url).crawl
10
+ end
11
+ end
12
+ end
@@ -1,3 +1,3 @@
1
1
  module JobsCrawler
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
data/lib/jobs_crawler.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "wombat"
2
2
  require "jobs_crawler/version"
3
3
  require "jobs_crawler/indexers"
4
+ require "jobs_crawler/robots"
4
5
  require "jobs_crawler/indexers/senjob"
5
6
  require "jobs_crawler/indexers/atoo_job"
6
7
  require "jobs_crawler/indexers/emploi_senegal"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jobs_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pathe
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-02 00:00:00.000000000 Z
11
+ date: 2019-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -88,9 +88,15 @@ files:
88
88
  - jobs_crawler.gemspec
89
89
  - lib/jobs_crawler.rb
90
90
  - lib/jobs_crawler/indexers.rb
91
+ - lib/jobs_crawler/indexers/atoo_job.rb
91
92
  - lib/jobs_crawler/indexers/emploi_senegal.rb
92
93
  - lib/jobs_crawler/indexers/pages_jaunes_senegal.rb
93
94
  - lib/jobs_crawler/indexers/senjob.rb
95
+ - lib/jobs_crawler/robots.rb
96
+ - lib/jobs_crawler/robots/atoo_job.rb
97
+ - lib/jobs_crawler/robots/emploi_senegal.rb
98
+ - lib/jobs_crawler/robots/pages_jaunes_senegal.rb
99
+ - lib/jobs_crawler/robots/senjob.rb
94
100
  - lib/jobs_crawler/version.rb
95
101
  homepage: http://taag-at.com
96
102
  licenses: