remote_job_scraper 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6629f970e902f9ec6789b5574156730e7c9066a0
4
- data.tar.gz: 6b4eca4e512e432f4c4e642da57253e4756d0290
3
+ metadata.gz: 8f12102d94abaccc92c800bb21d459fddce99380
4
+ data.tar.gz: 2af9bb36f83a766ab3ce90b3a9473b4cb1166683
5
5
  SHA512:
6
- metadata.gz: 88559d103a0b1e5d70185641684e05f221d45fca0ea146b15324d34cc32c0ae40f42473f5564cc215410675f4b532e4a7a158fc659f73bfb6fb8c2d16208da11
7
- data.tar.gz: f6e2e97da63b78a378200be0925221d80e569f4b1c3573cfa9928c3910332e5be834dc89bb4df0803c280b1a7ccff36e60519778d9c709295fe9ed1fa4612ed6
6
+ metadata.gz: af613f9bff539e13a3585d80801e1983792e6d2125e0322526c411ee3a58ad8eb9ea955e88b07847e51a151e2eb072a7e4697b2141111ccadf5de047ad81bf46
7
+ data.tar.gz: a49fb2f1e599c3df9d49aa4c5be63d3930b43873c11a33465ad447fdc0fe7c7f33030c1c1f7d73fae0faf49ef7fa2fe5205262a9186c8260eec468265486a0fe
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.4.1
1
+ 2.4.4
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- remote_job_scraper (0.4.3)
4
+ remote_job_scraper (0.5.0)
5
5
  nokogiri
6
6
  spreadsheet
7
7
  thor
@@ -36,7 +36,7 @@ GEM
36
36
  rspec-support (3.8.0)
37
37
  ruby-ole (1.2.12.1)
38
38
  safe_yaml (1.0.4)
39
- spreadsheet (1.1.8)
39
+ spreadsheet (1.2.0)
40
40
  ruby-ole (>= 1.0)
41
41
  thor (0.20.3)
42
42
  vcr (4.0.0)
@@ -58,4 +58,4 @@ DEPENDENCIES
58
58
  webmock
59
59
 
60
60
  BUNDLED WITH
61
- 1.16.5
61
+ 1.17.2
@@ -5,6 +5,14 @@ module RemoteJobScraper
5
5
 
6
6
  AVAILABLE_SITES = %w(we_work_remotely remote_ok 42jobs_rails)
7
7
 
8
+ desc 'collect_companies',
9
+ "Retrieves remote companies
10
+ [Example]: remote_job_scraper collect_companies
11
+ "
12
+ def collect_companies
13
+ Sites::GithubRemoteJobs.new.collect_companies
14
+ end
15
+
8
16
  desc 'collect_jobs LIMIT DELAY',
9
17
  "Retrieves data from #{AVAILABLE_SITES.join(', ')}.
10
18
  [Example]: remote_job_scraper collect_jobs 10 9.0..10.0
@@ -70,6 +78,8 @@ module RemoteJobScraper
70
78
 
71
79
  FileUtils.rm_rf(dirname)
72
80
  puts "Removed data in #{Dir.pwd}/#{dirname}."
81
+ rescue Interrupt => e
82
+ exit
73
83
  end
74
84
  end
75
85
  end
@@ -1,3 +1,3 @@
1
1
  module RemoteJobScraper
2
- VERSION = "0.4.4"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -5,6 +5,7 @@ require 'remote_job_scraper/cli'
5
5
  require 'sites/we_work_remotely'
6
6
  require 'sites/remote_ok'
7
7
  require 'sites/jobs_rails42'
8
+ require 'sites/github_remote_jobs'
8
9
 
9
10
  require 'support/offer_parser'
10
11
  require 'support/user_agent'
@@ -0,0 +1,38 @@
module Sites
  # Scrapes the table of remote-friendly companies published on the
  # remoteintech/remote-jobs GitHub page and writes it to a CSV file.
  #
  # NOTE(review): `open_page`, and the `url`, `doc` and `filepath` readers
  # used below, are not defined in this class; presumably they come from
  # Base -- confirm against lib/sites/base.rb.
  class GithubRemoteJobs < Base

    HOST = 'http://github.com/'.freeze
    PATH = 'remoteintech/remote-jobs'.freeze
    JOB_ITEM_SELECTOR = '.entry-content table tbody tr'.freeze
    STORE_DIR = 'data/github_remote_jobs'.freeze

    # Number of leading <td> cells read from every row: name, website, region.
    COLUMNS_COUNT = 3

    # Builds the page URL from HOST and PATH, records the current time and a
    # timestamp string derived from it, and fetches and parses the page.
    def initialize
      @url = "#{self.class::HOST}#{self.class::PATH}"
      @current_time = Time.now
      @timestamp = @current_time.strftime("%Y%m%d%H%M%S")
      @doc = Nokogiri::HTML(open_page(@url))
      @rows_count = 0
    end

    # Writes one CSV row (name, website, region) per table row matched by
    # JOB_ITEM_SELECTOR and prints progress messages to stdout.
    #
    # Rows with fewer than COLUMNS_COUNT cells are skipped instead of raising
    # NoMethodError on a missing cell.
    def collect_companies
      puts "[Info] Getting the data from #{url}"
      FileUtils.mkdir_p STORE_DIR
      # Start from zero so that calling this method twice does not report
      # (and return via #companies_count) an accumulated total.
      @rows_count = 0

      CSV.open(filepath, 'w') do |csv|
        doc.css(JOB_ITEM_SELECTOR).each do |tr|
          # Query the cells once per row rather than once per column.
          cells = tr.search('td')
          next if cells.size < COLUMNS_COUNT

          csv << cells.first(COLUMNS_COUNT).map(&:text)
          @rows_count += 1
        end
      end
      puts "[Done] Collected #{@rows_count} companies from #{url}. Data stored in: #{filepath}."
    end

    # @return [Integer] number of rows written by the most recent
    #   #collect_companies call (0 before the first call)
    def companies_count
      @rows_count
    end

  end
end
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_job_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rafał Trojanowski
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-30 00:00:00.000000000 Z
11
+ date: 2019-06-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -167,6 +167,7 @@ files:
167
167
  - lib/remote_job_scraper/version.rb
168
168
  - lib/sites/base.rb
169
169
  - lib/sites/elixir_radar.rb
170
+ - lib/sites/github_remote_jobs.rb
170
171
  - lib/sites/jobs_rails42.rb
171
172
  - lib/sites/rails_jobs.rb
172
173
  - lib/sites/remote_ok.rb
@@ -174,6 +175,7 @@ files:
174
175
  - lib/support/offer_parser.rb
175
176
  - lib/support/spreadsheet_creator.rb
176
177
  - lib/support/user_agent.rb
178
+ - remote_job_scraper-0.4.4.gem
177
179
  - remote_job_scraper.gemspec
178
180
  homepage: https://github.com/rafaltrojanowski/remote_job_scraper
179
181
  licenses:
@@ -196,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
196
198
  version: '0'
197
199
  requirements: []
198
200
  rubyforge_project:
199
- rubygems_version: 2.6.11
201
+ rubygems_version: 2.6.14.1
200
202
  signing_key:
201
203
  specification_version: 4
202
204
  summary: Ruby gem that collects job offers for remote positions with ease.