remote_job_scraper 0.4.4 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +3 -3
- data/lib/remote_job_scraper/cli.rb +10 -0
- data/lib/remote_job_scraper/version.rb +1 -1
- data/lib/remote_job_scraper.rb +1 -0
- data/lib/sites/github_remote_jobs.rb +38 -0
- data/remote_job_scraper-0.4.4.gem +0 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8f12102d94abaccc92c800bb21d459fddce99380
|
4
|
+
data.tar.gz: 2af9bb36f83a766ab3ce90b3a9473b4cb1166683
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af613f9bff539e13a3585d80801e1983792e6d2125e0322526c411ee3a58ad8eb9ea955e88b07847e51a151e2eb072a7e4697b2141111ccadf5de047ad81bf46
|
7
|
+
data.tar.gz: a49fb2f1e599c3df9d49aa4c5be63d3930b43873c11a33465ad447fdc0fe7c7f33030c1c1f7d73fae0faf49ef7fa2fe5205262a9186c8260eec468265486a0fe
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.4.
|
1
|
+
2.4.4
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
remote_job_scraper (0.
|
4
|
+
remote_job_scraper (0.5.0)
|
5
5
|
nokogiri
|
6
6
|
spreadsheet
|
7
7
|
thor
|
@@ -36,7 +36,7 @@ GEM
|
|
36
36
|
rspec-support (3.8.0)
|
37
37
|
ruby-ole (1.2.12.1)
|
38
38
|
safe_yaml (1.0.4)
|
39
|
-
spreadsheet (1.
|
39
|
+
spreadsheet (1.2.0)
|
40
40
|
ruby-ole (>= 1.0)
|
41
41
|
thor (0.20.3)
|
42
42
|
vcr (4.0.0)
|
@@ -58,4 +58,4 @@ DEPENDENCIES
|
|
58
58
|
webmock
|
59
59
|
|
60
60
|
BUNDLED WITH
|
61
|
-
1.
|
61
|
+
1.17.2
|
@@ -5,6 +5,14 @@ module RemoteJobScraper
|
|
5
5
|
|
6
6
|
AVAILABLE_SITES = %w(we_work_remotely remote_ok 42jobs_rails)
|
7
7
|
|
8
|
+
desc 'collect_companies',
|
9
|
+
"Retrieves remote companies
|
10
|
+
[Example]: remote_job_scraper collect_companies
|
11
|
+
"
|
12
|
+
def collect_companies
|
13
|
+
Sites::GithubRemoteJobs.new.collect_companies
|
14
|
+
end
|
15
|
+
|
8
16
|
desc 'collect_jobs LIMIT DELAY',
|
9
17
|
"Retrieves data from #{AVAILABLE_SITES.join(', ')}.
|
10
18
|
[Example]: remote_job_scraper collect_jobs 10 9.0..10.0
|
@@ -70,6 +78,8 @@ module RemoteJobScraper
|
|
70
78
|
|
71
79
|
FileUtils.rm_rf(dirname)
|
72
80
|
puts "Removed data in #{Dir.pwd}/#{dirname}."
|
81
|
+
rescue Interrupt => e
|
82
|
+
exit
|
73
83
|
end
|
74
84
|
end
|
75
85
|
end
|
data/lib/remote_job_scraper.rb
CHANGED
@@ -0,0 +1,38 @@
|
|
1
|
+
module Sites
|
2
|
+
class GithubRemoteJobs < Base
|
3
|
+
|
4
|
+
HOST = 'http://github.com/'.freeze
|
5
|
+
PATH = 'remoteintech/remote-jobs'
|
6
|
+
JOB_ITEM_SELECTOR = '.entry-content table tbody tr'.freeze
|
7
|
+
STORE_DIR = 'data/github_remote_jobs'.freeze
|
8
|
+
|
9
|
+
def initialize()
|
10
|
+
@url = "#{self.class::HOST}#{self.class::PATH}"
|
11
|
+
@current_time = Time.now
|
12
|
+
@timestamp = @current_time.strftime("%Y%m%d%H%M%S")
|
13
|
+
@doc = Nokogiri::HTML(open_page(@url))
|
14
|
+
@rows_count = 0
|
15
|
+
end
|
16
|
+
|
17
|
+
def collect_companies
|
18
|
+
puts "[Info] Getting the data from #{url}"
|
19
|
+
FileUtils.mkdir_p STORE_DIR
|
20
|
+
|
21
|
+
CSV.open(filepath, 'w') do |csv|
|
22
|
+
doc.css(JOB_ITEM_SELECTOR).each do |tr|
|
23
|
+
name = tr.search('td')[0].text
|
24
|
+
website = tr.search('td')[1].text
|
25
|
+
region = tr.search('td')[2].text
|
26
|
+
csv << [name, website, region]
|
27
|
+
@rows_count += 1
|
28
|
+
end
|
29
|
+
end
|
30
|
+
puts "[Done] Collected #{@rows_count} job offers from #{url}. Data stored in: #{filepath}."
|
31
|
+
end
|
32
|
+
|
33
|
+
def companies_count
|
34
|
+
@rows_count
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_job_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rafał Trojanowski
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-06-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -167,6 +167,7 @@ files:
|
|
167
167
|
- lib/remote_job_scraper/version.rb
|
168
168
|
- lib/sites/base.rb
|
169
169
|
- lib/sites/elixir_radar.rb
|
170
|
+
- lib/sites/github_remote_jobs.rb
|
170
171
|
- lib/sites/jobs_rails42.rb
|
171
172
|
- lib/sites/rails_jobs.rb
|
172
173
|
- lib/sites/remote_ok.rb
|
@@ -174,6 +175,7 @@ files:
|
|
174
175
|
- lib/support/offer_parser.rb
|
175
176
|
- lib/support/spreadsheet_creator.rb
|
176
177
|
- lib/support/user_agent.rb
|
178
|
+
- remote_job_scraper-0.4.4.gem
|
177
179
|
- remote_job_scraper.gemspec
|
178
180
|
homepage: https://github.com/rafaltrojanowski/remote_job_scraper
|
179
181
|
licenses:
|
@@ -196,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
196
198
|
version: '0'
|
197
199
|
requirements: []
|
198
200
|
rubyforge_project:
|
199
|
-
rubygems_version: 2.6.
|
201
|
+
rubygems_version: 2.6.14.1
|
200
202
|
signing_key:
|
201
203
|
specification_version: 4
|
202
204
|
summary: Ruby gem that collects job offers for remote positions with ease.
|