remote_job_scraper 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +3 -3
- data/lib/remote_job_scraper/cli.rb +10 -0
- data/lib/remote_job_scraper/version.rb +1 -1
- data/lib/remote_job_scraper.rb +1 -0
- data/lib/sites/github_remote_jobs.rb +38 -0
- data/remote_job_scraper-0.4.4.gem +0 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8f12102d94abaccc92c800bb21d459fddce99380
|
4
|
+
data.tar.gz: 2af9bb36f83a766ab3ce90b3a9473b4cb1166683
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af613f9bff539e13a3585d80801e1983792e6d2125e0322526c411ee3a58ad8eb9ea955e88b07847e51a151e2eb072a7e4697b2141111ccadf5de047ad81bf46
|
7
|
+
data.tar.gz: a49fb2f1e599c3df9d49aa4c5be63d3930b43873c11a33465ad447fdc0fe7c7f33030c1c1f7d73fae0faf49ef7fa2fe5205262a9186c8260eec468265486a0fe
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.4.
|
1
|
+
2.4.4
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
remote_job_scraper (0.
|
4
|
+
remote_job_scraper (0.5.0)
|
5
5
|
nokogiri
|
6
6
|
spreadsheet
|
7
7
|
thor
|
@@ -36,7 +36,7 @@ GEM
|
|
36
36
|
rspec-support (3.8.0)
|
37
37
|
ruby-ole (1.2.12.1)
|
38
38
|
safe_yaml (1.0.4)
|
39
|
-
spreadsheet (1.
|
39
|
+
spreadsheet (1.2.0)
|
40
40
|
ruby-ole (>= 1.0)
|
41
41
|
thor (0.20.3)
|
42
42
|
vcr (4.0.0)
|
@@ -58,4 +58,4 @@ DEPENDENCIES
|
|
58
58
|
webmock
|
59
59
|
|
60
60
|
BUNDLED WITH
|
61
|
-
1.
|
61
|
+
1.17.2
|
data/lib/remote_job_scraper/cli.rb
CHANGED
@@ -5,6 +5,14 @@ module RemoteJobScraper
|
|
5
5
|
|
6
6
|
AVAILABLE_SITES = %w(we_work_remotely remote_ok 42jobs_rails)
|
7
7
|
|
8
|
+
desc 'collect_companies',
|
9
|
+
"Retrieves remote companies
|
10
|
+
[Example]: remote_job_scraper collect_companies
|
11
|
+
"
|
12
|
+
def collect_companies
|
13
|
+
Sites::GithubRemoteJobs.new.collect_companies
|
14
|
+
end
|
15
|
+
|
8
16
|
desc 'collect_jobs LIMIT DELAY',
|
9
17
|
"Retrieves data from #{AVAILABLE_SITES.join(', ')}.
|
10
18
|
[Example]: remote_job_scraper collect_jobs 10 9.0..10.0
|
@@ -70,6 +78,8 @@ module RemoteJobScraper
|
|
70
78
|
|
71
79
|
FileUtils.rm_rf(dirname)
|
72
80
|
puts "Removed data in #{Dir.pwd}/#{dirname}."
|
81
|
+
rescue Interrupt => e
|
82
|
+
exit
|
73
83
|
end
|
74
84
|
end
|
75
85
|
end
|
data/lib/remote_job_scraper.rb
CHANGED
data/lib/sites/github_remote_jobs.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
module Sites
|
2
|
+
class GithubRemoteJobs < Base
|
3
|
+
|
4
|
+
HOST = 'http://github.com/'.freeze
|
5
|
+
PATH = 'remoteintech/remote-jobs'
|
6
|
+
JOB_ITEM_SELECTOR = '.entry-content table tbody tr'.freeze
|
7
|
+
STORE_DIR = 'data/github_remote_jobs'.freeze
|
8
|
+
|
9
|
+
def initialize()
|
10
|
+
@url = "#{self.class::HOST}#{self.class::PATH}"
|
11
|
+
@current_time = Time.now
|
12
|
+
@timestamp = @current_time.strftime("%Y%m%d%H%M%S")
|
13
|
+
@doc = Nokogiri::HTML(open_page(@url))
|
14
|
+
@rows_count = 0
|
15
|
+
end
|
16
|
+
|
17
|
+
def collect_companies
|
18
|
+
puts "[Info] Getting the data from #{url}"
|
19
|
+
FileUtils.mkdir_p STORE_DIR
|
20
|
+
|
21
|
+
CSV.open(filepath, 'w') do |csv|
|
22
|
+
doc.css(JOB_ITEM_SELECTOR).each do |tr|
|
23
|
+
name = tr.search('td')[0].text
|
24
|
+
website = tr.search('td')[1].text
|
25
|
+
region = tr.search('td')[2].text
|
26
|
+
csv << [name, website, region]
|
27
|
+
@rows_count += 1
|
28
|
+
end
|
29
|
+
end
|
30
|
+
puts "[Done] Collected #{@rows_count} job offers from #{url}. Data stored in: #{filepath}."
|
31
|
+
end
|
32
|
+
|
33
|
+
def companies_count
|
34
|
+
@rows_count
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
data/remote_job_scraper-0.4.4.gem
ADDED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_job_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rafał Trojanowski
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-06-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -167,6 +167,7 @@ files:
|
|
167
167
|
- lib/remote_job_scraper/version.rb
|
168
168
|
- lib/sites/base.rb
|
169
169
|
- lib/sites/elixir_radar.rb
|
170
|
+
- lib/sites/github_remote_jobs.rb
|
170
171
|
- lib/sites/jobs_rails42.rb
|
171
172
|
- lib/sites/rails_jobs.rb
|
172
173
|
- lib/sites/remote_ok.rb
|
@@ -174,6 +175,7 @@ files:
|
|
174
175
|
- lib/support/offer_parser.rb
|
175
176
|
- lib/support/spreadsheet_creator.rb
|
176
177
|
- lib/support/user_agent.rb
|
178
|
+
- remote_job_scraper-0.4.4.gem
|
177
179
|
- remote_job_scraper.gemspec
|
178
180
|
homepage: https://github.com/rafaltrojanowski/remote_job_scraper
|
179
181
|
licenses:
|
@@ -196,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
196
198
|
version: '0'
|
197
199
|
requirements: []
|
198
200
|
rubyforge_project:
|
199
|
-
rubygems_version: 2.6.
|
201
|
+
rubygems_version: 2.6.14.1
|
200
202
|
signing_key:
|
201
203
|
specification_version: 4
|
202
204
|
summary: Ruby gem that collects job offers for remote positions with ease.
|