remote_job_scraper 0.4.2 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +17 -1
- data/lib/remote_job_scraper.rb +13 -39
- data/lib/remote_job_scraper/cli.rb +75 -0
- data/lib/remote_job_scraper/configuration.rb +10 -0
- data/lib/remote_job_scraper/version.rb +1 -1
- data/lib/sites/base.rb +14 -25
- data/lib/sites/elixir_radar.rb +1 -1
- data/lib/sites/jobs_rails42.rb +39 -30
- data/lib/sites/remote_ok.rb +26 -16
- data/lib/sites/we_work_remotely.rb +27 -18
- data/lib/support/offer_parser.rb +4 -6
- data/lib/support/spreadsheet_creator.rb +2 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6629f970e902f9ec6789b5574156730e7c9066a0
|
4
|
+
data.tar.gz: 6b4eca4e512e432f4c4e642da57253e4756d0290
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 88559d103a0b1e5d70185641684e05f221d45fca0ea146b15324d34cc32c0ae40f42473f5564cc215410675f4b532e4a7a158fc659f73bfb6fb8c2d16208da11
|
7
|
+
data.tar.gz: f6e2e97da63b78a378200be0925221d80e569f4b1c3573cfa9928c3910332e5be834dc89bb4df0803c280b1a7ccff36e60519778d9c709295fe9ed1fa4612ed6
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -2,8 +2,9 @@
|
|
2
2
|
|
3
3
|
Ruby gem that collects job offers for remote positions with ease.
|
4
4
|
|
5
|
-
Going through many job listings and finding the right one may be a time-consuming process. That's why this tool has been built. It allows to automate the process, retrieve necessary data and store it in CSV/Excel file in just a few minutes. The main focus is to inform
|
5
|
+
Going through many job listings and finding the right one may be a time-consuming process. That's why this tool has been built. It allows you to automate the process, retrieve the necessary data and store it in a CSV/Excel file in just a few minutes. The main focus is to inform a user about the location (time zone) required for a position.
|
6
6
|
|
7
|
+
![screenshot](http://i67.tinypic.com/2ewfj3a.png)
|
7
8
|
|
8
9
|
## Installation
|
9
10
|
|
@@ -18,6 +19,21 @@ Going through many job listings and finding the right one may be a time-consumin
|
|
18
19
|
* [x] 2.4.1
|
19
20
|
* [ ] 2.0.0 (https://github.com/rafaltrojanowski/remote_job_scraper/issues/1)
|
20
21
|
|
22
|
+
## Running tests
|
23
|
+
|
24
|
+
Running the tests:
|
25
|
+
|
26
|
+
```
|
27
|
+
$ rspec spec
|
28
|
+
```
|
29
|
+
|
30
|
+
A few tests run very slowly because they parse a large number of pages.
|
31
|
+
You can skip slow tests by running:
|
32
|
+
|
33
|
+
```
|
34
|
+
$ rspec . --tag ~speed:slow
|
35
|
+
```
|
36
|
+
|
21
37
|
## Development
|
22
38
|
|
23
39
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/remote_job_scraper.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require 'remote_job_scraper/version'
|
2
|
+
require 'remote_job_scraper/configuration'
|
3
|
+
require 'remote_job_scraper/cli'
|
2
4
|
|
3
5
|
require 'sites/we_work_remotely'
|
4
6
|
require 'sites/remote_ok'
|
@@ -11,51 +13,23 @@ require 'support/spreadsheet_creator'
|
|
11
13
|
require 'nokogiri'
|
12
14
|
require 'open-uri'
|
13
15
|
require 'csv'
|
14
|
-
require "thor"
|
15
16
|
|
16
17
|
module RemoteJobScraper
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
desc 'collect_jobs', "Retrieves data from #{AVAILABLE_SITES.join(', ')}"
|
23
|
-
def collect_jobs
|
24
|
-
[Sites::WeWorkRemotely, Sites::RemoteOk].each do |klass|
|
25
|
-
klass.new.collect_jobs
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
desc 'collect_jobs_from SITE', "Retrieves data from SITE, e.g. #{AVAILABLE_SITES.sample}"
|
30
|
-
def collect_jobs_from(site)
|
31
|
-
case site
|
32
|
-
when 'we_work_remotely'
|
33
|
-
then Sites::WeWorkRemotely.new.collect_jobs
|
34
|
-
when 'remote_ok'
|
35
|
-
then Sites::RemoteOk.new.collect_jobs
|
36
|
-
when '42jobs_rails'
|
37
|
-
then Sites::JobsRails42.new.collect_jobs
|
38
|
-
else
|
39
|
-
raise "#{site} is not correct. Use: #{AVAILABLE_SITES.join(', ')}."
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
desc 'generate_summary', "Merges data from #{AVAILABLE_SITES.join(', ')} and exports to XLS file"
|
44
|
-
def generate_summary
|
45
|
-
Support::SpreadsheetCreator.generate
|
46
|
-
end
|
19
|
+
class << self
|
20
|
+
attr_accessor :configuration
|
21
|
+
end
|
47
22
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
puts "This command will remove #{Dir.pwd}/#{dirname} permanently"
|
52
|
-
puts "Press Ctrl-C to abort."
|
23
|
+
def self.configuration
|
24
|
+
@configuration ||= Configuration.new
|
25
|
+
end
|
53
26
|
|
54
|
-
|
27
|
+
def self.reset
|
28
|
+
@configuration = Configuration.new
|
29
|
+
end
|
55
30
|
|
56
|
-
|
57
|
-
|
58
|
-
end
|
31
|
+
def self.configure
|
32
|
+
yield(configuration)
|
59
33
|
end
|
60
34
|
|
61
35
|
def self.root
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require "thor"
|
2
|
+
|
3
|
+
module RemoteJobScraper
|
4
|
+
class CLI < Thor
|
5
|
+
|
6
|
+
AVAILABLE_SITES = %w(we_work_remotely remote_ok 42jobs_rails)
|
7
|
+
|
8
|
+
desc 'collect_jobs LIMIT DELAY',
|
9
|
+
"Retrieves data from #{AVAILABLE_SITES.join(', ')}.
|
10
|
+
[Example]: remote_job_scraper collect_jobs 10 9.0..10.0
|
11
|
+
"
|
12
|
+
def collect_jobs(limit = nil, delay = nil)
|
13
|
+
limit = limit.to_i
|
14
|
+
limit = limit.zero? ? nil : limit
|
15
|
+
|
16
|
+
begin
|
17
|
+
unless delay.nil?
|
18
|
+
arr = delay.split('..').map{ |d| Float(d) }
|
19
|
+
range = arr[0]..arr[1]
|
20
|
+
RemoteJobScraper.configuration.delay_range = range
|
21
|
+
end
|
22
|
+
rescue
|
23
|
+
raise "Passed: DELAY=#{range} DELAY need to be in format: 2.0..5.0 "
|
24
|
+
end
|
25
|
+
|
26
|
+
[
|
27
|
+
Sites::WeWorkRemotely,
|
28
|
+
Sites::RemoteOk,
|
29
|
+
Sites::JobsRails42
|
30
|
+
].each do |klass|
|
31
|
+
klass.new.collect_jobs(limit: limit)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
desc 'collect_jobs_from SITE LIMIT',
|
36
|
+
"Retrieves data from SITE with LIMIT, e.g. #{AVAILABLE_SITES.sample}
|
37
|
+
[Example]: remote_job_scraper collect_jobs_from remote_ok 10
|
38
|
+
"
|
39
|
+
def collect_jobs_from(site, limit=nil)
|
40
|
+
limit = limit.to_i
|
41
|
+
limit = limit.zero? ? nil : limit
|
42
|
+
|
43
|
+
case site
|
44
|
+
when 'we_work_remotely'
|
45
|
+
then Sites::WeWorkRemotely.new.collect_jobs(limit: limit)
|
46
|
+
when 'remote_ok'
|
47
|
+
then Sites::RemoteOk.new.collect_jobs(limit: limit)
|
48
|
+
when '42jobs_rails'
|
49
|
+
then Sites::JobsRails42.new.collect_jobs(limit: limit)
|
50
|
+
else
|
51
|
+
raise "#{site} is not correct. Use: #{AVAILABLE_SITES.join(', ')}."
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
desc 'generate_summary',
|
56
|
+
"Merges data from #{AVAILABLE_SITES.join(', ')} and exports to
|
57
|
+
separate sheets in XLS file.
|
58
|
+
"
|
59
|
+
def generate_summary
|
60
|
+
Support::SpreadsheetCreator.generate
|
61
|
+
end
|
62
|
+
|
63
|
+
desc 'remove DIRNAME', "Removes DIRNAME (default: 'data'). Use carefully."
|
64
|
+
def remove(dirname = 'data')
|
65
|
+
puts "[Warning!]"
|
66
|
+
puts "This command will remove #{Dir.pwd}/#{dirname} permanently"
|
67
|
+
puts "Press Ctrl-C to abort."
|
68
|
+
|
69
|
+
sleep 3
|
70
|
+
|
71
|
+
FileUtils.rm_rf(dirname)
|
72
|
+
puts "Removed data in #{Dir.pwd}/#{dirname}."
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
data/lib/sites/base.rb
CHANGED
@@ -1,44 +1,33 @@
|
|
1
1
|
module Sites
|
2
2
|
class Base
|
3
3
|
|
4
|
-
attr_reader :
|
4
|
+
attr_reader :doc, :url, :rows_count, :jobs_count
|
5
5
|
|
6
|
-
def initialize
|
7
|
-
@
|
8
|
-
@url = build_url
|
6
|
+
def initialize
|
7
|
+
@url = "#{self.class::HOST}#{self.class::PATH}"
|
9
8
|
@doc = Nokogiri::HTML(open_page(@url))
|
10
|
-
@current_time = Time.
|
9
|
+
@current_time = Time.now
|
11
10
|
@timestamp = @current_time.strftime("%Y%m%d%H%M%S")
|
12
|
-
@
|
11
|
+
@rows_count = 0
|
12
|
+
@jobs_count = get_jobs_count
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
sleep(rand(0..2.0)) unless ENV['RAILS_ENV'] == 'test' # less mechanical behaviour
|
15
|
+
private
|
17
16
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
end
|
17
|
+
def open_page(url)
|
18
|
+
sleep(rand(delay_range)) unless ENV['RAILS_ENV'] == 'test' # less mechanical behaviour
|
19
|
+
options = ENV['RAILS_ENV'] == 'test' ? {} : { 'User-Agent' => user_agent }
|
20
|
+
open(url, options)
|
23
21
|
end
|
24
22
|
|
25
|
-
|
23
|
+
def delay_range
|
24
|
+
RemoteJobScraper.configuration.delay_range
|
25
|
+
end
|
26
26
|
|
27
27
|
def user_agent
|
28
28
|
Support::UserAgent::LIST.sample
|
29
29
|
end
|
30
30
|
|
31
|
-
def build_url
|
32
|
-
case job_type
|
33
|
-
when :programming
|
34
|
-
then "#{self.class::HOST}#{self.class::PROGRAMMING}"
|
35
|
-
when :devops
|
36
|
-
then "#{self.class::HOST}#{self.class::DEVOPS}"
|
37
|
-
else
|
38
|
-
raise "Error"
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
31
|
def filepath
|
43
32
|
return test_filepath if ENV["RAILS_ENV"] == 'test'
|
44
33
|
"#{self.class::STORE_DIR}/#{@timestamp}.csv"
|
data/lib/sites/elixir_radar.rb
CHANGED
data/lib/sites/jobs_rails42.rb
CHANGED
@@ -1,62 +1,71 @@
|
|
1
1
|
module Sites
|
2
2
|
class JobsRails42 < Base
|
3
3
|
|
4
|
-
# @
|
5
|
-
# to grab more offers than just first page (25 items)
|
6
|
-
|
7
|
-
# I had to rename this class because we are not allowed to have numbers
|
4
|
+
# @NOTE: I had to rename this class because we are not allowed to have numbers
|
8
5
|
# at the beginning of the class name (42JobsRails won't work).
|
6
|
+
# file paths follow this convention
|
9
7
|
|
10
8
|
HOST = 'https://www.42jobs.io'.freeze
|
11
|
-
|
9
|
+
PATH = '/rails/jobs-remote'.freeze
|
12
10
|
JOB_ITEM_SELECTOR = 'li.job-offers__item a'.freeze
|
13
11
|
STORE_DIR = 'data/jobs_rails42'.freeze
|
14
12
|
|
15
|
-
def initialize
|
16
|
-
@
|
17
|
-
@
|
18
|
-
@doc = nil
|
19
|
-
@current_time = Time.new
|
13
|
+
def initialize
|
14
|
+
@url = "#{self.class::HOST}#{self.class::PATH}"
|
15
|
+
@current_time = Time.now
|
20
16
|
@timestamp = @current_time.strftime("%Y%m%d%H%M%S")
|
21
|
-
@
|
22
|
-
@
|
17
|
+
@doc = nil
|
18
|
+
@total_pages = 4
|
19
|
+
@rows_count = 0
|
20
|
+
@jobs_count = get_jobs_count
|
23
21
|
end
|
24
22
|
|
25
|
-
def collect_jobs
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
process_page(
|
23
|
+
def collect_jobs(limit: nil)
|
24
|
+
FileUtils.mkdir_p STORE_DIR
|
25
|
+
|
26
|
+
(1..@total_pages).each do |page|
|
27
|
+
process_page(page: page, limit: limit)
|
30
28
|
end
|
31
29
|
end
|
32
30
|
|
33
31
|
private
|
34
32
|
|
35
|
-
def process_page(
|
36
|
-
|
37
|
-
|
33
|
+
def process_page(page:, limit:)
|
34
|
+
current_page = "#{@url}?page=#{page}"
|
35
|
+
doc = Nokogiri::HTML(open_page(current_page))
|
36
|
+
puts "[Info] Getting the data from #{current_page}"
|
38
37
|
|
39
38
|
CSV.open(filepath, 'ab') do |csv|
|
40
39
|
doc.css(JOB_ITEM_SELECTOR).each do |link|
|
40
|
+
return if limit == @rows_count
|
41
|
+
|
41
42
|
job_url = "#{HOST}#{link["href"]}"
|
42
|
-
puts "[Info]
|
43
|
-
job_page = Nokogiri::HTML(open_page(job_url))
|
44
|
-
offer_text = job_page.css('.job-offer__description').to_s
|
43
|
+
puts "[Info] Parsing #{job_url}..."
|
45
44
|
|
46
|
-
|
47
|
-
keywords = Support::OfferParser.get_keywords(offer_text)
|
45
|
+
csv << get_row(job_url)
|
48
46
|
|
49
|
-
|
47
|
+
@rows_count += 1
|
50
48
|
end
|
51
49
|
end
|
52
50
|
|
53
|
-
puts "[Done] Collected #{@
|
51
|
+
puts "[Done] Collected #{@jobs_count} job offers from #{url}. Data stored in: #{filepath}." if page == @total_pages
|
54
52
|
end
|
55
53
|
|
56
|
-
|
54
|
+
def get_row(job_url)
|
55
|
+
job_page = Nokogiri::HTML(open_page(job_url))
|
56
|
+
offer_text = job_page.css('.job-offer__description').to_s
|
57
|
+
|
58
|
+
location = Support::OfferParser.get_location(offer_text)
|
59
|
+
keywords = Support::OfferParser.get_keywords(offer_text)
|
60
|
+
company = job_page.css('.job-offer__summary a').text
|
61
|
+
|
62
|
+
[job_url, location, keywords, company]
|
63
|
+
end
|
57
64
|
|
58
|
-
def
|
59
|
-
25 * @total_pages
|
65
|
+
def get_jobs_count
|
66
|
+
jobs_count = 25 * @total_pages
|
67
|
+
puts "[Info] There are #{jobs_count} remote jobs on [42JobsRails]."
|
68
|
+
jobs_count
|
60
69
|
end
|
61
70
|
end
|
62
71
|
end
|
data/lib/sites/remote_ok.rb
CHANGED
@@ -4,41 +4,51 @@ module Sites
|
|
4
4
|
class RemoteOk < Base
|
5
5
|
|
6
6
|
HOST = 'https://remoteok.io'.freeze
|
7
|
-
|
7
|
+
PATH = '/remote-dev-jobs'.freeze
|
8
8
|
JOB_ITEM_SELECTOR = 'tr.job'.freeze
|
9
9
|
STORE_DIR = 'data/remote_ok'.freeze
|
10
10
|
|
11
|
-
def initialize
|
12
|
-
super
|
11
|
+
def initialize
|
12
|
+
super
|
13
13
|
end
|
14
14
|
|
15
|
-
def collect_jobs
|
16
|
-
puts "[Info] Getting the data from #{url}
|
15
|
+
def collect_jobs(limit: nil)
|
16
|
+
puts "[Info] Getting the data from #{url}"
|
17
17
|
FileUtils.mkdir_p STORE_DIR
|
18
18
|
|
19
19
|
CSV.open(filepath, 'w') do |csv|
|
20
20
|
doc.css(JOB_ITEM_SELECTOR).each do |link|
|
21
|
+
return if limit == @rows_count
|
22
|
+
|
21
23
|
job_url = "#{HOST}#{link["data-url"]}"
|
22
|
-
puts "[Info]
|
23
|
-
job_page = Nokogiri::HTML(open_page(job_url))
|
24
|
-
offer_text = job_page.css('td.heading').to_s
|
24
|
+
puts "[Info] Parsing #{job_url}..."
|
25
25
|
|
26
|
-
|
27
|
-
keywords = Support::OfferParser.get_keywords(offer_text)
|
26
|
+
csv << get_row(job_url)
|
28
27
|
|
29
|
-
|
28
|
+
@rows_count += 1
|
30
29
|
end
|
31
30
|
end
|
32
31
|
|
33
|
-
puts "[Done] Collected #{@
|
32
|
+
puts "[Done] Collected #{@rows_count} job offers from #{url}. Data stored in: #{filepath}."
|
34
33
|
end
|
35
34
|
|
36
35
|
private
|
37
36
|
|
38
|
-
def
|
39
|
-
|
40
|
-
|
41
|
-
|
37
|
+
def get_row(job_url)
|
38
|
+
job_page = Nokogiri::HTML(open_page(job_url))
|
39
|
+
offer_text = job_page.css('td.heading').to_s
|
40
|
+
|
41
|
+
location = Support::OfferParser.get_location(offer_text)
|
42
|
+
keywords = Support::OfferParser.get_keywords(offer_text)
|
43
|
+
company = job_page.css('a.companyLink h3').text
|
44
|
+
|
45
|
+
[job_url, location, keywords, company]
|
46
|
+
end
|
47
|
+
|
48
|
+
def get_jobs_count
|
49
|
+
jobs_count = doc.css(JOB_ITEM_SELECTOR).map { |link| link['data-url'] }.size
|
50
|
+
puts "[Info] There are #{jobs_count} remote jobs on [RemoteOK]."
|
51
|
+
jobs_count
|
42
52
|
end
|
43
53
|
end
|
44
54
|
end
|
@@ -4,49 +4,58 @@ module Sites
|
|
4
4
|
class WeWorkRemotely < Base
|
5
5
|
|
6
6
|
HOST = 'https://weworkremotely.com'.freeze
|
7
|
-
|
7
|
+
PATH = '/categories/remote-programming-jobs'.freeze
|
8
8
|
DEVOPS = '/categories/remote-devops-sysadmin-jobs'.freeze
|
9
9
|
JOB_ITEM_SELECTOR = '.jobs-container li a'.freeze
|
10
10
|
STORE_DIR = 'data/we_work_remotely'
|
11
11
|
|
12
|
-
def initialize
|
13
|
-
super
|
12
|
+
def initialize
|
13
|
+
super
|
14
14
|
end
|
15
15
|
|
16
|
-
def collect_jobs
|
17
|
-
puts "[Info] Getting the data from #{url}
|
16
|
+
def collect_jobs(limit: nil)
|
17
|
+
puts "[Info] Getting the data from #{url}"
|
18
18
|
FileUtils.mkdir_p STORE_DIR
|
19
19
|
|
20
20
|
CSV.open(filepath, 'w') do |csv|
|
21
21
|
doc.css(JOB_ITEM_SELECTOR).each do |link|
|
22
22
|
if link["href"].start_with?("/remote-jobs")
|
23
|
-
|
24
|
-
puts "[Info] Processing #{job_url}..."
|
25
|
-
job_page = Nokogiri::HTML(open_page(job_url))
|
26
|
-
offer_text = job_page.css('.listing-container').to_s
|
23
|
+
return if limit == @rows_count
|
27
24
|
|
28
|
-
|
29
|
-
|
25
|
+
job_url = "#{HOST}#{link["href"]}"
|
26
|
+
puts "[Info] Parsing #{job_url}..."
|
30
27
|
|
31
|
-
|
28
|
+
csv << get_row(job_url)
|
32
29
|
|
33
|
-
|
30
|
+
@rows_count += 1
|
34
31
|
end
|
35
32
|
end
|
36
33
|
end
|
37
34
|
|
38
|
-
puts "[Done] Collected #{@
|
35
|
+
puts "[Done] Collected #{@rows_count} job offers from #{url}. Data stored in: #{filepath}."
|
39
36
|
end
|
40
37
|
|
41
38
|
private
|
42
39
|
|
43
|
-
def
|
44
|
-
|
40
|
+
def get_row(job_url)
|
41
|
+
job_page = Nokogiri::HTML(open_page(job_url))
|
42
|
+
offer_text = job_page.css('.listing-container').to_s
|
43
|
+
|
44
|
+
region = job_page.css('.listing-header-container span.region').first
|
45
|
+
location = job_page.css('.listing-header-container span.location').first
|
46
|
+
keywords = Support::OfferParser.get_keywords(offer_text)
|
47
|
+
company = job_page.css('.listing-header-container span.company').first
|
48
|
+
|
49
|
+
[job_url, location, region, keywords, company]
|
50
|
+
end
|
51
|
+
|
52
|
+
def get_jobs_count
|
53
|
+
jobs_count = doc.css(JOB_ITEM_SELECTOR)
|
45
54
|
.map { |link| link['href'] }
|
46
55
|
.select { |href| href.start_with?('/remote-jobs') }
|
47
56
|
.size
|
48
|
-
puts "[Info] There
|
49
|
-
|
57
|
+
puts "[Info] There are #{jobs_count} remote jobs on [WeWorkRemotely]."
|
58
|
+
jobs_count
|
50
59
|
end
|
51
60
|
end
|
52
61
|
end
|
data/lib/support/offer_parser.rb
CHANGED
@@ -19,11 +19,11 @@ module Support
|
|
19
19
|
indexes.each do |index|
|
20
20
|
next if index[0].nil?
|
21
21
|
|
22
|
-
locations << tokens[index[0] + 1]
|
23
|
-
locations << tokens[index[0] - 1]
|
22
|
+
locations << tokens[index[0] + 1] if index[1] == 'location'
|
23
|
+
locations << tokens[index[0] - 1..index[0] + 2] if index[1] == 'based'
|
24
24
|
end
|
25
25
|
|
26
|
-
locations.join('
|
26
|
+
locations.join(' ').capitalize
|
27
27
|
end
|
28
28
|
|
29
29
|
def self.get_keywords(content, keywords = KEYWORDS)
|
@@ -42,9 +42,7 @@ module Support
|
|
42
42
|
|
43
43
|
def self.get_tokens(content)
|
44
44
|
content
|
45
|
-
.gsub('
|
46
|
-
.gsub(',', '')
|
47
|
-
.gsub(':', '')
|
45
|
+
.gsub(/\W+/, ' ') # remove non letters
|
48
46
|
.downcase
|
49
47
|
.split(/[\s-]/)
|
50
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_job_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rafał Trojanowski
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-01-
|
11
|
+
date: 2019-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -162,6 +162,8 @@ files:
|
|
162
162
|
- bin/setup
|
163
163
|
- exe/remote_job_scraper
|
164
164
|
- lib/remote_job_scraper.rb
|
165
|
+
- lib/remote_job_scraper/cli.rb
|
166
|
+
- lib/remote_job_scraper/configuration.rb
|
165
167
|
- lib/remote_job_scraper/version.rb
|
166
168
|
- lib/sites/base.rb
|
167
169
|
- lib/sites/elixir_radar.rb
|