remote_job_scraper 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +17 -1
- data/lib/remote_job_scraper.rb +13 -39
- data/lib/remote_job_scraper/cli.rb +75 -0
- data/lib/remote_job_scraper/configuration.rb +10 -0
- data/lib/remote_job_scraper/version.rb +1 -1
- data/lib/sites/base.rb +14 -25
- data/lib/sites/elixir_radar.rb +1 -1
- data/lib/sites/jobs_rails42.rb +39 -30
- data/lib/sites/remote_ok.rb +26 -16
- data/lib/sites/we_work_remotely.rb +27 -18
- data/lib/support/offer_parser.rb +4 -6
- data/lib/support/spreadsheet_creator.rb +2 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6629f970e902f9ec6789b5574156730e7c9066a0
|
4
|
+
data.tar.gz: 6b4eca4e512e432f4c4e642da57253e4756d0290
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 88559d103a0b1e5d70185641684e05f221d45fca0ea146b15324d34cc32c0ae40f42473f5564cc215410675f4b532e4a7a158fc659f73bfb6fb8c2d16208da11
|
7
|
+
data.tar.gz: f6e2e97da63b78a378200be0925221d80e569f4b1c3573cfa9928c3910332e5be834dc89bb4df0803c280b1a7ccff36e60519778d9c709295fe9ed1fa4612ed6
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -2,8 +2,9 @@
|
|
2
2
|
|
3
3
|
Ruby gem that collects job offers for remote positions with ease.
|
4
4
|
|
5
|
-
Going through many job listings and finding the right one may be a time-consuming process. That's why this tool has been built. It allows to automate the process, retrieve necessary data and store it in CSV/Excel file in just a few minutes. The main focus is to inform
|
5
|
+
Going through many job listings and finding the right one may be a time-consuming process. That's why this tool has been built. It lets you automate the process, retrieve the necessary data, and store it in a CSV/Excel file in just a few minutes. The main focus is to inform the user about the location (time zone) required for a position.
|
6
6
|
|
7
|
+

|
7
8
|
|
8
9
|
## Installation
|
9
10
|
|
@@ -18,6 +19,21 @@ Going through many job listings and finding the right one may be a time-consumin
|
|
18
19
|
* [x] 2.4.1
|
19
20
|
* [ ] 2.0.0 (https://github.com/rafaltrojanowski/remote_job_scraper/issues/1)
|
20
21
|
|
22
|
+
## Running tests
|
23
|
+
|
24
|
+
To run the tests:
|
25
|
+
|
26
|
+
```
|
27
|
+
$ rspec spec
|
28
|
+
```
|
29
|
+
|
30
|
+
A few tests run very slowly because they parse a large number of pages.
|
31
|
+
You can skip slow tests by running:
|
32
|
+
|
33
|
+
```
|
34
|
+
$ rspec . --tag ~speed:slow
|
35
|
+
```
|
36
|
+
|
21
37
|
## Development
|
22
38
|
|
23
39
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/remote_job_scraper.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require 'remote_job_scraper/version'
|
2
|
+
require 'remote_job_scraper/configuration'
|
3
|
+
require 'remote_job_scraper/cli'
|
2
4
|
|
3
5
|
require 'sites/we_work_remotely'
|
4
6
|
require 'sites/remote_ok'
|
@@ -11,51 +13,23 @@ require 'support/spreadsheet_creator'
|
|
11
13
|
require 'nokogiri'
|
12
14
|
require 'open-uri'
|
13
15
|
require 'csv'
|
14
|
-
require "thor"
|
15
16
|
|
16
17
|
module RemoteJobScraper
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
desc 'collect_jobs', "Retrieves data from #{AVAILABLE_SITES.join(', ')}"
|
23
|
-
def collect_jobs
|
24
|
-
[Sites::WeWorkRemotely, Sites::RemoteOk].each do |klass|
|
25
|
-
klass.new.collect_jobs
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
desc 'collect_jobs_from SITE', "Retrieves data from SITE, e.g. #{AVAILABLE_SITES.sample}"
|
30
|
-
def collect_jobs_from(site)
|
31
|
-
case site
|
32
|
-
when 'we_work_remotely'
|
33
|
-
then Sites::WeWorkRemotely.new.collect_jobs
|
34
|
-
when 'remote_ok'
|
35
|
-
then Sites::RemoteOk.new.collect_jobs
|
36
|
-
when '42jobs_rails'
|
37
|
-
then Sites::JobsRails42.new.collect_jobs
|
38
|
-
else
|
39
|
-
raise "#{site} is not correct. Use: #{AVAILABLE_SITES.join(', ')}."
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
desc 'generate_summary', "Merges data from #{AVAILABLE_SITES.join(', ')} and exports to XLS file"
|
44
|
-
def generate_summary
|
45
|
-
Support::SpreadsheetCreator.generate
|
46
|
-
end
|
19
|
+
class << self
|
20
|
+
attr_accessor :configuration
|
21
|
+
end
|
47
22
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
puts "This command will remove #{Dir.pwd}/#{dirname} permanently"
|
52
|
-
puts "Press Ctrl-C to abort."
|
23
|
+
def self.configuration
|
24
|
+
@configuration ||= Configuration.new
|
25
|
+
end
|
53
26
|
|
54
|
-
|
27
|
+
def self.reset
|
28
|
+
@configuration = Configuration.new
|
29
|
+
end
|
55
30
|
|
56
|
-
|
57
|
-
|
58
|
-
end
|
31
|
+
def self.configure
|
32
|
+
yield(configuration)
|
59
33
|
end
|
60
34
|
|
61
35
|
def self.root
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require "thor"
|
2
|
+
|
3
|
+
module RemoteJobScraper
|
4
|
+
class CLI < Thor
|
5
|
+
|
6
|
+
AVAILABLE_SITES = %w(we_work_remotely remote_ok 42jobs_rails)
|
7
|
+
|
8
|
+
desc 'collect_jobs LIMIT DELAY',
|
9
|
+
"Retrieves data from #{AVAILABLE_SITES.join(', ')}.
|
10
|
+
[Example]: remote_job_scraper collect_jobs 10 9.0..10.0
|
11
|
+
"
|
12
|
+
def collect_jobs(limit = nil, delay = nil)
|
13
|
+
limit = limit.to_i
|
14
|
+
limit = limit.zero? ? nil : limit
|
15
|
+
|
16
|
+
begin
|
17
|
+
unless delay.nil?
|
18
|
+
arr = delay.split('..').map{ |d| Float(d) }
|
19
|
+
range = arr[0]..arr[1]
|
20
|
+
RemoteJobScraper.configuration.delay_range = range
|
21
|
+
end
|
22
|
+
rescue
|
23
|
+
raise "Passed: DELAY=#{range} DELAY need to be in format: 2.0..5.0 "
|
24
|
+
end
|
25
|
+
|
26
|
+
[
|
27
|
+
Sites::WeWorkRemotely,
|
28
|
+
Sites::RemoteOk,
|
29
|
+
Sites::JobsRails42
|
30
|
+
].each do |klass|
|
31
|
+
klass.new.collect_jobs(limit: limit)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
desc 'collect_jobs_from SITE LIMIT',
|
36
|
+
"Retrieves data from SITE with LIMIT, e.g. #{AVAILABLE_SITES.sample}
|
37
|
+
[Example]: remote_job_scraper collect_jobs_from remote_ok 10
|
38
|
+
"
|
39
|
+
def collect_jobs_from(site, limit=nil)
|
40
|
+
limit = limit.to_i
|
41
|
+
limit = limit.zero? ? nil : limit
|
42
|
+
|
43
|
+
case site
|
44
|
+
when 'we_work_remotely'
|
45
|
+
then Sites::WeWorkRemotely.new.collect_jobs(limit: limit)
|
46
|
+
when 'remote_ok'
|
47
|
+
then Sites::RemoteOk.new.collect_jobs(limit: limit)
|
48
|
+
when '42jobs_rails'
|
49
|
+
then Sites::JobsRails42.new.collect_jobs(limit: limit)
|
50
|
+
else
|
51
|
+
raise "#{site} is not correct. Use: #{AVAILABLE_SITES.join(', ')}."
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
desc 'generate_summary',
|
56
|
+
"Merges data from #{AVAILABLE_SITES.join(', ')} and exports to
|
57
|
+
separate sheets in XLS file.
|
58
|
+
"
|
59
|
+
def generate_summary
|
60
|
+
Support::SpreadsheetCreator.generate
|
61
|
+
end
|
62
|
+
|
63
|
+
desc 'remove DIRNAME', "Removes DIRNAME (default: 'data'). Use carefully."
|
64
|
+
def remove(dirname = 'data')
|
65
|
+
puts "[Warning!]"
|
66
|
+
puts "This command will remove #{Dir.pwd}/#{dirname} permanently"
|
67
|
+
puts "Press Ctrl-C to abort."
|
68
|
+
|
69
|
+
sleep 3
|
70
|
+
|
71
|
+
FileUtils.rm_rf(dirname)
|
72
|
+
puts "Removed data in #{Dir.pwd}/#{dirname}."
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
data/lib/sites/base.rb
CHANGED
@@ -1,44 +1,33 @@
|
|
1
1
|
module Sites
|
2
2
|
class Base
|
3
3
|
|
4
|
-
attr_reader :
|
4
|
+
attr_reader :doc, :url, :rows_count, :jobs_count
|
5
5
|
|
6
|
-
def initialize
|
7
|
-
@
|
8
|
-
@url = build_url
|
6
|
+
def initialize
|
7
|
+
@url = "#{self.class::HOST}#{self.class::PATH}"
|
9
8
|
@doc = Nokogiri::HTML(open_page(@url))
|
10
|
-
@current_time = Time.
|
9
|
+
@current_time = Time.now
|
11
10
|
@timestamp = @current_time.strftime("%Y%m%d%H%M%S")
|
12
|
-
@
|
11
|
+
@rows_count = 0
|
12
|
+
@jobs_count = get_jobs_count
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
sleep(rand(0..2.0)) unless ENV['RAILS_ENV'] == 'test' # less mechanical behaviour
|
15
|
+
private
|
17
16
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
end
|
17
|
+
def open_page(url)
|
18
|
+
sleep(rand(delay_range)) unless ENV['RAILS_ENV'] == 'test' # less mechanical behaviour
|
19
|
+
options = ENV['RAILS_ENV'] == 'test' ? {} : { 'User-Agent' => user_agent }
|
20
|
+
open(url, options)
|
23
21
|
end
|
24
22
|
|
25
|
-
|
23
|
+
def delay_range
|
24
|
+
RemoteJobScraper.configuration.delay_range
|
25
|
+
end
|
26
26
|
|
27
27
|
def user_agent
|
28
28
|
Support::UserAgent::LIST.sample
|
29
29
|
end
|
30
30
|
|
31
|
-
def build_url
|
32
|
-
case job_type
|
33
|
-
when :programming
|
34
|
-
then "#{self.class::HOST}#{self.class::PROGRAMMING}"
|
35
|
-
when :devops
|
36
|
-
then "#{self.class::HOST}#{self.class::DEVOPS}"
|
37
|
-
else
|
38
|
-
raise "Error"
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
31
|
def filepath
|
43
32
|
return test_filepath if ENV["RAILS_ENV"] == 'test'
|
44
33
|
"#{self.class::STORE_DIR}/#{@timestamp}.csv"
|
data/lib/sites/elixir_radar.rb
CHANGED
data/lib/sites/jobs_rails42.rb
CHANGED
@@ -1,62 +1,71 @@
|
|
1
1
|
module Sites
|
2
2
|
class JobsRails42 < Base
|
3
3
|
|
4
|
-
# @
|
5
|
-
# to grab more offers than just first page (25 items)
|
6
|
-
|
7
|
-
# I had to rename this class because we are not allowed to have numbers
|
4
|
+
# @NOTE: I had to rename this class because we are not allowed to have numbers
|
8
5
|
# on the beginning of the class name (42JobsRails won't work).
|
6
|
+
# file paths follow this convention
|
9
7
|
|
10
8
|
HOST = 'https://www.42jobs.io'.freeze
|
11
|
-
|
9
|
+
PATH = '/rails/jobs-remote'.freeze
|
12
10
|
JOB_ITEM_SELECTOR = 'li.job-offers__item a'.freeze
|
13
11
|
STORE_DIR = 'data/jobs_rails42'.freeze
|
14
12
|
|
15
|
-
def initialize
|
16
|
-
@
|
17
|
-
@
|
18
|
-
@doc = nil
|
19
|
-
@current_time = Time.new
|
13
|
+
def initialize
|
14
|
+
@url = "#{self.class::HOST}#{self.class::PATH}"
|
15
|
+
@current_time = Time.now
|
20
16
|
@timestamp = @current_time.strftime("%Y%m%d%H%M%S")
|
21
|
-
@
|
22
|
-
@
|
17
|
+
@doc = nil
|
18
|
+
@total_pages = 4
|
19
|
+
@rows_count = 0
|
20
|
+
@jobs_count = get_jobs_count
|
23
21
|
end
|
24
22
|
|
25
|
-
def collect_jobs
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
process_page(
|
23
|
+
def collect_jobs(limit: nil)
|
24
|
+
FileUtils.mkdir_p STORE_DIR
|
25
|
+
|
26
|
+
(1..@total_pages).each do |page|
|
27
|
+
process_page(page: page, limit: limit)
|
30
28
|
end
|
31
29
|
end
|
32
30
|
|
33
31
|
private
|
34
32
|
|
35
|
-
def process_page(
|
36
|
-
|
37
|
-
|
33
|
+
def process_page(page:, limit:)
|
34
|
+
current_page = "#{@url}?page=#{page}"
|
35
|
+
doc = Nokogiri::HTML(open_page(current_page))
|
36
|
+
puts "[Info] Getting the data from #{current_page}"
|
38
37
|
|
39
38
|
CSV.open(filepath, 'ab') do |csv|
|
40
39
|
doc.css(JOB_ITEM_SELECTOR).each do |link|
|
40
|
+
return if limit == @rows_count
|
41
|
+
|
41
42
|
job_url = "#{HOST}#{link["href"]}"
|
42
|
-
puts "[Info]
|
43
|
-
job_page = Nokogiri::HTML(open_page(job_url))
|
44
|
-
offer_text = job_page.css('.job-offer__description').to_s
|
43
|
+
puts "[Info] Parsing #{job_url}..."
|
45
44
|
|
46
|
-
|
47
|
-
keywords = Support::OfferParser.get_keywords(offer_text)
|
45
|
+
csv << get_row(job_url)
|
48
46
|
|
49
|
-
|
47
|
+
@rows_count += 1
|
50
48
|
end
|
51
49
|
end
|
52
50
|
|
53
|
-
puts "[Done] Collected #{@
|
51
|
+
puts "[Done] Collected #{@jobs_count} job offers from #{url}. Data stored in: #{filepath}." if page == @total_pages
|
54
52
|
end
|
55
53
|
|
56
|
-
|
54
|
+
def get_row(job_url)
|
55
|
+
job_page = Nokogiri::HTML(open_page(job_url))
|
56
|
+
offer_text = job_page.css('.job-offer__description').to_s
|
57
|
+
|
58
|
+
location = Support::OfferParser.get_location(offer_text)
|
59
|
+
keywords = Support::OfferParser.get_keywords(offer_text)
|
60
|
+
company = job_page.css('.job-offer__summary a').text
|
61
|
+
|
62
|
+
[job_url, location, keywords, company]
|
63
|
+
end
|
57
64
|
|
58
|
-
def
|
59
|
-
25 * @total_pages
|
65
|
+
def get_jobs_count
|
66
|
+
jobs_count = 25 * @total_pages
|
67
|
+
puts "[Info] There are #{jobs_count} remote jobs on [42JobsRails]."
|
68
|
+
jobs_count
|
60
69
|
end
|
61
70
|
end
|
62
71
|
end
|
data/lib/sites/remote_ok.rb
CHANGED
@@ -4,41 +4,51 @@ module Sites
|
|
4
4
|
class RemoteOk < Base
|
5
5
|
|
6
6
|
HOST = 'https://remoteok.io'.freeze
|
7
|
-
|
7
|
+
PATH = '/remote-dev-jobs'.freeze
|
8
8
|
JOB_ITEM_SELECTOR = 'tr.job'.freeze
|
9
9
|
STORE_DIR = 'data/remote_ok'.freeze
|
10
10
|
|
11
|
-
def initialize
|
12
|
-
super
|
11
|
+
def initialize
|
12
|
+
super
|
13
13
|
end
|
14
14
|
|
15
|
-
def collect_jobs
|
16
|
-
puts "[Info] Getting the data from #{url}
|
15
|
+
def collect_jobs(limit: nil)
|
16
|
+
puts "[Info] Getting the data from #{url}"
|
17
17
|
FileUtils.mkdir_p STORE_DIR
|
18
18
|
|
19
19
|
CSV.open(filepath, 'w') do |csv|
|
20
20
|
doc.css(JOB_ITEM_SELECTOR).each do |link|
|
21
|
+
return if limit == @rows_count
|
22
|
+
|
21
23
|
job_url = "#{HOST}#{link["data-url"]}"
|
22
|
-
puts "[Info]
|
23
|
-
job_page = Nokogiri::HTML(open_page(job_url))
|
24
|
-
offer_text = job_page.css('td.heading').to_s
|
24
|
+
puts "[Info] Parsing #{job_url}..."
|
25
25
|
|
26
|
-
|
27
|
-
keywords = Support::OfferParser.get_keywords(offer_text)
|
26
|
+
csv << get_row(job_url)
|
28
27
|
|
29
|
-
|
28
|
+
@rows_count += 1
|
30
29
|
end
|
31
30
|
end
|
32
31
|
|
33
|
-
puts "[Done] Collected #{@
|
32
|
+
puts "[Done] Collected #{@rows_count} job offers from #{url}. Data stored in: #{filepath}."
|
34
33
|
end
|
35
34
|
|
36
35
|
private
|
37
36
|
|
38
|
-
def
|
39
|
-
|
40
|
-
|
41
|
-
|
37
|
+
def get_row(job_url)
|
38
|
+
job_page = Nokogiri::HTML(open_page(job_url))
|
39
|
+
offer_text = job_page.css('td.heading').to_s
|
40
|
+
|
41
|
+
location = Support::OfferParser.get_location(offer_text)
|
42
|
+
keywords = Support::OfferParser.get_keywords(offer_text)
|
43
|
+
company = job_page.css('a.companyLink h3').text
|
44
|
+
|
45
|
+
[job_url, location, keywords, company]
|
46
|
+
end
|
47
|
+
|
48
|
+
def get_jobs_count
|
49
|
+
jobs_count = doc.css(JOB_ITEM_SELECTOR).map { |link| link['data-url'] }.size
|
50
|
+
puts "[Info] There are #{jobs_count} remote jobs on [RemoteOK]."
|
51
|
+
jobs_count
|
42
52
|
end
|
43
53
|
end
|
44
54
|
end
|
@@ -4,49 +4,58 @@ module Sites
|
|
4
4
|
class WeWorkRemotely < Base
|
5
5
|
|
6
6
|
HOST = 'https://weworkremotely.com'.freeze
|
7
|
-
|
7
|
+
PATH = '/categories/remote-programming-jobs'.freeze
|
8
8
|
DEVOPS = '/categories/remote-devops-sysadmin-jobs'.freeze
|
9
9
|
JOB_ITEM_SELECTOR = '.jobs-container li a'.freeze
|
10
10
|
STORE_DIR = 'data/we_work_remotely'
|
11
11
|
|
12
|
-
def initialize
|
13
|
-
super
|
12
|
+
def initialize
|
13
|
+
super
|
14
14
|
end
|
15
15
|
|
16
|
-
def collect_jobs
|
17
|
-
puts "[Info] Getting the data from #{url}
|
16
|
+
def collect_jobs(limit: nil)
|
17
|
+
puts "[Info] Getting the data from #{url}"
|
18
18
|
FileUtils.mkdir_p STORE_DIR
|
19
19
|
|
20
20
|
CSV.open(filepath, 'w') do |csv|
|
21
21
|
doc.css(JOB_ITEM_SELECTOR).each do |link|
|
22
22
|
if link["href"].start_with?("/remote-jobs")
|
23
|
-
|
24
|
-
puts "[Info] Processing #{job_url}..."
|
25
|
-
job_page = Nokogiri::HTML(open_page(job_url))
|
26
|
-
offer_text = job_page.css('.listing-container').to_s
|
23
|
+
return if limit == @rows_count
|
27
24
|
|
28
|
-
|
29
|
-
|
25
|
+
job_url = "#{HOST}#{link["href"]}"
|
26
|
+
puts "[Info] Parsing #{job_url}..."
|
30
27
|
|
31
|
-
|
28
|
+
csv << get_row(job_url)
|
32
29
|
|
33
|
-
|
30
|
+
@rows_count += 1
|
34
31
|
end
|
35
32
|
end
|
36
33
|
end
|
37
34
|
|
38
|
-
puts "[Done] Collected #{@
|
35
|
+
puts "[Done] Collected #{@rows_count} job offers from #{url}. Data stored in: #{filepath}."
|
39
36
|
end
|
40
37
|
|
41
38
|
private
|
42
39
|
|
43
|
-
def
|
44
|
-
|
40
|
+
def get_row(job_url)
|
41
|
+
job_page = Nokogiri::HTML(open_page(job_url))
|
42
|
+
offer_text = job_page.css('.listing-container').to_s
|
43
|
+
|
44
|
+
region = job_page.css('.listing-header-container span.region').first
|
45
|
+
location = job_page.css('.listing-header-container span.location').first
|
46
|
+
keywords = Support::OfferParser.get_keywords(offer_text)
|
47
|
+
company = job_page.css('.listing-header-container span.company').first
|
48
|
+
|
49
|
+
[job_url, location, region, keywords, company]
|
50
|
+
end
|
51
|
+
|
52
|
+
def get_jobs_count
|
53
|
+
jobs_count = doc.css(JOB_ITEM_SELECTOR)
|
45
54
|
.map { |link| link['href'] }
|
46
55
|
.select { |href| href.start_with?('/remote-jobs') }
|
47
56
|
.size
|
48
|
-
puts "[Info] There
|
49
|
-
|
57
|
+
puts "[Info] There are #{jobs_count} remote jobs on [WeWorkRemotely]."
|
58
|
+
jobs_count
|
50
59
|
end
|
51
60
|
end
|
52
61
|
end
|
data/lib/support/offer_parser.rb
CHANGED
@@ -19,11 +19,11 @@ module Support
|
|
19
19
|
indexes.each do |index|
|
20
20
|
next if index[0].nil?
|
21
21
|
|
22
|
-
locations << tokens[index[0] + 1]
|
23
|
-
locations << tokens[index[0] - 1]
|
22
|
+
locations << tokens[index[0] + 1] if index[1] == 'location'
|
23
|
+
locations << tokens[index[0] - 1..index[0] + 2] if index[1] == 'based'
|
24
24
|
end
|
25
25
|
|
26
|
-
locations.join('
|
26
|
+
locations.join(' ').capitalize
|
27
27
|
end
|
28
28
|
|
29
29
|
def self.get_keywords(content, keywords = KEYWORDS)
|
@@ -42,9 +42,7 @@ module Support
|
|
42
42
|
|
43
43
|
def self.get_tokens(content)
|
44
44
|
content
|
45
|
-
.gsub('
|
46
|
-
.gsub(',', '')
|
47
|
-
.gsub(':', '')
|
45
|
+
.gsub(/\W+/, ' ') # remove non letters
|
48
46
|
.downcase
|
49
47
|
.split(/[\s-]/)
|
50
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_job_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rafał Trojanowski
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-01-
|
11
|
+
date: 2019-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -162,6 +162,8 @@ files:
|
|
162
162
|
- bin/setup
|
163
163
|
- exe/remote_job_scraper
|
164
164
|
- lib/remote_job_scraper.rb
|
165
|
+
- lib/remote_job_scraper/cli.rb
|
166
|
+
- lib/remote_job_scraper/configuration.rb
|
165
167
|
- lib/remote_job_scraper/version.rb
|
166
168
|
- lib/sites/base.rb
|
167
169
|
- lib/sites/elixir_radar.rb
|