remote_job_scraper 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: debb2440ac5898b2fc0642017b9bf01aa6adb89f
4
+ data.tar.gz: e370546575983d780c9ed6aa28d7611270b08b86
5
+ SHA512:
6
+ metadata.gz: '0844c2a152641653717d587829e8a73aca58292afb175e014a4edc08386ff009de153703fc37d8d271cfd5919441a73a25b5f6ee79c2dda2cd375d1bccbe2e70'
7
+ data.tar.gz: cfd1916966d735458d11f4953f0ec26932833e7c03fb028890bfb3f6226ff30c0ce1c58f5a733597f070d3b185dca3597abcacf8f3aaa6befc690cb699366cd5
data/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ data/
10
+
11
+ # rspec failure tracking
12
+ .rspec_status
13
+ .DS_Store
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.4.1
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.0.0
5
+ before_install: gem install bundler -v 1.16.0
@@ -0,0 +1,74 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ In the interest of fostering an open and welcoming environment, we as
6
+ contributors and maintainers pledge to making participation in our project and
7
+ our community a harassment-free experience for everyone, regardless of age, body
8
+ size, disability, ethnicity, gender identity and expression, level of experience,
9
+ nationality, personal appearance, race, religion, or sexual identity and
10
+ orientation.
11
+
12
+ ## Our Standards
13
+
14
+ Examples of behavior that contributes to creating a positive environment
15
+ include:
16
+
17
+ * Using welcoming and inclusive language
18
+ * Being respectful of differing viewpoints and experiences
19
+ * Gracefully accepting constructive criticism
20
+ * Focusing on what is best for the community
21
+ * Showing empathy towards other community members
22
+
23
+ Examples of unacceptable behavior by participants include:
24
+
25
+ * The use of sexualized language or imagery and unwelcome sexual attention or
26
+ advances
27
+ * Trolling, insulting/derogatory comments, and personal or political attacks
28
+ * Public or private harassment
29
+ * Publishing others' private information, such as a physical or electronic
30
+ address, without explicit permission
31
+ * Other conduct which could reasonably be considered inappropriate in a
32
+ professional setting
33
+
34
+ ## Our Responsibilities
35
+
36
+ Project maintainers are responsible for clarifying the standards of acceptable
37
+ behavior and are expected to take appropriate and fair corrective action in
38
+ response to any instances of unacceptable behavior.
39
+
40
+ Project maintainers have the right and responsibility to remove, edit, or
41
+ reject comments, commits, code, wiki edits, issues, and other contributions
42
+ that are not aligned to this Code of Conduct, or to ban temporarily or
43
+ permanently any contributor for other behaviors that they deem inappropriate,
44
+ threatening, offensive, or harmful.
45
+
46
+ ## Scope
47
+
48
+ This Code of Conduct applies both within project spaces and in public spaces
49
+ when an individual is representing the project or its community. Examples of
50
+ representing a project or community include using an official project e-mail
51
+ address, posting via an official social media account, or acting as an appointed
52
+ representative at an online or offline event. Representation of a project may be
53
+ further defined and clarified by project maintainers.
54
+
55
+ ## Enforcement
56
+
57
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
58
+ reported by contacting the project team at rt.trojanowski@gmail.com. All
59
+ complaints will be reviewed and investigated and will result in a response that
60
+ is deemed necessary and appropriate to the circumstances. The project team is
61
+ obligated to maintain confidentiality with regard to the reporter of an incident.
62
+ Further details of specific enforcement policies may be posted separately.
63
+
64
+ Project maintainers who do not follow or enforce the Code of Conduct in good
65
+ faith may face temporary or permanent repercussions as determined by other
66
+ members of the project's leadership.
67
+
68
+ ## Attribution
69
+
70
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71
+ available at [http://contributor-covenant.org/version/1/4][version]
72
+
73
+ [homepage]: http://contributor-covenant.org
74
+ [version]: http://contributor-covenant.org/version/1/4/
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in remote_job_scraper.gemspec
6
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,61 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ remote_job_scraper (0.1.0)
5
+ nokogiri
6
+ spreadsheet
7
+ thor
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ addressable (2.6.0)
13
+ public_suffix (>= 2.0.2, < 4.0)
14
+ byebug (10.0.2)
15
+ crack (0.4.3)
16
+ safe_yaml (~> 1.0.0)
17
+ diff-lcs (1.3)
18
+ hashdiff (0.3.8)
19
+ mini_portile2 (2.4.0)
20
+ nokogiri (1.10.1)
21
+ mini_portile2 (~> 2.4.0)
22
+ public_suffix (3.0.3)
23
+ rake (10.5.0)
24
+ rspec (3.8.0)
25
+ rspec-core (~> 3.8.0)
26
+ rspec-expectations (~> 3.8.0)
27
+ rspec-mocks (~> 3.8.0)
28
+ rspec-core (3.8.0)
29
+ rspec-support (~> 3.8.0)
30
+ rspec-expectations (3.8.1)
31
+ diff-lcs (>= 1.2.0, < 2.0)
32
+ rspec-support (~> 3.8.0)
33
+ rspec-mocks (3.8.0)
34
+ diff-lcs (>= 1.2.0, < 2.0)
35
+ rspec-support (~> 3.8.0)
36
+ rspec-support (3.8.0)
37
+ ruby-ole (1.2.12.1)
38
+ safe_yaml (1.0.4)
39
+ spreadsheet (1.1.8)
40
+ ruby-ole (>= 1.0)
41
+ thor (0.20.3)
42
+ vcr (4.0.0)
43
+ webmock (3.5.1)
44
+ addressable (>= 2.3.6)
45
+ crack (>= 0.3.2)
46
+ hashdiff
47
+
48
+ PLATFORMS
49
+ ruby
50
+
51
+ DEPENDENCIES
52
+ bundler (~> 1.16)
53
+ byebug
54
+ rake (~> 10.0)
55
+ remote_job_scraper!
56
+ rspec (~> 3.0)
57
+ vcr
58
+ webmock
59
+
60
+ BUNDLED WITH
61
+ 1.16.5
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2019 Rafał Trojanowski
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,49 @@
1
+ # RemoteJobScraper 💻🌏
2
+
3
+ Ruby gem that collects job offers for remote positions with ease.
4
+
5
+ Going through many job listings and finding the right one can be a time-consuming process. That's why this tool has been built. It lets you automate the process, retrieve the necessary data and store it in a CSV/Excel file in just a few minutes. The main focus is to inform the user about the location (time zone) required for a position.
6
+
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ ```ruby
13
+ gem 'remote_job_scraper'
14
+ ```
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install remote_job_scraper
23
+
24
+ ## Usage
25
+
26
+ $ bundle exec exe/remote_job_scraper
27
+
28
+
29
+ * Tested with Ruby versions:
30
+
31
+ * [x] 2.4.1
32
+
33
+ ## Development
34
+
35
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
36
+
37
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
38
+
39
+ ## Contributing
40
+
41
+ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/remote_job_scraper. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
42
+
43
+ ## License
44
+
45
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
46
+
47
+ ## Code of Conduct
48
+
49
+ Everyone interacting in the RemoteJobScraper project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/remote_job_scraper/blob/master/CODE_OF_CONDUCT.md).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "remote_job_scraper"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "remote_job_scraper"
4
+
5
+ RemoteJobScraper::CLI.start(ARGV)
@@ -0,0 +1,3 @@
1
# Namespace of the gem; this file only carries the release number.
module RemoteJobScraper
  # Current gem version string.
  VERSION = '0.1.0'
end
@@ -0,0 +1,64 @@
1
+ require 'remote_job_scraper/version'
2
+
3
+ require 'sites/we_work_remotely'
4
+ require 'sites/remote_ok'
5
+ require 'sites/jobs_rails42'
6
+
7
+ require 'support/offer_parser'
8
+ require 'support/user_agent'
9
+ require 'support/spreadsheet_creator'
10
+
11
+ require 'nokogiri'
12
+ require 'open-uri'
13
+ require 'csv'
14
+ require "thor"
15
+
16
# FileUtils is used by `clean_up`; require it explicitly instead of relying on
# another library having loaded it already.
require 'fileutils'

# Top-level namespace of the gem. Hosts the Thor-based command line interface
# and a helper returning the gem's root directory.
module RemoteJobScraper

  # Service names accepted by `collect_jobs_from`.
  AVAILABLE_SERVICES = %w(we_work_remotely remote_ok 42jobs_rails).freeze

  # Command line interface; each public method is a Thor command.
  class CLI < Thor

    # NOTE(review): Sites::JobsRails42 is not included here although the
    # description says "all sites" — confirm whether that is intentional.
    desc 'collect_jobs', 'Retrieves data from all sites'
    def collect_jobs
      [Sites::WeWorkRemotely, Sites::RemoteOk].each do |site|
        site.new.collect_jobs
      end
    end

    # Runs the scraper that matches +service_name+.
    #
    # @param service_name [String] one of AVAILABLE_SERVICES
    # @raise [RuntimeError] when the name is not a known service
    desc 'collect_jobs_from SERVICE_NAME', 'Retrieves data from specified service'
    def collect_jobs_from(service_name)
      case service_name
      when 'we_work_remotely' then Sites::WeWorkRemotely.new.collect_jobs
      when 'remote_ok'        then Sites::RemoteOk.new.collect_jobs
      when '42jobs_rails'     then Sites::JobsRails42.new.collect_jobs
      else
        raise "#{service_name} is not correct. Use: #{AVAILABLE_SERVICES.join(', ')}."
      end
    end

    desc 'generate_summary', 'Collect all data and export to XLS file'
    def generate_summary
      Support::SpreadsheetCreator.generate
    end

    # Deletes the relative `data` directory after a short grace period that
    # lets the user abort with Ctrl-C.
    desc 'clean_up', 'Removes all stored data'
    def clean_up
      puts "This command will remove all stored data."
      puts "Press Ctrl-C to abort."

      sleep 2

      FileUtils.rm_rf('data')
      puts "Removed data."
    end

  end

  # @return [String] parent of the directory that contains this file
  def self.root
    File.dirname(__dir__)
  end
end
data/lib/sites/base.rb ADDED
@@ -0,0 +1,62 @@
1
module Sites
  # Common behaviour shared by the job-board scrapers.
  #
  # Subclasses provide the HOST and PROGRAMMING constants (DEVOPS when the
  # :devops job type is supported), STORE_DIR for the CSV output, and a
  # private #get_count method.
  class Base

    attr_reader :job_type, :doc, :url

    # Builds the listing URL, downloads and parses the listing page and
    # stores the number of offers reported by #get_count.
    #
    # @param job_type [Symbol] :programming or :devops
    # @raise [RuntimeError] when +job_type+ is not supported
    def initialize(job_type: :programming)
      @job_type = job_type
      @url = build_url
      @doc = Nokogiri::HTML(open_page(@url))
      @current_time = Time.now
      @timestamp = @current_time.strftime("%Y%m%d%H%M%S")
      @count = get_count
    end

    # Opens +url+ for reading through open-uri.
    #
    # Goes through URI#open rather than Kernel#open: the latter no longer
    # handles URLs on Ruby 3.0+ and would treat a string starting with "|"
    # as a command to execute.
    #
    # @param url [String] absolute HTTP(S) URL
    # @return [IO-like] the object returned by open-uri
    def open_page(url)
      return URI.parse(url).open if test_env?

      sleep(rand(0..2.0)) # less mechanical behaviour
      URI.parse(url).open('User-Agent' => user_agent)
    end

    private

    # True when RAILS_ENV is "test"; then no delay and no custom User-Agent.
    def test_env?
      ENV['RAILS_ENV'] == 'test'
    end

    # Random entry from the User-Agent list.
    def user_agent
      Support::UserAgent::LIST.sample
    end

    # Joins the subclass' HOST with the path constant matching the job type.
    def build_url
      case job_type
      when :programming
        "#{self.class::HOST}#{self.class::PROGRAMMING}"
      when :devops
        "#{self.class::HOST}#{self.class::DEVOPS}"
      else
        raise "Unsupported job type: #{job_type.inspect}"
      end
    end

    # Number of offers found on the listing page. Must be implemented by
    # every subclass that relies on this class' constructor.
    def get_count
      raise NotImplementedError, "#{self.class} must implement #get_count"
    end

    # Path of the CSV file for this run; redirected to the fixtures
    # directory when running in the test environment.
    def filepath
      return test_filepath if test_env?

      "#{self.class::STORE_DIR}/#{@timestamp}.csv"
    end

    def test_filepath
      "spec/fixtures/data/#{underscore(self.class.name.split('::').last)}/#{@timestamp}.csv"
    end

    # Converts a CamelCase name to snake_case.
    # https://stackoverflow.com/a/5622585
    def underscore(camel_cased_word)
      camel_cased_word
        .gsub(/::/, '/')
        .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
        .gsub(/([a-z\d])([A-Z])/, '\1_\2')
        .tr("-", "_")
        .downcase
    end
  end
end
@@ -0,0 +1,10 @@
1
+ require_relative 'base'
2
+
3
module Sites
  # Placeholder for the Elixir Radar job board. Only the URL parts are
  # declared here; no collect_jobs, get_count or STORE_DIR is defined in this
  # class.
  class ElixirRadar < Base

    # No trailing slash: the path constant below already starts with "/",
    # so a trailing slash here would produce "//" in the joined URL.
    HOST = 'http://plataformatec.com.br'.freeze
    PROGRAMMING = '/elixir-radar/jobs'.freeze

  end
end
@@ -0,0 +1,64 @@
1
module Sites
  # Scraper for the remote Rails offers listed on 42jobs.io.
  #
  # The listing is paginated; the first NUMBER_OF_PAGES pages are visited.
  #
  # The class is called JobsRails42 because a Ruby constant cannot start
  # with a digit (42JobsRails won't work).
  class JobsRails42 < Base

    HOST = 'https://www.42jobs.io'.freeze
    PROGRAMMING = '/rails/jobs-remote'.freeze
    JOB_ITEM_SELECTOR = 'li.job-offers__item a'.freeze
    STORE_DIR = 'data/jobs_rails42'.freeze

    NUMBER_OF_PAGES = 10

    # Unlike the base constructor, no page is downloaded here: every listing
    # page is fetched on demand in #collect_jobs, so #doc stays nil.
    #
    # @param job_type [Symbol] only :programming has a path constant here
    def initialize(job_type: :programming)
      @job_type = job_type
      @url = build_url
      @doc = nil
      @current_time = Time.new
      @timestamp = @current_time.strftime("%Y%m%d%H%M%S")
      @count = 0
    end

    # Visits each listing page and appends one CSV row per offer to the
    # file for this run, then prints how many rows were written.
    def collect_jobs
      @count = 0
      FileUtils.mkdir_p STORE_DIR

      (1..NUMBER_OF_PAGES).each do |page|
        page_url = "#{@url}?page=#{page}"
        process_page(Nokogiri::HTML(open_page(page_url)), page_url)
      end

      puts "[Done] Collected #{@count} job offers from #{url}. Data stores in: #{filepath}."
    end

    private

    # Follows every offer link on one listing page and appends the extracted
    # row to the CSV file (append mode, so all pages share one file).
    def process_page(doc, page_url)
      puts "[Info] Getting the data from #{page_url} at #{@current_time}..."

      CSV.open(filepath, 'ab') do |csv|
        doc.css(JOB_ITEM_SELECTOR).each do |link|
          job_url = "#{HOST}#{link["href"]}"
          puts "[Info] Processing #{job_url}..."
          job_page = Nokogiri::HTML(open_page(job_url))
          offer_text = job_page.css('.job-offer__description').to_s

          location = Support::OfferParser.get_location(offer_text)
          region = nil
          keywords = Support::OfferParser.get_keywords(offer_text)

          csv << [job_url, location, region, keywords]
          @count += 1 # count what was actually written, not an estimate
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require_relative 'base'
2
+
3
module Sites
  # Placeholder for railsjobs.com: only the host is declared so far.
  # NOTE(review): no PROGRAMMING path, STORE_DIR or get_count is defined in
  # this class — confirm it is not meant to be instantiated yet.
  class RailsJobs < Base
    HOST = "https://www.railsjobs.com".freeze
  end
end
@@ -0,0 +1,45 @@
1
+ require_relative 'base'
2
+
3
module Sites
  # Scraper for the remote developer jobs listed on remoteok.io.
  class RemoteOk < Base

    HOST = 'https://remoteok.io'.freeze
    PROGRAMMING = '/remote-dev-jobs'.freeze
    JOB_ITEM_SELECTOR = 'tr.job'.freeze
    STORE_DIR = 'data/remote_ok'.freeze

    # Forwards the given options to the base constructor. Previously the
    # options were overwritten with an empty hash before being passed on.
    #
    # Only PROGRAMMING is declared for this site, so asking for another job
    # type fails while the URL is built instead of being silently ignored.
    #
    # @param args [Hash] options understood by the base class (:job_type)
    def initialize(args = {})
      super(**args)
    end

    # Opens every offer found on the listing page and writes one CSV row
    # per offer: URL, parsed location, region (always nil here), keywords.
    def collect_jobs
      puts "[Info] Getting the data from #{url} at #{@current_time}..."
      FileUtils.mkdir_p STORE_DIR

      CSV.open(filepath, 'w') do |csv|
        doc.css(JOB_ITEM_SELECTOR).each do |row|
          job_url = "#{HOST}#{row["data-url"]}"
          puts "[Info] Processing #{job_url}..."
          job_page = Nokogiri::HTML(open_page(job_url))
          offer_text = job_page.css('td.heading').to_s

          location = Support::OfferParser.get_location(offer_text)
          region = nil
          keywords = Support::OfferParser.get_keywords(offer_text)

          csv << [job_url, location, region, keywords]
        end
      end

      puts "[Done] Collected #{@count} job offers from #{url}. Data stores in: #{filepath}."
    end

    private

    # Number of job rows on the listing page; also printed for the user.
    def get_count
      count = doc.css(JOB_ITEM_SELECTOR).size
      puts "[Info] There is #{count} remote jobs available."
      count
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require_relative 'base'
2
+
3
module Sites
  # Scraper for the programming / devops categories of weworkremotely.com.
  class WeWorkRemotely < Base

    HOST = 'https://weworkremotely.com'.freeze
    PROGRAMMING = '/categories/remote-programming-jobs'.freeze
    DEVOPS = '/categories/remote-devops-sysadmin-jobs'.freeze
    JOB_ITEM_SELECTOR = '.jobs-container li a'.freeze
    STORE_DIR = 'data/we_work_remotely'.freeze

    # Forwards the given options to the base constructor. Previously the
    # options were overwritten with an empty hash, so `job_type: :devops`
    # was ignored and the DEVOPS path could never be selected.
    #
    # @param args [Hash] options understood by the base class (:job_type)
    def initialize(args = {})
      super(**args)
    end

    # Opens every offer link found on the listing page and writes one CSV
    # row per offer: URL, location, region, keywords.
    def collect_jobs
      puts "[Info] Getting the data from #{url} at #{@current_time}..."
      FileUtils.mkdir_p STORE_DIR

      CSV.open(filepath, 'w') do |csv|
        doc.css(JOB_ITEM_SELECTOR).each do |link|
          next unless job_link?(link)

          job_url = "#{HOST}#{link["href"]}"
          puts "[Info] Processing #{job_url}..."
          job_page = Nokogiri::HTML(open_page(job_url))
          offer_text = job_page.css('.listing-container').to_s

          # NOTE(review): these are the matched nodes (or nil), not their
          # text, so the CSV presumably receives the markup — confirm that
          # the spreadsheet step expects this.
          region = job_page.css('span.region').first
          location = job_page.css('span.location').first

          keywords = Support::OfferParser.get_keywords(offer_text)

          csv << [job_url, location, region, keywords]
        end
      end

      puts "[Done] Collected #{@count} job offers from #{url}. Data stores in: #{filepath}."
    end

    private

    # True for anchors that point to an offer page. Anchors without an href
    # attribute are treated as non-matching instead of raising.
    def job_link?(link)
      link['href'].to_s.start_with?('/remote-jobs')
    end

    # Number of offer links on the listing page; also printed for the user.
    def get_count
      count = doc.css(JOB_ITEM_SELECTOR).count { |link| job_link?(link) }
      puts "[Info] There is #{count} remote jobs available."
      count
    end
  end
end
@@ -0,0 +1,52 @@
1
module Support
  # Naive text heuristics that pull a location hint and a list of known
  # keywords out of the text of a job offer.
  module OfferParser

    # Marker words: the word after "location" and the word before "based"
    # are taken as location hints.
    LOCATION_DICT = %w[location based].freeze
    KEYWORDS = %w[ruby elixir react remote graphql].freeze

    # Collects the words next to the marker words found in +content+.
    #
    # Only the first occurrence of each marker is considered. A marker at
    # the edge of the text (nothing after "location", nothing before
    # "based") contributes nothing instead of raising or wrapping around to
    # the other end of the text.
    #
    # @param content [String, nil] offer text
    # @param dict [Array<String>] marker words to look for
    # @return [String] hints joined with ", ", first letter capitalized
    def self.get_location(content, dict = LOCATION_DICT)
      tokens = get_tokens(content)

      hints = dict.map do |marker|
        position = tokens.index(marker)
        next if position.nil?

        case marker
        when 'location'
          tokens[position + 1]
        when 'based'
          # position - 1 would be -1 for the first token, i.e. the last one
          tokens[position - 1] if position > 0
        end
      end

      hints.compact.join(', ').capitalize
    end

    # Lists the entries of +keywords+ that occur as a word in +content+.
    #
    # @param content [String, nil] offer text
    # @param keywords [Array<String>] lowercase words to look for
    # @return [String] capitalized matches joined with ", ", in the order
    #   given by +keywords+
    def self.get_keywords(content, keywords = KEYWORDS)
      tokens = get_tokens(content)

      keywords
        .select { |keyword| tokens.include?(keyword) }
        .map(&:capitalize)
        .join(', ')
    end

    # Splits +content+ into lowercase words.
    #
    # Dots, commas and colons are dropped; whitespace and hyphens separate
    # words. Runs of separators never produce empty tokens.
    #
    # @param content [String, nil] text to split; nil yields no tokens
    # @return [Array<String>]
    def self.get_tokens(content)
      content
        .to_s
        .delete('.,:')
        .downcase
        .split(/[\s-]+/)
        .reject(&:empty?)
    end
  end
end