news_scraper 0.1.2 → 1.0.0
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -1
- data/README.md +2 -2
- data/config/article_scrape_patterns.yml +15 -0
- data/lib/news_scraper/cli.rb +9 -8
- data/lib/news_scraper/configuration.rb +33 -0
- data/lib/news_scraper/errors.rb +12 -3
- data/lib/news_scraper/extractors_helpers.rb +6 -2
- data/lib/news_scraper/scraper.rb +11 -4
- data/lib/news_scraper/trainer/preset_selector.rb +30 -40
- data/lib/news_scraper/trainer/url_trainer.rb +15 -20
- data/lib/news_scraper/transformers/article.rb +19 -20
- data/lib/news_scraper/transformers/nokogiri/functions.rb +15 -0
- data/lib/news_scraper/transformers/trainer_article.rb +17 -3
- data/lib/news_scraper/version.rb +1 -1
- data/lib/news_scraper.rb +18 -2
- data/news_scraper.gemspec +2 -0
- metadata +38 -3
- data/lib/news_scraper/constants.rb +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: db7d631f3f6cf73ff2e57b9e472804651b9fe1e0
+  data.tar.gz: 1045878eb97749d6b264a486ac34bfb89f4796dd
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a53423be5dbda33ead7dbb46bc494e40fcf412172a291496128b40512c985fc157c646481e1b8a183be2f709486e2cc7ec27d47a17bb9715c3b19dbe09dd7e42
+  data.tar.gz: eb43f129a0ca1a9f6eb02f24bfeb891583c96b0cce548afe43cf613231e8908a66e381347aeb16f5be9ea4af3ca96f82dcaf4da005951fb5ae5936d3f3530cbc
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/README.md
CHANGED
@@ -50,7 +50,7 @@ How the `Scraper` extracts and parses for the information is determined by scrap
 
 Calling `NewsScraper::Scraper#scrape` with either the array or block notation will yield `transformed_data` hashes. [`article_scrape_patterns.yml`](https://github.com/richardwu/news_scraper/blob/master/config/article_scrape_patterns.yml) defines the data types that will be scraped for.
 
-In addition, the `
+In addition, the `url` and `root_domain`(hostname) of the article will be returned in the hash too.
 
 Example
 ```
@@ -62,7 +62,7 @@ Example
   section: 'technology',
   datetime: '1991-10-05T12:00:00+00:00',
   title: 'Linus Linux',
-
+  url: 'https://linusworld.com/the-linux-kernel',
   root_domain: 'linusworld.com'
 }
 ```
data/config/article_scrape_patterns.yml
CHANGED
@@ -52,10 +52,16 @@ presets:
     article_tag: &article_tag_keywords
       method: "xpath"
       pattern: "//meta[@property='article:tag']/@content"
+    news_keywords: &news_keywords_keywords
+      method: "xpath"
+      pattern: "//meta[@name='news_keywords']/@content"
   section:
     meta: &meta_section
       method: "xpath"
       pattern: "//meta[@property='article:section']/@content"
+    section: &section_section
+      method: "xpath"
+      pattern: "//meta[@name='section']/@content"
   datetime:
     article_date_original: &article_date_original_datetime
       method: xpath
@@ -87,6 +93,15 @@ presets:
     sailthru_date: &sailthru_date_datetime
       method: xpath
       pattern: //meta[@name='sailthru.date']/@content
+    time: &time_datetime
+      method: xpath
+      pattern: //time/@datetime
+    date_published_datetime: &date_published_datetime_datetime
+      method: xpath
+      pattern: //meta[@itemprop="datePublished"]/@datetime
+    date_published_content: &date_published_content_datetime
+      method: xpath
+      pattern: //meta[@itemprop="datePublished"]/@content
   title:
     html: &html_title
       method: "xpath"
data/lib/news_scraper/cli.rb
CHANGED
@@ -6,14 +6,13 @@ module NewsScraper
 
     DEFAULT_COLOR = "\x1b[36m".freeze
 
-    def log(message, color: DEFAULT_COLOR
-      message += "\n" if new_line
+    def log(message, color: DEFAULT_COLOR)
       $stdout.puts "#{color}┃\x1b[0m " + message
     end
 
-    def log_lines(message, color: DEFAULT_COLOR
+    def log_lines(message, color: DEFAULT_COLOR)
       message.split("\n").each do |line|
-        log(line, color: color
+        log(line, color: color)
       end
     end
 
@@ -49,8 +48,8 @@ module NewsScraper
      buf = -1
      available = (1..options.length).to_a
      until available.include?(buf.to_i)
-        begin
-
+        buf = begin
+          Readline.readline("\x1b[34m┃ > \x1b[33m", true)
        rescue Interrupt
          nil
        end
@@ -71,14 +70,16 @@ module NewsScraper
 
     ## Fancy Headers and Footers
 
-    def put_header(text = "", color
+    def put_header(text = "", color: DEFAULT_COLOR)
       put_edge(color, "┏━━ ", text)
     end
 
-    def put_footer(color
+    def put_footer(color: DEFAULT_COLOR)
       put_edge(color, "┗", "")
     end
 
+    private
+
     def put_edge(color, prefix, text)
       ptext = "#{color}#{prefix}#{text}"
       textwidth = printing_width(ptext)
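
Not part of the diff: with the keyword arguments repaired above, the CLI helpers compose as in this illustrative sketch (assumes the module-level calls seen elsewhere in the gem):

```
NewsScraper::CLI.put_header("linusworld.com")
NewsScraper::CLI.log_lines("Fetching information...\nParsing...")
NewsScraper::CLI.put_footer
```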
data/lib/news_scraper/configuration.rb
ADDED
@@ -0,0 +1,33 @@
+module NewsScraper
+  class Configuration
+    DEFAULT_SCRAPE_PATTERNS_FILEPATH = File.expand_path('../../../config/article_scrape_patterns.yml', __FILE__)
+    attr_accessor :fetch_method, :scrape_patterns_filepath
+
+    # <code>NewsScraper::Configuration.initialize</code> initializes the scrape_patterns_filepath
+    # and the fetch_method to the <code>DEFAULT_SCRAPE_PATTERNS_FILEPATH</code>
+    #
+    # Set the <code>scrape_patterns_filepath</code> to <code>nil</code> to disable saving during training
+    #
+    def initialize
+      self.scrape_patterns_filepath = DEFAULT_SCRAPE_PATTERNS_FILEPATH
+      self.fetch_method = proc { default_scrape_patterns }
+    end
+
+    # <code>NewsScraper::Configuration.scrape_patterns</code> proxies scrape_patterns
+    # requests to <code>fetch_method</code>:
+    #
+    # *Returns*
+    # - The result of calling the <code>fetch_method</code> proc, expected to be a hash
+    #
+    def scrape_patterns
+      fetch_method.call
+    end
+
+    private
+
+    def default_scrape_patterns
+      @default_scrape_patterns ||= {}
+      @default_scrape_patterns[scrape_patterns_filepath] ||= YAML.load_file(scrape_patterns_filepath)
+    end
+  end
+end
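
Not part of the diff: `Configuration` memoizes the parsed YAML per filepath and routes every lookup through `fetch_method`, so scrape patterns can come from any source. A minimal sketch, assuming a hypothetical local patterns file:

```
config = NewsScraper::Configuration.new
config.scrape_patterns_filepath = 'my_patterns.yml' # hypothetical path
config.scrape_patterns                              # loads and memoizes the YAML

# Or bypass the filesystem entirely with a custom fetch_method:
config.fetch_method = proc { { 'data_types' => [], 'presets' => {}, 'domains' => {} } }
```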
data/lib/news_scraper/errors.rb
CHANGED
@@ -1,14 +1,23 @@
 
 module NewsScraper
-  class ResponseError < StandardError
+  class ResponseError < StandardError
+    attr_reader :error_code, :message, :url
+
+    def initialize(opts = {})
+      @error_code = opts[:error_code]
+      @message = opts[:message]
+      @url = opts[:url]
+      super
+    end
+  end
 
   module Transformers
     class ScrapePatternNotDefined < StandardError
-      attr_reader :root_domain, :
+      attr_reader :root_domain, :url
 
       def initialize(opts = {})
         @root_domain = opts[:root_domain]
-        @
+        @url = opts[:url]
         super
       end
     end
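
Not part of the diff: `ResponseError` now carries the failing request's details. A sketch of rescuing it, assuming `Scraper#scrape` surfaces the error raised by the extractors below:

```
begin
  NewsScraper::Scraper.new(query: 'linux').scrape
rescue NewsScraper::ResponseError => e
  warn "HTTP #{e.error_code} for #{e.url}: #{e.message}"
end
```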
data/lib/news_scraper/extractors_helpers.rb
CHANGED
@@ -10,9 +10,13 @@ module NewsScraper
 
     CLI.put_header(url)
     CLI.log "Beginning HTTP request for #{url}"
-    response = HTTParty.get(url)
+    response = HTTParty.get(url, headers: { "User-Agent" => "news-scraper-#{NewsScraper::VERSION}" })
 
-    raise ResponseError.new(
+    raise ResponseError.new(
+      error_code: response.code,
+      message: response.message,
+      url: url
+    ) unless response.code == 200
 
     CLI.log "#{response.code} - #{response.message}. Request successful for #{url}"
     CLI.put_footer
data/lib/news_scraper/scraper.rb
CHANGED
@@ -16,6 +16,7 @@ module NewsScraper
     #
     # *Raises*
     # - Will raise a <code>Transformers::ScrapePatternNotDefined</code> if an article is not in the root domains
+    # - Will <code>yield</code> the error if a block is given
     # - Root domains are specified by the <code>article_scrape_patterns.yml</code> file
     # - This root domain will need to be trained, it would be helpful to have a PR created to train the domain
     # - You can train the domain by running <code>NewsScraper::Trainer::UrlTrainer.new(URL_TO_TRAIN).train</code>
@@ -27,13 +28,19 @@ module NewsScraper
      article_urls = Extractors::GoogleNewsRss.new(query: @query).extract
 
      transformed_articles = []
+
      article_urls.each do |article_url|
        payload = Extractors::Article.new(url: article_url).extract
+        article_transformer = Transformers::Article.new(url: article_url, payload: payload)
 
-
-
-
-
+        begin
+          transformed_article = article_transformer.transform
+          transformed_articles << transformed_article
+          yield transformed_article if block_given?
+        rescue Transformers::ScrapePatternNotDefined => e
+          raise e unless block_given?
+          yield e
+        end
      end
 
      transformed_articles
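
Not part of the diff: with the rescue added above, the block form yields `ScrapePatternNotDefined` errors instead of aborting, so one untrained domain no longer halts a scrape. A sketch:

```
NewsScraper::Scraper.new(query: 'linux').scrape do |result|
  if result.is_a?(NewsScraper::Transformers::ScrapePatternNotDefined)
    warn "No scrape pattern for #{result.root_domain}, skipping"
  else
    puts result[:title]
  end
end
```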
data/lib/news_scraper/trainer/preset_selector.rb
CHANGED
@@ -3,18 +3,16 @@ module NewsScraper
     class PresetSelector
       PROVIDER_PHRASE = 'I will provide a pattern using'.freeze
 
-      def initialize(
+      def initialize(url:, payload:)
         @url = url
         @payload = payload
-        @data_type_presets = data_type_presets
-        @data_type = data_type
       end
 
-      def select
-
+      def select(data_type)
+        pattern_options = pattern_options(data_type)
 
         selected_option = CLI.prompt_with_options(
-          "Select which preset to use for #{
+          "Select which preset to use for #{data_type}:",
           pattern_options.keys
         )
 
@@ -27,50 +25,42 @@ module NewsScraper
         end
         return if selected_option == 'skip'
 
-
-
-
+        selected_preset_code = pattern_options[selected_option]
+        result = transform_results[data_type][selected_preset_code].merge(
+          'variable' => [selected_preset_code, data_type].join('_')
+        )
+        result.delete('data')
+        result
       end
 
       private
 
-      def pattern_options
-
-
-
-
-
-
-
-      end
-        %w(xpath css).each do |pattern_provider|
-          temp_options["#{PROVIDER_PHRASE} #{pattern_provider}"] = pattern_provider
+      def pattern_options(data_type)
+        # Add valid options from the transformed results
+        options = transform_results[data_type].each_with_object({}) do |(option, details), valid_options|
+          next unless details['data'] && !details['data'].empty?
+          table_key = Terminal::Table.new do |t|
+            t << ['method', details['method']]
+            t << ['pattern', details['pattern']]
+            t << ['data', details['data']]
          end
-
+          valid_options["\n#{table_key}"] = option
        end
-        end
 
-
-
-
-
-        @results ||= @data_type_presets.each_with_object({}) do |(preset_name, preset_details), hash|
-          scrape_details[@data_type] = preset_details
-          train_transformer = Transformers::TrainerArticle.new(
-            url: @url,
-            payload: @payload,
-            scrape_details: scrape_details,
-          )
+        # Add in options to customize the pattern
+        %w(xpath css).each do |pattern_provider|
+          options["#{PROVIDER_PHRASE} #{pattern_provider}"] = pattern_provider
+        end
 
-
-
-        end.to_a
+        # Add option in to skip
+        options.merge('skip' => 'skip')
      end
 
-      def
-      @
-
-
+      def transform_results
+        @transform_results ||= Transformers::TrainerArticle.new(
+          url: @url,
+          payload: @payload
+        ).transform
      end
    end
  end
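
Not part of the diff: each key in `pattern_options` is a rendered `Terminal::Table` (hence the new runtime dependency), so the prompt shows method, pattern, and scraped data side by side. A standalone sketch of the table built per preset, with example values:

```
require 'terminal-table'

table = Terminal::Table.new do |t|
  t << ['method', 'xpath']
  t << ['pattern', "//meta[@name='section']/@content"]
  t << ['data', 'technology']
end
puts table
```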
data/lib/news_scraper/trainer/url_trainer.rb
CHANGED
@@ -8,18 +8,17 @@ module NewsScraper
      end
 
      def train
-        return if
+        return if NewsScraper.configuration.scrape_patterns['domains'].key?(@root_domain)
 
        CLI.put_header(@root_domain)
-        CLI.log("There is no scrape pattern defined for #{@root_domain}
+        CLI.log("There is no scrape pattern defined for #{@root_domain}")
        CLI.log "Fetching information..."
        CLI.put_footer
 
        selected_presets = {}
-
+        NewsScraper.configuration.scrape_patterns['data_types'].each do |data_type|
          selected_presets[data_type] = selected_pattern(data_type)
        end
-
        save_selected_presets(selected_presets)
      end
 
@@ -27,29 +26,29 @@ module NewsScraper
 
      def selected_pattern(data_type)
        CLI.put_header("Determining information for #{data_type}")
-
-        pattern = if data_type_presets.nil?
+        pattern = if NewsScraper.configuration.scrape_patterns['presets'][data_type].nil?
          CLI.log("No presets were found for #{data_type}. Skipping to next.")
          nil
        else
-
-          url: @url,
-          payload: @payload,
-          data_type_presets: data_type_presets,
-          data_type: data_type
-        ).select
+          preset_selector.select(data_type)
        end
        CLI.put_footer
-
        pattern || { 'method' => "<<<<< TODO >>>>>", 'pattern' => "<<<<< TODO >>>>>" }
      end
 
+      def preset_selector
+        @preset_selector ||= PresetSelector.new(url: @url, payload: @payload)
+      end
+
      def save_selected_presets(selected_presets)
-
+        return unless NewsScraper.configuration.scrape_patterns_filepath
+
+        current_content = File.read(NewsScraper.configuration.scrape_patterns_filepath).chomp
        new_content = "#{current_content}\n#{build_domain_yaml(selected_presets)}\n"
 
-        File.write(
-        CLI.log("Successfully wrote presets for #{@root_domain} to
+        File.write(NewsScraper.configuration.scrape_patterns_filepath, new_content)
+        CLI.log("Successfully wrote presets for #{@root_domain} to"\
+                " #{NewsScraper.configuration.scrape_patterns_filepath}.")
      end
 
      def build_domain_yaml(selected_presets)
@@ -65,10 +64,6 @@ module NewsScraper
        end
        output_string.join("\n")
      end
-
-      def article_scrape_patterns
-        @article_scrape_patterns ||= YAML.load_file(Constants::SCRAPE_PATTERN_FILEPATH)
-      end
    end
  end
 end
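
Not part of the diff: training a single domain uses the entry point named in the `Scraper` docs above; per the `Configuration` comments, setting `scrape_patterns_filepath` to `nil` skips the file write. A sketch:

```
NewsScraper.configuration.scrape_patterns_filepath = nil # train without saving
NewsScraper::Trainer::UrlTrainer.new('https://linusworld.com/the-linux-kernel').train
```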
data/lib/news_scraper/transformers/article.rb
CHANGED
@@ -2,6 +2,7 @@ require 'nokogiri'
 require 'sanitize'
 require 'readability'
 require 'htmlbeautifier'
+require 'news_scraper/transformers/nokogiri/functions'
 
 module NewsScraper
   module Transformers
@@ -13,9 +14,8 @@ module NewsScraper
     # - <code>payload</code>: keyword arg - the result of the scrape
     #
     def initialize(url:, payload:)
-
-      @
-      @root_domain = uri_parser.host
+      @url = url
+      @root_domain = URIParser.new(url).host
       @payload = payload
     end
 
@@ -28,37 +28,36 @@ module NewsScraper
     # - <code>transformed_response</code>: the response that has been parsed and transformed to a hash
     #
     def transform
-
-
-      transformed_response.merge(
+      scrape_details = NewsScraper.configuration.scrape_patterns['domains'][@root_domain]
+      raise ScrapePatternNotDefined.new(url: @url, root_domain: @root_domain) unless scrape_details
+      transformed_response(scrape_details).merge(url: @url, root_domain: @root_domain)
     end
 
     private
 
-    def scrape_details
-
-
+    def transformed_response(scrape_details)
+      NewsScraper.configuration.scrape_patterns['data_types'].each_with_object({}) do |data_type, response|
+        response[data_type.to_sym] = nil
+        next unless scrape_details[data_type]
 
-
-
-
+        response[data_type.to_sym] = parsed_data(
+          scrape_details[data_type]['method'].to_sym,
+          scrape_details[data_type]['pattern']
+        )
      end
    end
 
-    def parsed_data(
-      return nil unless scrape_details[data_type]
-
-      scrape_method = scrape_details[data_type]['method'].to_sym
+    def parsed_data(scrape_method, scrape_pattern)
      case scrape_method
      when :xpath
-        noko_html = Nokogiri::HTML(@payload)
+        noko_html = ::Nokogiri::HTML(@payload)
        Sanitize.fragment(
-          noko_html.
+          noko_html.xpath("(#{scrape_pattern})[1]", Nokogiri::Functions.new)
        ).squish
      when :css
-        noko_html = Nokogiri::HTML(@payload)
+        noko_html = ::Nokogiri::HTML(@payload)
        Sanitize.fragment(
-          noko_html.
+          noko_html.css(scrape_pattern)
        ).squish
      when :readability
        content = Readability::Document.new(
data/lib/news_scraper/transformers/nokogiri/functions.rb
ADDED
@@ -0,0 +1,15 @@
+require 'nokogiri'
+
+module NewsScraper
+  module Transformers
+    module Nokogiri
+      class Functions
+        # Implements fn:string-join of XPath 2.0
+        def string_join(nodeset, separator)
+          nodeset.map(&:text).join(separator)
+        end
+        alias_method :'string-join', :string_join
+      end
+    end
+  end
+end
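
Not part of the diff: `Article#parsed_data` above passes `Nokogiri::Functions.new` as the XPath handler, which is how Nokogiri dispatches unknown functions such as `string-join` to Ruby. A standalone sketch:

```
require 'nokogiri'
require 'news_scraper/transformers/nokogiri/functions'

doc = Nokogiri::HTML('<div><p>a</p><p>b</p></div>')
joined = doc.xpath(
  "string-join(//p, ', ')",
  NewsScraper::Transformers::Nokogiri::Functions.new
)
puts joined # => "a, b"
```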
data/lib/news_scraper/transformers/trainer_article.rb
CHANGED
@@ -6,12 +6,26 @@ module NewsScraper
     # *Params*
     # - <code>url</code>: keyword arg - the url on which scraping was done
     # - <code>payload</code>: keyword arg - the result of the scrape
-    # - <code>scrape_details</code>: keyword arg - The pattern/methods for the domain to use in the transformation
     #
-    def initialize(url:, payload
-      @scrape_details = scrape_details
+    def initialize(url:, payload:)
       super(url: url, payload: payload)
     end
+
+    # Transform the article
+    #
+    # *Returns*
+    # - <code>transformed_response</code>: tries all possible presets and returns a hash representing the results
+    #
+    def transform
+      presets = NewsScraper.configuration.scrape_patterns['presets']
+      transformed_response = presets.each_with_object({}) do |(data_type, preset_options), response|
+        response[data_type] = preset_options.each_with_object({}) do |(option, scrape_details), data_type_options|
+          data = parsed_data(scrape_details['method'].to_sym, scrape_details['pattern'])
+          data_type_options[option] = scrape_details.merge('data' => data)
+        end
+      end
+      transformed_response.merge('url' => @url, 'root_domain' => @root_domain)
+    end
   end
  end
 end
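
Not part of the diff: `TrainerArticle#transform` runs every preset for every data type and tags each result with its scraped `'data'`, which `PresetSelector` then filters on. An illustrative (not literal) slice of its output:

```
{
  'section' => {
    'meta'    => { 'method' => 'xpath', 'pattern' => "//meta[@property='article:section']/@content", 'data' => 'technology' },
    'section' => { 'method' => 'xpath', 'pattern' => "//meta[@name='section']/@content", 'data' => '' }
  },
  'url' => 'https://linusworld.com/the-linux-kernel',
  'root_domain' => 'linusworld.com'
}
```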
data/lib/news_scraper/version.rb
CHANGED
data/lib/news_scraper.rb
CHANGED
@@ -1,7 +1,8 @@
 require 'httparty'
 require 'yaml'
+require 'terminal-table'
 
-require 'news_scraper/
+require 'news_scraper/configuration'
 require 'news_scraper/uri_parser'
 require 'news_scraper/active_support_lite/string'
 
@@ -23,11 +24,12 @@ require 'news_scraper/trainer'
 
 module NewsScraper
   extend self
+  attr_writer :configuration
 
   # <code>NewsScraper::train</code> is an interactive command-line prompt that:
   #
   # 1. Collates all articles for the given :query
-  # 2. Grep for <code>:data_types</code> using <code>:presets</code> in <code>
+  # 2. Grep for <code>:data_types</code> using <code>:presets</code> in the config set in the <code>configuration</code>
   # 3. Displays the results of each <code>:preset</code> grep for a given <code>:data_type</code>
   # 4. Prompts to select one of the <code>:presets</code> or define a pattern for that domain's <code>:data_type</code>
   # N.B: User may ignore all presets and manually configure it in the YAML file
@@ -36,7 +38,21 @@ module NewsScraper
   # *Params*
   # - <code>query</code>: a keyword arugment specifying the query to train on
   #
+  # :nocov:
   def train(query:)
     Trainer.train(query: query)
   end
+  # :nocov:
+
+  def configuration
+    @configuration ||= Configuration.new
+  end
+
+  def reset_configuration
+    @configuration = Configuration.new
+  end
+
+  def configure
+    yield(configuration)
+  end
 end
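
Not part of the diff: the module now exposes the standard configure-block pattern, with `reset_configuration` restoring defaults. A sketch:

```
NewsScraper.configure do |config|
  config.scrape_patterns_filepath = nil # disable saving during training
end

NewsScraper.reset_configuration # back to DEFAULT_SCRAPE_PATTERNS_FILEPATH
```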
data/news_scraper.gemspec
CHANGED
@@ -29,6 +29,7 @@ Gem::Specification.new do |spec|
   spec.add_dependency 'sanitize', '~> 4.2', '>= 4.2.0'
   spec.add_dependency 'ruby-readability', '~> 0.7', '>= 0.7.0'
   spec.add_dependency 'htmlbeautifier', '~> 1.1', '>= 1.1.1'
+  spec.add_dependency 'terminal-table', '~> 1.5', '>= 1.5.2'
 
   spec.add_development_dependency 'bundler', '~> 1.12', '>= 1.12.0'
   spec.add_development_dependency 'rake', '~> 10.0', '>= 10.0.0'
@@ -38,4 +39,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency 'timecop', '~> 0.8', '>= 0.8.0'
   spec.add_development_dependency 'rubocop', '~> 0.42', '>= 0.42.0'
   spec.add_development_dependency 'rdoc', '~> 4.2', '>= 4.2.2'
+  spec.add_development_dependency 'simplecov', '~> 0.12.0'
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: news_scraper
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 1.0.0
 platform: ruby
 authors:
 - Richard Wu
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-09-
+date: 2016-09-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -111,6 +111,26 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 1.1.1
+- !ruby/object:Gem::Dependency
+  name: terminal-table
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.5'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.5.2
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.5'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.5.2
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -271,6 +291,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 4.2.2
+- !ruby/object:Gem::Dependency
+  name: simplecov
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.12.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.12.0
 description: A collection of extractors, transformers and loaders for scraping news
   websites and syndicates.
 email:
@@ -296,7 +330,7 @@ files:
 - lib/news_scraper.rb
 - lib/news_scraper/active_support_lite/string.rb
 - lib/news_scraper/cli.rb
-- lib/news_scraper/
+- lib/news_scraper/configuration.rb
 - lib/news_scraper/errors.rb
 - lib/news_scraper/extractors/article.rb
 - lib/news_scraper/extractors/google_news_rss.rb
@@ -306,6 +340,7 @@ files:
 - lib/news_scraper/trainer/preset_selector.rb
 - lib/news_scraper/trainer/url_trainer.rb
 - lib/news_scraper/transformers/article.rb
+- lib/news_scraper/transformers/nokogiri/functions.rb
 - lib/news_scraper/transformers/trainer_article.rb
 - lib/news_scraper/uri_parser.rb
 - lib/news_scraper/version.rb