news_scraper 0.1.2 → 1.0.0
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -1
- data/README.md +2 -2
- data/config/article_scrape_patterns.yml +15 -0
- data/lib/news_scraper/cli.rb +9 -8
- data/lib/news_scraper/configuration.rb +33 -0
- data/lib/news_scraper/errors.rb +12 -3
- data/lib/news_scraper/extractors_helpers.rb +6 -2
- data/lib/news_scraper/scraper.rb +11 -4
- data/lib/news_scraper/trainer/preset_selector.rb +30 -40
- data/lib/news_scraper/trainer/url_trainer.rb +15 -20
- data/lib/news_scraper/transformers/article.rb +19 -20
- data/lib/news_scraper/transformers/nokogiri/functions.rb +15 -0
- data/lib/news_scraper/transformers/trainer_article.rb +17 -3
- data/lib/news_scraper/version.rb +1 -1
- data/lib/news_scraper.rb +18 -2
- data/news_scraper.gemspec +2 -0
- metadata +38 -3
- data/lib/news_scraper/constants.rb +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: db7d631f3f6cf73ff2e57b9e472804651b9fe1e0
+  data.tar.gz: 1045878eb97749d6b264a486ac34bfb89f4796dd
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a53423be5dbda33ead7dbb46bc494e40fcf412172a291496128b40512c985fc157c646481e1b8a183be2f709486e2cc7ec27d47a17bb9715c3b19dbe09dd7e42
+  data.tar.gz: eb43f129a0ca1a9f6eb02f24bfeb891583c96b0cce548afe43cf613231e8908a66e381347aeb16f5be9ea4af3ca96f82dcaf4da005951fb5ae5936d3f3530cbc
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/README.md
CHANGED
@@ -50,7 +50,7 @@ How the `Scraper` extracts and parses for the information is determined by scrap
 
 Calling `NewsScraper::Scraper#scrape` with either the array or block notation will yield `transformed_data` hashes. [`article_scrape_patterns.yml`](https://github.com/richardwu/news_scraper/blob/master/config/article_scrape_patterns.yml) defines the data types that will be scraped for.
 
-In addition, the `
+In addition, the `url` and `root_domain` (hostname) of the article will be returned in the hash too.
 
 Example
 ```
@@ -62,7 +62,7 @@ Example
   section: 'technology',
   datetime: '1991-10-05T12:00:00+00:00',
   title: 'Linus Linux',
-
+  url: 'https://linusworld.com/the-linux-kernel',
   root_domain: 'linusworld.com'
 }
 ```
data/config/article_scrape_patterns.yml
CHANGED
@@ -52,10 +52,16 @@ presets:
     article_tag: &article_tag_keywords
      method: "xpath"
      pattern: "//meta[@property='article:tag']/@content"
+    news_keywords: &news_keywords_keywords
+      method: "xpath"
+      pattern: "//meta[@name='news_keywords']/@content"
   section:
     meta: &meta_section
      method: "xpath"
      pattern: "//meta[@property='article:section']/@content"
+    section: &section_section
+      method: "xpath"
+      pattern: "//meta[@name='section']/@content"
   datetime:
     article_date_original: &article_date_original_datetime
      method: xpath
@@ -87,6 +93,15 @@ presets:
     sailthru_date: &sailthru_date_datetime
      method: xpath
      pattern: //meta[@name='sailthru.date']/@content
+    time: &time_datetime
+      method: xpath
+      pattern: //time/@datetime
+    date_published_datetime: &date_published_datetime_datetime
+      method: xpath
+      pattern: //meta[@itemprop="datePublished"]/@datetime
+    date_published_content: &date_published_content_datetime
+      method: xpath
+      pattern: //meta[@itemprop="datePublished"]/@content
   title:
     html: &html_title
      method: "xpath"
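These presets are consumed through the new configuration object (added in configuration.rb below). A quick sketch of reading one of the new datetime presets, assuming the bundled YAML is still the configured source:

```ruby
require 'news_scraper'

patterns = NewsScraper.configuration.scrape_patterns
patterns['presets']['datetime']['time']
# => { "method" => "xpath", "pattern" => "//time/@datetime" }
```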
data/lib/news_scraper/cli.rb
CHANGED
@@ -6,14 +6,13 @@ module NewsScraper
 
     DEFAULT_COLOR = "\x1b[36m".freeze
 
-    def log(message, color: DEFAULT_COLOR
-      message += "\n" if new_line
+    def log(message, color: DEFAULT_COLOR)
       $stdout.puts "#{color}┃\x1b[0m " + message
     end
 
-    def log_lines(message, color: DEFAULT_COLOR
+    def log_lines(message, color: DEFAULT_COLOR)
       message.split("\n").each do |line|
-        log(line, color: color
+        log(line, color: color)
       end
     end
 
@@ -49,8 +48,8 @@ module NewsScraper
       buf = -1
       available = (1..options.length).to_a
       until available.include?(buf.to_i)
-        begin
-
+        buf = begin
+          Readline.readline("\x1b[34m┃ > \x1b[33m", true)
         rescue Interrupt
           nil
         end
@@ -71,14 +70,16 @@ module NewsScraper
 
     ## Fancy Headers and Footers
 
-    def put_header(text = "", color
+    def put_header(text = "", color: DEFAULT_COLOR)
       put_edge(color, "┏━━ ", text)
     end
 
-    def put_footer(color
+    def put_footer(color: DEFAULT_COLOR)
       put_edge(color, "┗", "")
     end
 
+    private
+
     def put_edge(color, prefix, text)
       ptext = "#{color}#{prefix}#{text}"
       textwidth = printing_width(ptext)
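The net effect for callers: `color` is a proper keyword argument again, and `put_edge` drops out of the public surface. A minimal sketch, assuming `CLI` keeps its module-function style:

```ruby
NewsScraper::CLI.put_header("linusworld.com")
NewsScraper::CLI.log_lines("fetching...\nparsing...", color: "\x1b[33m")
NewsScraper::CLI.put_footer
# NewsScraper::CLI.put_edge(...) is private as of this release
```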
data/lib/news_scraper/configuration.rb
ADDED
@@ -0,0 +1,33 @@
+module NewsScraper
+  class Configuration
+    DEFAULT_SCRAPE_PATTERNS_FILEPATH = File.expand_path('../../../config/article_scrape_patterns.yml', __FILE__)
+    attr_accessor :fetch_method, :scrape_patterns_filepath
+
+    # <code>NewsScraper::Configuration.initialize</code> initializes the scrape_patterns_filepath
+    # and the fetch_method to the <code>DEFAULT_SCRAPE_PATTERNS_FILEPATH</code>
+    #
+    # Set the <code>scrape_patterns_filepath</code> to <code>nil</code> to disable saving during training
+    #
+    def initialize
+      self.scrape_patterns_filepath = DEFAULT_SCRAPE_PATTERNS_FILEPATH
+      self.fetch_method = proc { default_scrape_patterns }
+    end
+
+    # <code>NewsScraper::Configuration.scrape_patterns</code> proxies scrape_patterns
+    # requests to <code>fetch_method</code>:
+    #
+    # *Returns*
+    # - The result of calling the <code>fetch_method</code> proc, expected to be a hash
+    #
+    def scrape_patterns
+      fetch_method.call
+    end
+
+    private
+
+    def default_scrape_patterns
+      @default_scrape_patterns ||= {}
+      @default_scrape_patterns[scrape_patterns_filepath] ||= YAML.load_file(scrape_patterns_filepath)
+    end
+  end
+end
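Together with the `NewsScraper.configure` block added in news_scraper.rb below, this makes the patterns source pluggable. A sketch (the custom filepath is hypothetical):

```ruby
NewsScraper.configure do |config|
  # Point at a custom patterns file...
  config.scrape_patterns_filepath = 'config/custom_patterns.yml'
  # ...or bypass the filesystem with any callable returning the patterns hash
  config.fetch_method = proc { { 'data_types' => [], 'domains' => {}, 'presets' => {} } }
end
```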
data/lib/news_scraper/errors.rb
CHANGED
@@ -1,14 +1,23 @@
 
 module NewsScraper
-  class ResponseError < StandardError
+  class ResponseError < StandardError
+    attr_reader :error_code, :message, :url
+
+    def initialize(opts = {})
+      @error_code = opts[:error_code]
+      @message = opts[:message]
+      @url = opts[:url]
+      super
+    end
+  end
 
   module Transformers
     class ScrapePatternNotDefined < StandardError
-      attr_reader :root_domain, :
+      attr_reader :root_domain, :url
 
       def initialize(opts = {})
         @root_domain = opts[:root_domain]
-        @
+        @url = opts[:url]
         super
       end
     end
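A sketch of rescuing the enriched error (the `Scraper.new(query:)` call is assumed from the README's usage; the reader attributes come from the class above):

```ruby
begin
  NewsScraper::Scraper.new(query: 'ruby').scrape
rescue NewsScraper::ResponseError => e
  warn "#{e.url} responded with #{e.error_code}: #{e.message}"
end
```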
data/lib/news_scraper/extractors_helpers.rb
CHANGED
@@ -10,9 +10,13 @@ module NewsScraper
 
     CLI.put_header(url)
     CLI.log "Beginning HTTP request for #{url}"
-    response = HTTParty.get(url)
+    response = HTTParty.get(url, headers: { "User-Agent" => "news-scraper-#{NewsScraper::VERSION}" })
 
-    raise ResponseError.new(
+    raise ResponseError.new(
+      error_code: response.code,
+      message: response.message,
+      url: url
+    ) unless response.code == 200
 
     CLI.log "#{response.code} - #{response.message}. Request successful for #{url}"
     CLI.put_footer
data/lib/news_scraper/scraper.rb
CHANGED
@@ -16,6 +16,7 @@ module NewsScraper
     #
     # *Raises*
     # - Will raise a <code>Transformers::ScrapePatternNotDefined</code> if an article is not in the root domains
+    # - Will <code>yield</code> the error if a block is given
     # - Root domains are specified by the <code>article_scrape_patterns.yml</code> file
     # - This root domain will need to be trained, it would be helpful to have a PR created to train the domain
     # - You can train the domain by running <code>NewsScraper::Trainer::UrlTrainer.new(URL_TO_TRAIN).train</code>
@@ -27,13 +28,19 @@ module NewsScraper
       article_urls = Extractors::GoogleNewsRss.new(query: @query).extract
 
       transformed_articles = []
+
       article_urls.each do |article_url|
         payload = Extractors::Article.new(url: article_url).extract
+        article_transformer = Transformers::Article.new(url: article_url, payload: payload)
 
-
-
-
-
+        begin
+          transformed_article = article_transformer.transform
+          transformed_articles << transformed_article
+          yield transformed_article if block_given?
+        rescue Transformers::ScrapePatternNotDefined => e
+          raise e unless block_given?
+          yield e
+        end
       end
 
       transformed_articles
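With the block form, an untrained domain now yields the error instead of aborting the whole run. A sketch (constructor signature assumed, as above):

```ruby
NewsScraper::Scraper.new(query: 'linux').scrape do |result|
  case result
  when NewsScraper::Transformers::ScrapePatternNotDefined
    warn "no pattern trained for #{result.root_domain} (#{result.url})"
  else
    puts result[:title]
  end
end
```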
data/lib/news_scraper/trainer/preset_selector.rb
CHANGED
@@ -3,18 +3,16 @@ module NewsScraper
     class PresetSelector
       PROVIDER_PHRASE = 'I will provide a pattern using'.freeze
 
-      def initialize(
+      def initialize(url:, payload:)
         @url = url
         @payload = payload
-        @data_type_presets = data_type_presets
-        @data_type = data_type
       end
 
-      def select
-
+      def select(data_type)
+        pattern_options = pattern_options(data_type)
 
         selected_option = CLI.prompt_with_options(
-          "Select which preset to use for #{
+          "Select which preset to use for #{data_type}:",
          pattern_options.keys
        )
 
@@ -27,50 +25,42 @@ module NewsScraper
        end
        return if selected_option == 'skip'
 
-
-
-
+        selected_preset_code = pattern_options[selected_option]
+        result = transform_results[data_type][selected_preset_code].merge(
+          'variable' => [selected_preset_code, data_type].join('_')
+        )
+        result.delete('data')
+        result
      end
 
      private
 
-      def pattern_options
-
-
-
-
-
-
-
-        end
-        %w(xpath css).each do |pattern_provider|
-          temp_options["#{PROVIDER_PHRASE} #{pattern_provider}"] = pattern_provider
+      def pattern_options(data_type)
+        # Add valid options from the transformed results
+        options = transform_results[data_type].each_with_object({}) do |(option, details), valid_options|
+          next unless details['data'] && !details['data'].empty?
+          table_key = Terminal::Table.new do |t|
+            t << ['method', details['method']]
+            t << ['pattern', details['pattern']]
+            t << ['data', details['data']]
          end
-
+          valid_options["\n#{table_key}"] = option
        end
-        end
 
-
-
-
-
-        @results ||= @data_type_presets.each_with_object({}) do |(preset_name, preset_details), hash|
-          scrape_details[@data_type] = preset_details
-          train_transformer = Transformers::TrainerArticle.new(
-            url: @url,
-            payload: @payload,
-            scrape_details: scrape_details,
-          )
+        # Add in options to customize the pattern
+        %w(xpath css).each do |pattern_provider|
+          options["#{PROVIDER_PHRASE} #{pattern_provider}"] = pattern_provider
+        end
 
-
-        end.to_a
+        # Add option in to skip
+        options.merge('skip' => 'skip')
      end
 
-      def
-      @
-
-
+      def transform_results
+        @transform_results ||= Transformers::TrainerArticle.new(
+          url: @url,
+          payload: @payload
+        ).transform
      end
    end
  end
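For reference, `select` now returns the chosen preset's scrape details with a generated `variable` name and the sampled `data` key stripped, roughly (values illustrative):

```ruby
selector = NewsScraper::Trainer::PresetSelector.new(url: url, payload: payload)
selector.select('title')
# => { "method" => "xpath", "pattern" => "//title", "variable" => "html_title" }
```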
data/lib/news_scraper/trainer/url_trainer.rb
CHANGED
@@ -8,18 +8,17 @@ module NewsScraper
      end
 
      def train
-        return if
+        return if NewsScraper.configuration.scrape_patterns['domains'].key?(@root_domain)
 
        CLI.put_header(@root_domain)
-        CLI.log("There is no scrape pattern defined for #{@root_domain}
+        CLI.log("There is no scrape pattern defined for #{@root_domain}")
        CLI.log "Fetching information..."
        CLI.put_footer
 
        selected_presets = {}
-
+        NewsScraper.configuration.scrape_patterns['data_types'].each do |data_type|
          selected_presets[data_type] = selected_pattern(data_type)
        end
-
        save_selected_presets(selected_presets)
      end
 
@@ -27,29 +26,29 @@ module NewsScraper
 
      def selected_pattern(data_type)
        CLI.put_header("Determining information for #{data_type}")
-
-        pattern = if data_type_presets.nil?
+        pattern = if NewsScraper.configuration.scrape_patterns['presets'][data_type].nil?
                    CLI.log("No presets were found for #{data_type}. Skipping to next.")
                    nil
                  else
-
-                      url: @url,
-                      payload: @payload,
-                      data_type_presets: data_type_presets,
-                      data_type: data_type
-                    ).select
+                    preset_selector.select(data_type)
                  end
        CLI.put_footer
-
        pattern || { 'method' => "<<<<< TODO >>>>>", 'pattern' => "<<<<< TODO >>>>>" }
      end
 
+      def preset_selector
+        @preset_selector ||= PresetSelector.new(url: @url, payload: @payload)
+      end
+
      def save_selected_presets(selected_presets)
-
+        return unless NewsScraper.configuration.scrape_patterns_filepath
+
+        current_content = File.read(NewsScraper.configuration.scrape_patterns_filepath).chomp
        new_content = "#{current_content}\n#{build_domain_yaml(selected_presets)}\n"
 
-        File.write(
-        CLI.log("Successfully wrote presets for #{@root_domain} to
+        File.write(NewsScraper.configuration.scrape_patterns_filepath, new_content)
+        CLI.log("Successfully wrote presets for #{@root_domain} to"\
+                " #{NewsScraper.configuration.scrape_patterns_filepath}.")
      end
 
      def build_domain_yaml(selected_presets)
@@ -65,10 +64,6 @@ module NewsScraper
        end
        output_string.join("\n")
      end
-
-      def article_scrape_patterns
-        @article_scrape_patterns ||= YAML.load_file(Constants::SCRAPE_PATTERN_FILEPATH)
-      end
    end
  end
end
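The caller-facing entry point is unchanged (it is quoted in scraper.rb's docs above), and saving can now be switched off through the configuration:

```ruby
# Setting the filepath to nil disables saving during training
# (per the Configuration docs above)
NewsScraper.configure { |c| c.scrape_patterns_filepath = nil }

NewsScraper::Trainer::UrlTrainer.new('https://linusworld.com/the-linux-kernel').train
```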
data/lib/news_scraper/transformers/article.rb
CHANGED
@@ -2,6 +2,7 @@ require 'nokogiri'
 require 'sanitize'
 require 'readability'
 require 'htmlbeautifier'
+require 'news_scraper/transformers/nokogiri/functions'
 
 module NewsScraper
   module Transformers
@@ -13,9 +14,8 @@ module NewsScraper
     # - <code>payload</code>: keyword arg - the result of the scrape
     #
     def initialize(url:, payload:)
-
-      @
-      @root_domain = uri_parser.host
+      @url = url
+      @root_domain = URIParser.new(url).host
       @payload = payload
     end
 
@@ -28,37 +28,36 @@ module NewsScraper
     # - <code>transformed_response</code>: the response that has been parsed and transformed to a hash
     #
     def transform
-
-
-      transformed_response.merge(
+      scrape_details = NewsScraper.configuration.scrape_patterns['domains'][@root_domain]
+      raise ScrapePatternNotDefined.new(url: @url, root_domain: @root_domain) unless scrape_details
+      transformed_response(scrape_details).merge(url: @url, root_domain: @root_domain)
     end
 
     private
 
-    def scrape_details
-
-
+    def transformed_response(scrape_details)
+      NewsScraper.configuration.scrape_patterns['data_types'].each_with_object({}) do |data_type, response|
+        response[data_type.to_sym] = nil
+        next unless scrape_details[data_type]
 
-
-
-
+        response[data_type.to_sym] = parsed_data(
+          scrape_details[data_type]['method'].to_sym,
+          scrape_details[data_type]['pattern']
+        )
       end
     end
 
-    def parsed_data(
-      return nil unless scrape_details[data_type]
-
-      scrape_method = scrape_details[data_type]['method'].to_sym
+    def parsed_data(scrape_method, scrape_pattern)
       case scrape_method
       when :xpath
-        noko_html = Nokogiri::HTML(@payload)
+        noko_html = ::Nokogiri::HTML(@payload)
         Sanitize.fragment(
-          noko_html.
+          noko_html.xpath("(#{scrape_pattern})[1]", Nokogiri::Functions.new)
         ).squish
       when :css
-        noko_html = Nokogiri::HTML(@payload)
+        noko_html = ::Nokogiri::HTML(@payload)
         Sanitize.fragment(
-          noko_html.
+          noko_html.css(scrape_pattern)
         ).squish
       when :readability
         content = Readability::Document.new(
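A sketch of the transformer for a trained domain (the payload source and hash values are illustrative; the `url`/`root_domain` keys are documented in the README diff above):

```ruby
raw_html = File.read('article.html') # e.g. the body fetched by Extractors::Article
transformer = NewsScraper::Transformers::Article.new(
  url: 'https://linusworld.com/the-linux-kernel',
  payload: raw_html
)
transformer.transform
# => { section: 'technology', title: 'Linus Linux', ...,
#      url: 'https://linusworld.com/the-linux-kernel', root_domain: 'linusworld.com' }
```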
data/lib/news_scraper/transformers/nokogiri/functions.rb
ADDED
@@ -0,0 +1,15 @@
+require 'nokogiri'
+
+module NewsScraper
+  module Transformers
+    module Nokogiri
+      class Functions
+        # Implements fn:string-join of XPath 2.0
+        def string_join(nodeset, separator)
+          nodeset.map(&:text).join(separator)
+        end
+        alias_method :'string-join', :string_join
+      end
+    end
+  end
+end
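Since `parsed_data` passes `Nokogiri::Functions.new` as a custom-function handler (see article.rb above), trained xpath patterns can call `string-join` even though stock XPath 1.0 lacks it. A sketch with an illustrative document:

```ruby
doc = ::Nokogiri::HTML(<<~HTML)
  <meta property="article:tag" content="linux">
  <meta property="article:tag" content="kernel">
HTML
doc.xpath(
  "string-join(//meta[@property='article:tag']/@content, ',')",
  NewsScraper::Transformers::Nokogiri::Functions.new
)
# => "linux,kernel"
```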
data/lib/news_scraper/transformers/trainer_article.rb
CHANGED
@@ -6,12 +6,26 @@ module NewsScraper
     # *Params*
     # - <code>url</code>: keyword arg - the url on which scraping was done
     # - <code>payload</code>: keyword arg - the result of the scrape
-    # - <code>scrape_details</code>: keyword arg - The pattern/methods for the domain to use in the transformation
     #
-    def initialize(url:, payload
-      @scrape_details = scrape_details
+    def initialize(url:, payload:)
       super(url: url, payload: payload)
     end
+
+    # Transform the article
+    #
+    # *Returns*
+    # - <code>transformed_response</code>: tries all possible presets and returns a hash representing the results
+    #
+    def transform
+      presets = NewsScraper.configuration.scrape_patterns['presets']
+      transformed_response = presets.each_with_object({}) do |(data_type, preset_options), response|
+        response[data_type] = preset_options.each_with_object({}) do |(option, scrape_details), data_type_options|
+          data = parsed_data(scrape_details['method'].to_sym, scrape_details['pattern'])
+          data_type_options[option] = scrape_details.merge('data' => data)
+        end
+      end
+      transformed_response.merge('url' => @url, 'root_domain' => @root_domain)
+    end
   end
 end
 end
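So the trainer transform returns, per data type, every preset's details plus the data it actually matched, which is what `PresetSelector` tabulates. Illustrative shape (data values invented):

```ruby
NewsScraper::Transformers::TrainerArticle.new(url: url, payload: payload).transform
# => {
#      "datetime" => {
#        "time" => { "method" => "xpath", "pattern" => "//time/@datetime",
#                    "data" => "2016-09-25T12:00:00+00:00" },
#        ...
#      },
#      ...,
#      "url" => url, "root_domain" => "..."
#    }
```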
data/lib/news_scraper/version.rb
CHANGED
data/lib/news_scraper.rb
CHANGED
@@ -1,7 +1,8 @@
 require 'httparty'
 require 'yaml'
+require 'terminal-table'
 
-require 'news_scraper/constants'
+require 'news_scraper/configuration'
 require 'news_scraper/uri_parser'
 require 'news_scraper/active_support_lite/string'
 
@@ -23,11 +24,12 @@ require 'news_scraper/trainer'
 
 module NewsScraper
   extend self
+  attr_writer :configuration
 
   # <code>NewsScraper::train</code> is an interactive command-line prompt that:
   #
   # 1. Collates all articles for the given :query
-  # 2. Grep for <code>:data_types</code> using <code>:presets</code> in <code>
+  # 2. Grep for <code>:data_types</code> using <code>:presets</code> in the config set in the <code>configuration</code>
   # 3. Displays the results of each <code>:preset</code> grep for a given <code>:data_type</code>
   # 4. Prompts to select one of the <code>:presets</code> or define a pattern for that domain's <code>:data_type</code>
   # N.B: User may ignore all presets and manually configure it in the YAML file
@@ -36,7 +38,21 @@ module NewsScraper
   # *Params*
   # - <code>query</code>: a keyword arugment specifying the query to train on
   #
+  # :nocov:
   def train(query:)
     Trainer.train(query: query)
   end
+  # :nocov:
+
+  def configuration
+    @configuration ||= Configuration.new
+  end
+
+  def reset_configuration
+    @configuration = Configuration.new
+  end
+
+  def configure
+    yield(configuration)
+  end
 end
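A sketch of the accessor trio in use, e.g. to isolate configuration between test cases:

```ruby
NewsScraper.configure { |c| c.scrape_patterns_filepath = nil }
NewsScraper.configuration.scrape_patterns_filepath # => nil

NewsScraper.reset_configuration
NewsScraper.configuration.scrape_patterns_filepath
# => NewsScraper::Configuration::DEFAULT_SCRAPE_PATTERNS_FILEPATH
```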
data/news_scraper.gemspec
CHANGED
@@ -29,6 +29,7 @@ Gem::Specification.new do |spec|
   spec.add_dependency 'sanitize', '~> 4.2', '>= 4.2.0'
   spec.add_dependency 'ruby-readability', '~> 0.7', '>= 0.7.0'
   spec.add_dependency 'htmlbeautifier', '~> 1.1', '>= 1.1.1'
+  spec.add_dependency 'terminal-table', '~> 1.5', '>= 1.5.2'
 
   spec.add_development_dependency 'bundler', '~> 1.12', '>= 1.12.0'
   spec.add_development_dependency 'rake', '~> 10.0', '>= 10.0.0'
@@ -38,4 +39,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency 'timecop', '~> 0.8', '>= 0.8.0'
   spec.add_development_dependency 'rubocop', '~> 0.42', '>= 0.42.0'
   spec.add_development_dependency 'rdoc', '~> 4.2', '>= 4.2.2'
+  spec.add_development_dependency 'simplecov', '~> 0.12.0'
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: news_scraper
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 1.0.0
 platform: ruby
 authors:
 - Richard Wu
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-09-
+date: 2016-09-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -111,6 +111,26 @@ dependencies:
   - - ">="
     - !ruby/object:Gem::Version
       version: 1.1.1
+- !ruby/object:Gem::Dependency
+  name: terminal-table
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.5'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.5.2
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.5'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.5.2
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -271,6 +291,20 @@ dependencies:
   - - ">="
     - !ruby/object:Gem::Version
       version: 4.2.2
+- !ruby/object:Gem::Dependency
+  name: simplecov
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.12.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.12.0
 description: A collection of extractors, transformers and loaders for scraping news
   websites and syndicates.
 email:
@@ -296,7 +330,7 @@ files:
 - lib/news_scraper.rb
 - lib/news_scraper/active_support_lite/string.rb
 - lib/news_scraper/cli.rb
-- lib/news_scraper/constants.rb
+- lib/news_scraper/configuration.rb
 - lib/news_scraper/errors.rb
 - lib/news_scraper/extractors/article.rb
 - lib/news_scraper/extractors/google_news_rss.rb
@@ -306,6 +340,7 @@ files:
 - lib/news_scraper/trainer/preset_selector.rb
 - lib/news_scraper/trainer/url_trainer.rb
 - lib/news_scraper/transformers/article.rb
+- lib/news_scraper/transformers/nokogiri/functions.rb
 - lib/news_scraper/transformers/trainer_article.rb
 - lib/news_scraper/uri_parser.rb
 - lib/news_scraper/version.rb