RubyGems - docparser - Versions diffs - 0.2.3 → 0.3.0 - Mend

docparser 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

checksums.yaml +5 -5
data/.rubocop.yml +15 -3
data/.rubocop_todo.yml +45 -0
data/.travis.yml +1 -1
data/Gemfile +5 -4
data/README.md +2 -2
data/Rakefile +3 -1
data/docparser.gemspec +9 -9
data/example.rb +2 -0
data/lib/docparser.rb +2 -0
data/lib/docparser/document.rb +20 -10
data/lib/docparser/output.rb +9 -6
data/lib/docparser/output/csv_output.rb +2 -0
data/lib/docparser/output/html_output.rb +52 -49
data/lib/docparser/output/json_output.rb +3 -1
data/lib/docparser/output/multi_output.rb +3 -1
data/lib/docparser/output/nil_output.rb +5 -6
data/lib/docparser/output/xlsx_output.rb +2 -0
data/lib/docparser/output/yaml_output.rb +4 -1
data/lib/docparser/parser.rb +9 -13
data/lib/docparser/version.rb +3 -1
data/test/.rubocop.yml +6 -2
data/test/.rubocop_todo.yml +23 -0
data/test/lib/docparser/blackbox_test.rb +5 -4
data/test/lib/docparser/document_test.rb +19 -14
data/test/lib/docparser/output/csv_output_test.rb +5 -10
data/test/lib/docparser/output/html_output_test.rb +5 -10
data/test/lib/docparser/output/json_output_test.rb +8 -13
data/test/lib/docparser/output/multi_output_test.rb +6 -12
data/test/lib/docparser/output/nil_output_test.rb +4 -9
data/test/lib/docparser/output/xlsx_output_test.rb +5 -10
data/test/lib/docparser/output/yaml_output_test.rb +22 -27
data/test/lib/docparser/output_test.rb +3 -8
data/test/lib/docparser/parser_test.rb +2 -22
data/test/lib/docparser/version_test.rb +2 -0
data/test/support/hackaday/dl.rb +2 -0
data/test/test_helper.rb +2 -3
metadata +20 -35
data/test/lib/docparser/logging_test.rb +0 -19

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 69310d210a064254c5c767509118e5f9d4a1bc83
-  data.tar.gz: 2e39c9e8c9b8833c17277847fc89b460173de70c
+SHA256:
+  metadata.gz: 3a857af8eb8403f2f26f867f02e13319fcb173c6f177947986d5fe81e7248a74
+  data.tar.gz: 53e66974c57f2662606f1125a0466ca0ce7db476cae0516da71837b3703263e4
 SHA512:
-  metadata.gz: 464c4e463de25d0b961476253f54cf13718a2129bab265382d5a3bc34d90a5ecb9d24eead1718688a485b5c9405cec0f6316b7cd2e7741446f3a253341e2cdb2
-  data.tar.gz: 682813ef09039f64e4285c4db5986f7c2be6387e7df1f608e6294804e583a3d7c11c5427901d093cacdab03f8afcd9f6f0a39ab77f90df395ac7993e77cc28db
+  metadata.gz: 70c5721acae9866e862a6747a6749c894bb38dd20792545ad372b8f5265f9a4ca2f5a895e30fe36fc17eb2e511ffe86ec837974d2430ee1864b2b6773ce08b6c
+  data.tar.gz: 0b7cd82a97ad79b78bebab10319403cb6cd9cbee27137912315ede6ccbaa432f87d9a9267123724021f119c9f3d67de73dd57350455d3c00bd63132047b1f57c

data/.rubocop.yml CHANGED

@@ -1,6 +1,18 @@
+inherit_from: .rubocop_todo.yml
 # Temporarily turn this off
 # Avoid parameter lists longer than three or four parameters.
-ParameterLists:
+Metrics/ParameterLists:
   Enabled: false
-MultilineBlockChain:
-  Enabled: false
+Style/MultilineBlockChain:
+  Enabled: false
+Style/HashTransformKeys:
+  Enabled: true
+Style/HashTransformValues:
+  Enabled: true
+Style/HashEachMethods:
+  Enabled: true
+Lint/RaiseException:
+  Enabled: true
+Lint/StructNewOverride:
+  Enabled: true

data/.rubocop_todo.yml ADDED

@@ -0,0 +1,45 @@
+# This configuration was generated by
+# `rubocop --auto-gen-config`
+# on 2020-04-13 17:55:59 +0200 using RuboCop version 0.81.0.
+# The point is for the user to remove these configuration records
+# one by one as the offenses are removed from the code base.
+# Note that changes in the inspected code, or installation of new
+# versions of RuboCop, may require this file to be generated again.
+# Offense count: 9
+# Configuration parameters: CountComments, ExcludedMethods.
+# ExcludedMethods: refine
+Metrics/BlockLength:
+  Max: 173
+# Offense count: 2
+# Configuration parameters: ForbiddenDelimiters.
+# ForbiddenDelimiters: (?-mix:(^|\s)(EO[A-Z]{1}|END)(\s|$))
+Naming/HeredocDelimiterNaming:
+  Exclude:
+    - 'lib/docparser/output/html_output.rb'
+# Offense count: 16
+Security/Open:
+  Exclude:
+    - 'lib/docparser/document.rb'
+    - 'lib/docparser/output.rb'
+    - 'test/lib/docparser/document_test.rb'
+    - 'test/lib/docparser/output/csv_output_test.rb'
+    - 'test/lib/docparser/output/html_output_test.rb'
+    - 'test/lib/docparser/output/json_output_test.rb'
+    - 'test/lib/docparser/output/multi_output_test.rb'
+    - 'test/lib/docparser/output/yaml_output_test.rb'
+# Offense count: 4
+# Configuration parameters: EnforcedStyle.
+# SupportedStyles: annotated, template, unannotated
+Style/FormatStringToken:
+  Exclude:
+    - 'lib/docparser/output.rb'
+    - 'lib/docparser/parser.rb'
+# Offense count: 1
+Style/MixinUsage:
+  Exclude:
+    - 'example.rb'

data/.travis.yml CHANGED

@@ -1,7 +1,7 @@
 language: ruby
 rvm:
     - 2.0.0
-    - 2.1.1
+    - 2.1.2
     - rbx-2.2.5
     - ruby-head
     - jruby-head

data/Gemfile CHANGED

@@ -1,12 +1,13 @@
+# frozen_string_literal: true
 gemspec
 source 'https://rubygems.org'
 group :test do
-  gem 'minitest', '~> 5.4.1'
-  gem 'coveralls', require: false
+  gem 'minitest', '~> 5.14.0'
   gem 'rake'
-  gem 'rubocop', '~> 0.26.0'
-  gem 'simplecov', require: false
+  gem 'rubocop', '~> 0.81.0'
   gem 'simple_mock'
+  gem 'simplecov', require: false
 end

data/README.md CHANGED

@@ -1,6 +1,6 @@
 # DocParser
-[![Gem Version](https://badge.fury.io/rb/docparser.png)](http://badge.fury.io/rb/docparser) [![Build Status](https://travis-ci.org/jurriaan/docparser.png?branch=master)](https://travis-ci.org/jurriaan/docparser) [![Dependency Status](https://gemnasium.com/jurriaan/docparser.png)](https://gemnasium.com/jurriaan/docparser) [![Coverage Status](https://coveralls.io/repos/jurriaan/docparser/badge.png?branch=master)](https://coveralls.io/r/jurriaan/docparser)
+[![Gem Version](http://img.shields.io/gem/v/docparser.svg)](http://badge.fury.io/rb/docparser) [![Build Status](http://img.shields.io/travis/jurriaan/docparser.svg)](https://travis-ci.org/jurriaan/docparser) [![Dependency Status](http://img.shields.io/gemnasium/jurriaan/docparser.svg)](https://gemnasium.com/jurriaan/docparser)
 DocParser is a web scraping/screen scraping tool.
@@ -59,4 +59,4 @@ See [example.rb](https://github.com/jurriaan/docparser/blob/master/example.rb)
 ## Thanks
-- [randym](https://github.com/randym) - for providing the [axlsx](https://github.com/randym/axlsx) gem
+- [randym](https://github.com/randym) - for providing the [axlsx](https://github.com/randym/axlsx) gem

data/Rakefile CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require 'bundler/gem_tasks'
 require 'rake/testtask'
 require 'rubocop'
@@ -16,7 +18,7 @@ task :rubocop do
   puts "Running Rubocop #{RuboCop::Version::STRING}"
   args = FileList['**/*.rb', 'Rakefile', 'docparser.gemspec', 'Gemfile']
   cli = RuboCop::CLI.new
-  fail unless cli.run(args) == 0
+  raise unless cli.run(args).zero?
 end
 task default: :test

data/docparser.gemspec CHANGED

@@ -1,4 +1,6 @@
-lib = File.expand_path('../lib', __FILE__)
+# frozen_string_literal: true
+lib = File.expand_path('lib', __dir__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'docparser/version'
@@ -14,18 +16,16 @@ Gem::Specification.new do |spec|
   spec.platform      = Gem::Platform::RUBY
   spec.files         = `git ls-files`.split($RS)
-  spec.executables   = spec.files.grep(/^bin\//) { |f| File.basename(f) }
-  spec.test_files    = spec.files.grep(/^(test|spec|features)\//)
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
   spec.require_paths = ['lib']
   spec.extra_rdoc_files = ['README.md', 'LICENSE']
-  spec.add_runtime_dependency 'nokogiri', '~> 1.6.1'
-  spec.add_runtime_dependency 'parallel', '~> 1.3.2'
   spec.add_runtime_dependency 'axlsx', '~> 2.0.1'
-  spec.add_runtime_dependency 'log4r', '~> 1.1.10'
+  spec.add_runtime_dependency 'nokogiri', '~> 1.10.0'
+  spec.add_runtime_dependency 'parallel', '~> 1.10'
-  spec.add_development_dependency 'yard'
-  spec.add_development_dependency 'kramdown', '~> 1.4.1'
   spec.add_development_dependency 'github-markup'
-  spec.required_ruby_version = '>= 2.0.0'
+  spec.add_development_dependency 'kramdown', '~> 2.1.0'
+  spec.add_development_dependency 'yard'
 end

data/example.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 #
 # An example of parsing hackaday.com
 # (C) 2013 Jurriaan Pruis

data/lib/docparser.rb CHANGED

@@ -1 +1,3 @@
+# frozen_string_literal: true
 require 'docparser/parser'

data/lib/docparser/document.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require 'nokogiri'
 module DocParser
   # The Document class loads and parses the files.
@@ -19,9 +21,10 @@ module DocParser
     # @return [String] the source of the document
     attr_reader :html
-    def initialize(filename: nil, encoding: 'utf-8', parser: nil)
-      @logger = Log4r::Logger.new('docparser::document')
-      @logger.debug { "Parsing #{filename}" }
+    def initialize(filename: nil, encoding: 'utf-8', parser: nil, logger: nil)
+      @logger = logger || Logger.new(STDERR)
+      @logger.level = Logger::INFO
+      @logger.debug("Parsing #{filename}")
       @encoding = encoding
       @parser = parser
       @filename = filename
@@ -32,7 +35,7 @@ module DocParser
     # Adds a row to an output
     def add_row(*row, output: 0)
       output = @parser.outputs.index(output) if output.is_a? Output
-      @logger.debug { "#{filename}: Adding row #{row.flatten}" }
+      @logger.debug("#{filename}: Adding row #{row.flatten}")
       results[output] << row.flatten
     end
@@ -42,9 +45,14 @@ module DocParser
       @title ||= xpath_content('//head/title')
     end
-    # Executes a xpath query
-    def xpath(query)
-      res = @doc.search(query)
+    # Executes a xpath/css query
+    def elements(query)
+      @doc.search(query)
+    end
+    def each_element(query)
+      res = elements(query)
       if block_given?
         res.each { |el| yield el }
       else
@@ -54,7 +62,7 @@ module DocParser
     # Executes a xpath query and returns the content
     # @return [String] the content of the HTML node
-    def xpath_content(query)
+    def element_content(query)
       first = @doc.search(query).first
       if first.nil?
         nil
@@ -91,7 +99,9 @@ module DocParser
       end
     end
-    alias_method :css, :xpath
-    alias_method :css_content, :xpath_content
+    alias css each_element
+    alias xpath each_element
+    alias css_content element_content
+    alias xpath_content element_content
   end
 end

data/lib/docparser/output.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module DocParser
   # The Output base class.
   # All Output classes inherit from this one.
@@ -24,10 +26,11 @@ module DocParser
       @filename = filename
       @uniq = uniq
       @uniqarr = []
-      fail ArgumentError, 'Please specify a filename' if filename.empty?
+      raise ArgumentError, 'Please specify a filename' if filename.empty?
       @file = open filename, 'w'
-      classname = self.class.name.split('::').last
-      @logger = Log4r::Logger.new("docparser::output::#{classname}")
+      @logger = Logger.new(STDERR)
+      @logger.level = Logger::INFO
       open_file
     end
@@ -40,6 +43,7 @@ module DocParser
     # Adds a row
     def add_row(row)
       return if @uniq && @uniqarr.include?(row.hash)
       @rowcount += 1
       write_row row
       @uniqarr << row.hash
@@ -66,12 +70,11 @@ module DocParser
     # Called when a row is added
     def write_row(_row)
-      fail NotImplementedError, 'No row writer defined'
+      raise NotImplementedError, 'No row writer defined'
     end
     # Called before closing the file
-    def footer
-    end
+    def footer; end
   end
   # MissingHeaderException gets thrown if a required header is missing.

data/lib/docparser/output/csv_output.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require 'csv'
 module DocParser
   # The CSVOutput class generates a CSV file containing all rows

data/lib/docparser/output/html_output.rb CHANGED

@@ -1,65 +1,68 @@
+# frozen_string_literal: true
 require 'cgi'
 module DocParser
   # The HTMLOutput class generates an HTML file containing a table
   # @see Output
   class HTMLOutput < Output
     # @!visibility private
-    HTMLHEADER = <<-EOS
-<!DOCTYPE html>
-<html>
-<head>
-<title>HTML output "#FILENAME#"</title>
-<meta charset="utf-8">
-<style type="text/css">
-body {
-  font-family:"Helvetica Neue", Helvetica, Arial, sans-serif;
-  font-size:12px;
-}
-table {
-  border:1px solid #69c;
-  border-collapse:collapse;
-  font-size:12px;
-  text-align:left;
-  width:480px;
-}
-th {
-  border-bottom:1px dashed #69c;
-  color:#039;
-  font-size:14px;
-  font-weight:normal;
-  padding:12px 17px;
-}
-td {
-  color:#669;
-  padding:7px 17px;
-  white-space: pre;
-}
-tbody tr:hover td {
-  background:#d0dafd;
-  color:#339;
-}
-tbody tr:nth-child(even) {
-  background:#e0eaff;
-}
-</style>
-</head>
-<body>
-<table>
-EOS
+    HTMLHEADER = <<~EOS
+      <!DOCTYPE html>
+      <html>
+      <head>
+      <title>HTML output "#FILENAME#"</title>
+      <meta charset="utf-8">
+      <style type="text/css">
+      body {
+        font-family:"Helvetica Neue", Helvetica, Arial, sans-serif;
+        font-size:12px;
+      }
+      table {
+        border:1px solid #69c;
+        border-collapse:collapse;
+        font-size:12px;
+        text-align:left;
+        width:480px;
+      }
+      th {
+        border-bottom:1px dashed #69c;
+        color:#039;
+        font-size:14px;
+        font-weight:normal;
+        padding:12px 17px;
+      }
+      td {
+        color:#669;
+        padding:7px 17px;
+        white-space: pre;
+      }
+      tbody tr:hover td {
+        background:#d0dafd;
+        color:#339;
+      }
+      tbody tr:nth-child(even) {
+        background:#e0eaff;
+      }
+      </style>
+      </head>
+      <body>
+      <table>
+    EOS
     # @!visibility private
-    HTMLFOOTER = <<-EOS
-</tbody>
-</table>
-<p>#COUNT# rows</p>
-</body>
-</html>
-EOS
+    HTMLFOOTER = <<~EOS
+      </tbody>
+      </table>
+      <p>#COUNT# rows</p>
+      </body>
+      </html>
+    EOS
     def open_file
       @file << HTMLHEADER.gsub('#FILENAME#', @filename)
     end
     def header
       return if @header.nil? || @header.empty?
       @file << '<thead><tr>'
       @file << @header.map { |f| '<th>' + f + '</th>' }.join
       @file << "</tr></thead>\n<tbody>\n"

data/lib/docparser/output/json_output.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require 'json'
 module DocParser
   # The JSONOutput class generates a JSON file containing all rows as separate
@@ -11,7 +13,7 @@ module DocParser
     end
     def write_row(row)
-      fail MissingHeaderException if @header.nil? || @header.length == 0
+      raise MissingHeaderException if @header.nil? || @header.empty?
       @file << ',' unless @file.pos <= 1

data/lib/docparser/output/multi_output.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module DocParser
   # The MultiOutput output combines multiple outputs.
   # It creates a CSV, HTML, YAML and XLSX Output file
@@ -9,7 +11,7 @@ module DocParser
   class MultiOutput < Output
     # All the possible outputs
     OUTPUT_TYPES = { csv: CSVOutput, html: HTMLOutput, yml: YAMLOutput,
-                     xlsx: XLSXOutput, json: JSONOutput }
+                     xlsx: XLSXOutput, json: JSONOutput }.freeze
     # @!visibility private
     def initialize(**options)