docparser 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +15 -3
  3. data/.rubocop_todo.yml +45 -0
  4. data/.travis.yml +1 -1
  5. data/Gemfile +5 -4
  6. data/README.md +2 -2
  7. data/Rakefile +3 -1
  8. data/docparser.gemspec +9 -9
  9. data/example.rb +2 -0
  10. data/lib/docparser.rb +2 -0
  11. data/lib/docparser/document.rb +20 -10
  12. data/lib/docparser/output.rb +9 -6
  13. data/lib/docparser/output/csv_output.rb +2 -0
  14. data/lib/docparser/output/html_output.rb +52 -49
  15. data/lib/docparser/output/json_output.rb +3 -1
  16. data/lib/docparser/output/multi_output.rb +3 -1
  17. data/lib/docparser/output/nil_output.rb +5 -6
  18. data/lib/docparser/output/xlsx_output.rb +2 -0
  19. data/lib/docparser/output/yaml_output.rb +4 -1
  20. data/lib/docparser/parser.rb +9 -13
  21. data/lib/docparser/version.rb +3 -1
  22. data/test/.rubocop.yml +6 -2
  23. data/test/.rubocop_todo.yml +23 -0
  24. data/test/lib/docparser/blackbox_test.rb +5 -4
  25. data/test/lib/docparser/document_test.rb +19 -14
  26. data/test/lib/docparser/output/csv_output_test.rb +5 -10
  27. data/test/lib/docparser/output/html_output_test.rb +5 -10
  28. data/test/lib/docparser/output/json_output_test.rb +8 -13
  29. data/test/lib/docparser/output/multi_output_test.rb +6 -12
  30. data/test/lib/docparser/output/nil_output_test.rb +4 -9
  31. data/test/lib/docparser/output/xlsx_output_test.rb +5 -10
  32. data/test/lib/docparser/output/yaml_output_test.rb +22 -27
  33. data/test/lib/docparser/output_test.rb +3 -8
  34. data/test/lib/docparser/parser_test.rb +2 -22
  35. data/test/lib/docparser/version_test.rb +2 -0
  36. data/test/support/hackaday/dl.rb +2 -0
  37. data/test/test_helper.rb +2 -3
  38. metadata +20 -35
  39. data/test/lib/docparser/logging_test.rb +0 -19
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module DocParser
2
4
  # This Output is used for testing purposes.
3
5
 
@@ -8,13 +10,10 @@ module DocParser
8
10
  @rowcount = 0
9
11
  end
10
12
 
11
- def close
12
- end
13
+ def close; end
13
14
 
14
- def write_row(*)
15
- end
15
+ def write_row(*); end
16
16
 
17
- def add_row(*)
18
- end
17
+ def add_row(*); end
19
18
  end
20
19
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'axlsx'
2
4
  module DocParser
3
5
  # The XLSXOutput class generates Microsoft Excel compatible .xlsx files
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'yaml'
2
4
  module DocParser
3
5
  # The YAMLOutput class generates a YAML file containing all rows as separate
@@ -6,7 +8,8 @@ module DocParser
6
8
  class YAMLOutput < Output
7
9
  # @!visibility private
8
10
  def write_row(row)
9
- fail MissingHeaderException if @header.nil? || @header.length == 0
11
+ raise MissingHeaderException if @header.nil? || @header.empty?
12
+
10
13
  @doc ||= {}
11
14
 
12
15
  0.upto(@header.length - 1) do |counter|
@@ -1,9 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'bundler/setup'
3
5
  require 'parallel'
6
+ require 'logger'
4
7
  require 'set'
5
- require 'log4r'
6
- require 'log4r/formatter/patternformatter'
7
8
  require 'docparser/version'
8
9
  require 'docparser/output'
9
10
  require 'docparser/document'
@@ -15,13 +16,6 @@ require 'docparser/output/json_output.rb'
15
16
  require 'docparser/output/multi_output.rb'
16
17
  require 'docparser/output/nil_output.rb'
17
18
 
18
- Log4r.define_levels(*Log4r::Log4rConfig::LogLevels)
19
- logger = Log4r::Logger.new('docparser')
20
- output = Log4r::StdoutOutputter.new('docparser')
21
- output.formatter = Log4r::PatternFormatter.new(pattern: '[%l %C] %d :: %m')
22
- logger.outputters = output
23
- logger.level = Log4r::INFO
24
-
25
19
  # The DocParser namespace
26
20
  # See README.md for information on using DocParser
27
21
  module DocParser
@@ -48,11 +42,11 @@ module DocParser
48
42
  @files = range ? files[range] : files
49
43
  @encoding = encoding
50
44
 
51
- Log4r::Logger['docparser'].level = quiet ? Log4r::ERROR : Log4r::INFO
45
+ @logger = Logger.new(STDERR)
46
+ @logger.level = quiet ? Logger::ERROR : Logger::INFO
52
47
 
53
48
  initialize_outputs output
54
49
 
55
- @logger = Log4r::Logger.new('docparser::parser')
56
50
  @logger.info "DocParser v#{VERSION} loaded"
57
51
  end
58
52
 
@@ -88,7 +82,7 @@ module DocParser
88
82
  elsif output.is_a?(Array) && output.all? { |o| o.is_a? Output }
89
83
  @outputs = output
90
84
  elsif output
91
- fail ArgumentError, 'Invalid outputs specified'
85
+ raise ArgumentError, 'Invalid outputs specified'
92
86
  end
93
87
 
94
88
  @resultsets = Array.new(@outputs.length) { Set.new }
@@ -102,9 +96,11 @@ module DocParser
102
96
  parse_doc(file, &block)
103
97
  # :nocov: #
104
98
  end.each do |result|
99
+ next unless @outputs
100
+
105
101
  result.each_with_index do |set, index|
106
102
  @resultsets[index].merge(set)
107
- end if @outputs
103
+ end
108
104
  end
109
105
  end
110
106
 
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # DocParser
2
4
  # See README.md for information on using DocParser
3
5
  module DocParser
4
6
  # The current version of DocParser
5
- VERSION = '0.2.3'
7
+ VERSION = '0.3.0'
6
8
  end
@@ -1,3 +1,7 @@
1
+ inherit_from:
2
+ - ../.rubocop.yml
3
+ - .rubocop_todo.yml
4
+
1
5
  # Do not introduce global variables.
2
- GlobalVars:
3
- Enabled: false
6
+ Style/GlobalVars:
7
+ Enabled: false
@@ -0,0 +1,23 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2020-04-13 17:56:31 +0200 using RuboCop version 0.81.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 9
10
+ # Configuration parameters: CountComments, ExcludedMethods.
11
+ # ExcludedMethods: refine
12
+ Metrics/BlockLength:
13
+ Max: 173
14
+
15
+ # Offense count: 14
16
+ Security/Open:
17
+ Exclude:
18
+ - 'lib/docparser/document_test.rb'
19
+ - 'lib/docparser/output/csv_output_test.rb'
20
+ - 'lib/docparser/output/html_output_test.rb'
21
+ - 'lib/docparser/output/json_output_test.rb'
22
+ - 'lib/docparser/output/multi_output_test.rb'
23
+ - 'lib/docparser/output/yaml_output_test.rb'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../test_helper'
2
4
  require 'open3'
3
5
  require 'shellwords'
@@ -14,15 +16,14 @@ describe DocParser do
14
16
  Dir.mktmpdir do |dir|
15
17
  Dir.chdir(dir)
16
18
  example_file = Shellwords.escape(File.join($ROOT_DIR, 'example.rb'))
17
- out, err = cmd_to_sys '/usr/bin/env ruby ' + example_file
18
- err.must_be_empty
19
- rows = out.scan(/(\d+) rows/).flatten
19
+ _, err = cmd_to_sys '/usr/bin/env ruby ' + example_file
20
+ rows = err.scan(/(\d+) rows/).flatten
20
21
  rows.length.must_equal 5
21
22
  row_lengths = rows.group_by(&:to_i)
22
23
  row_lengths.length.must_equal 1
23
24
  # HaD: 40 pages of 7 articles
24
25
  row_lengths.keys.first.must_equal(7 * 40)
25
- out.must_match(/Done processing/)
26
+ err.must_match(/Done processing/)
26
27
  end
27
28
  Dir.chdir(curwd)
28
29
  end
@@ -1,7 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../test_helper'
4
+
2
5
  describe DocParser::Document do
3
6
  before do
4
- Log4r::Logger['docparser'].level = Log4r::INFO
5
7
  $output = DocParser::NilOutput.new
6
8
  @parser = Class.new do
7
9
  define_method(:outputs) { [$output] }
@@ -54,15 +56,15 @@ describe DocParser::Document do
54
56
  it 'should be possible to use css queries' do
55
57
  css = 'article > h1 + p'
56
58
  css_content = @test_doc.css_content(css)
57
- css_element = @test_doc.css(css)
59
+ css_element = @test_doc.elements(css)
58
60
  css_content.must_equal('Great article it is')
59
61
  css_content.must_equal(css_element.first.content)
60
62
  end
61
63
 
62
64
  it 'should be possible to use xpath queries' do
63
65
  xpath = '//li/ancestor::article/h1'
64
- xpath_content = @test_doc.xpath_content(xpath)
65
- xpath_element = @test_doc.xpath(xpath)
66
+ xpath_content = @test_doc.element_content(xpath)
67
+ xpath_element = @test_doc.elements(xpath)
66
68
  xpath_content.must_equal('This is an article')
67
69
  xpath_content.must_equal(xpath_element.first.content)
68
70
  end
@@ -83,6 +85,11 @@ describe DocParser::Document do
83
85
  array2 << element.content
84
86
  end
85
87
  array2.must_equal(array)
88
+ array2 = []
89
+ @test_doc.each_element('//p') do |element|
90
+ array2 << element.content
91
+ end
92
+ array2.must_equal(array)
86
93
  end
87
94
 
88
95
  it 'should warn when providing an empty file' do
@@ -91,20 +98,19 @@ describe DocParser::Document do
91
98
  file.close
92
99
 
93
100
  open(file.path).read.empty?.must_equal true
94
- _, err = capture_io do
95
- # Switch to hijacked IO
96
- Log4r::Outputter['docparser'].instance_variable_set(:@out, $stderr)
97
- DocParser::Document.new(filename: file.path, parser: @parser)
98
- end
99
- # Restore IO
100
- Log4r::Outputter['docparser'].instance_variable_set(:@out, $stderr)
101
- err.must_include "#{file.path} is empty"
101
+ err = StringIO.new
102
+
103
+ DocParser::Document.new(filename: file.path,
104
+ parser: @parser,
105
+ logger: Logger.new(err))
106
+
107
+ err.string.must_include "#{file.path} is empty"
102
108
  end
103
109
 
104
110
  it 'should add the row to the results' do
105
111
  @test_doc.add_row ['test']
106
112
  @test_doc.add_row 'test', 'test2'
107
- @test_doc.results.must_equal [[%w(test), %w(test test2)]]
113
+ @test_doc.results.must_equal [[%w[test], %w[test test2]]]
108
114
  end
109
115
 
110
116
  it 'should be possible to not use outputs' do
@@ -133,5 +139,4 @@ describe DocParser::Document do
133
139
  test_doc.add_row ['b'], output: 0
134
140
  test_doc.results.must_equal [[['b']], [['a']]]
135
141
  end
136
-
137
142
  end
@@ -1,13 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../../test_helper'
2
4
 
3
5
  describe DocParser::CSVOutput do
4
- before do
5
- Log4r::Logger['docparser'].level = Log4r::ERROR
6
- end
7
- after do
8
- Log4r::Logger['docparser'].level = Log4r::INFO
9
- end
10
-
11
6
  it 'must create a file' do
12
7
  Dir.mktmpdir do |dir|
13
8
  filename = File.join(dir, 'test.csv')
@@ -30,7 +25,7 @@ describe DocParser::CSVOutput do
30
25
  Dir.mktmpdir do |dir|
31
26
  filename = File.join(dir, 'test.csv')
32
27
  output = DocParser::CSVOutput.new(filename: filename)
33
- output.add_row %w(aap noot mies)
28
+ output.add_row %w[aap noot mies]
34
29
  output.add_row ['aap', 'noot', 'mies;']
35
30
  output.close
36
31
  open(filename).read.must_equal "aap;noot;mies\naap;noot;\"mies;\"\n"
@@ -43,8 +38,8 @@ describe DocParser::CSVOutput do
43
38
  output = DocParser::CSVOutput.new(filename: filename)
44
39
  output.header = 'test', 'the', 'header'
45
40
  output.rowcount.must_equal 0
46
- output.add_row %w(aap noot mies)
47
- output.add_row %w(aap noot mies)
41
+ output.add_row %w[aap noot mies]
42
+ output.add_row %w[aap noot mies]
48
43
  output.rowcount.must_equal 2
49
44
  end
50
45
  end
@@ -1,13 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../../test_helper'
2
4
 
3
5
  describe DocParser::HTMLOutput do
4
- before do
5
- Log4r::Logger['docparser'].level = Log4r::ERROR
6
- end
7
- after do
8
- Log4r::Logger['docparser'].level = Log4r::INFO
9
- end
10
-
11
6
  it 'must create a file' do
12
7
  Dir.mktmpdir do |dir|
13
8
  filename = File.join(dir, 'test.html')
@@ -31,7 +26,7 @@ describe DocParser::HTMLOutput do
31
26
  Dir.mktmpdir do |dir|
32
27
  filename = File.join(dir, 'test.html')
33
28
  output = DocParser::HTMLOutput.new(filename: filename)
34
- output.add_row %w(aap noot mies)
29
+ output.add_row %w[aap noot mies]
35
30
  output.add_row ['aap', 'noot', 'mies;']
36
31
  output.close
37
32
  html = open(filename).read
@@ -47,8 +42,8 @@ describe DocParser::HTMLOutput do
47
42
  output = DocParser::HTMLOutput.new(filename: filename)
48
43
  output.header = 'test', 'the', 'header'
49
44
  output.rowcount.must_equal 0
50
- output.add_row %w(aap noot mies)
51
- output.add_row %w(aap noot mies)
45
+ output.add_row %w[aap noot mies]
46
+ output.add_row %w[aap noot mies]
52
47
  output.rowcount.must_equal 2
53
48
  output.close
54
49
  open(filename).read.must_include('<p>2 rows</p>')
@@ -1,13 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../../test_helper'
2
4
 
3
5
  describe DocParser::JSONOutput do
4
- before do
5
- Log4r::Logger['docparser'].level = Log4r::ERROR
6
- end
7
- after do
8
- Log4r::Logger['docparser'].level = Log4r::INFO
9
- end
10
-
11
6
  it 'must create a file' do
12
7
  Dir.mktmpdir do |dir|
13
8
  filename = File.join(dir, 'test.json')
@@ -31,7 +26,7 @@ describe DocParser::JSONOutput do
31
26
  filename = File.join(dir, 'test.json')
32
27
  output = DocParser::JSONOutput.new(filename: filename)
33
28
  lambda do
34
- output.add_row %w(aap noot mies)
29
+ output.add_row %w[aap noot mies]
35
30
  end.must_raise(DocParser::MissingHeaderException)
36
31
  end
37
32
  end
@@ -41,9 +36,9 @@ describe DocParser::JSONOutput do
41
36
  filename = File.join(dir, 'test.json')
42
37
  output = DocParser::JSONOutput.new(filename: filename)
43
38
  output.header = 'test', 'the', 'header'
44
- output.add_row %w(a b c)
45
- output.add_row %w(aap noot mies")
46
- output.add_row %w(aap noot) # testing empty column
39
+ output.add_row %w[a b c]
40
+ output.add_row %w[aap noot mies"]
41
+ output.add_row %w[aap noot] # testing empty column
47
42
  output.close
48
43
  expected = '[{"test":"a","the":"b","header":"c"},
49
44
  {"test":"aap","the":"noot","header":"mies\""},
@@ -58,8 +53,8 @@ describe DocParser::JSONOutput do
58
53
  output = DocParser::JSONOutput.new(filename: filename)
59
54
  output.header = 'test', 'the', 'header'
60
55
  output.rowcount.must_equal 0
61
- output.add_row %w(aap noot mies)
62
- output.add_row %w(aap noot mies)
56
+ output.add_row %w[aap noot mies]
57
+ output.add_row %w[aap noot mies]
63
58
  output.rowcount.must_equal 2
64
59
  end
65
60
  end
@@ -1,13 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../../test_helper'
2
4
 
3
5
  describe DocParser::MultiOutput do
4
- before do
5
- Log4r::Logger['docparser'].level = Log4r::ERROR
6
- end
7
- after do
8
- Log4r::Logger['docparser'].level = Log4r::INFO
9
- end
10
-
11
6
  it 'must create files' do
12
7
  Dir.mktmpdir do |dir|
13
8
  filename = File.join(dir, 'test')
@@ -36,7 +31,7 @@ describe DocParser::MultiOutput do
36
31
  filename = File.join(dir, 'test')
37
32
  output = DocParser::MultiOutput.new(filename: filename)
38
33
  lambda do
39
- output.add_row %w(aap noot mies)
34
+ output.add_row %w[aap noot mies]
40
35
  end.must_raise(DocParser::MissingHeaderException)
41
36
  end
42
37
  end
@@ -47,8 +42,8 @@ describe DocParser::MultiOutput do
47
42
  output = DocParser::MultiOutput.new(filename: filename)
48
43
  output.header = 'test', 'the', 'header'
49
44
  output.rowcount.must_equal 0
50
- output.add_row %w(aap noot mies)
51
- output.add_row %w(aap noot mies)
45
+ output.add_row %w[aap noot mies]
46
+ output.add_row %w[aap noot mies]
52
47
  output.rowcount.must_equal 2
53
48
  end
54
49
  end
@@ -57,7 +52,7 @@ describe DocParser::MultiOutput do
57
52
  Dir.mktmpdir do |dir|
58
53
  filename = File.join(dir, 'test')
59
54
  output = DocParser::MultiOutput.new(filename: filename)
60
- methods = %i(add_row header= close)
55
+ methods = %i[add_row header= close]
61
56
  outputs = output.instance_variable_get(:@outputs)
62
57
  outputs.map! do |o|
63
58
  SimpleMock.new o
@@ -76,5 +71,4 @@ describe DocParser::MultiOutput do
76
71
  end
77
72
  end
78
73
  end
79
-
80
74
  end
@@ -1,13 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../../test_helper'
2
4
 
3
5
  describe DocParser::NilOutput do
4
- before do
5
- Log4r::Logger['docparser'].level = Log4r::ERROR
6
- end
7
- after do
8
- Log4r::Logger['docparser'].level = Log4r::INFO
9
- end
10
-
11
6
  it 'should not create a file' do
12
7
  Dir.mktmpdir do |dir|
13
8
  filename = File.join(dir, '*')
@@ -20,8 +15,8 @@ describe DocParser::NilOutput do
20
15
  output = DocParser::NilOutput.new
21
16
  output.header = 'test', 'the', 'header'
22
17
  output.rowcount.must_equal 0
23
- output.add_row %w(aap noot mies)
24
- output.add_row %w(aap noot mies)
18
+ output.add_row %w[aap noot mies]
19
+ output.add_row %w[aap noot mies]
25
20
  output.rowcount.must_equal 0
26
21
  end
27
22
  end