docparser 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39)
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +15 -3
  3. data/.rubocop_todo.yml +45 -0
  4. data/.travis.yml +1 -1
  5. data/Gemfile +5 -4
  6. data/README.md +2 -2
  7. data/Rakefile +3 -1
  8. data/docparser.gemspec +9 -9
  9. data/example.rb +2 -0
  10. data/lib/docparser.rb +2 -0
  11. data/lib/docparser/document.rb +20 -10
  12. data/lib/docparser/output.rb +9 -6
  13. data/lib/docparser/output/csv_output.rb +2 -0
  14. data/lib/docparser/output/html_output.rb +52 -49
  15. data/lib/docparser/output/json_output.rb +3 -1
  16. data/lib/docparser/output/multi_output.rb +3 -1
  17. data/lib/docparser/output/nil_output.rb +5 -6
  18. data/lib/docparser/output/xlsx_output.rb +2 -0
  19. data/lib/docparser/output/yaml_output.rb +4 -1
  20. data/lib/docparser/parser.rb +9 -13
  21. data/lib/docparser/version.rb +3 -1
  22. data/test/.rubocop.yml +6 -2
  23. data/test/.rubocop_todo.yml +23 -0
  24. data/test/lib/docparser/blackbox_test.rb +5 -4
  25. data/test/lib/docparser/document_test.rb +19 -14
  26. data/test/lib/docparser/output/csv_output_test.rb +5 -10
  27. data/test/lib/docparser/output/html_output_test.rb +5 -10
  28. data/test/lib/docparser/output/json_output_test.rb +8 -13
  29. data/test/lib/docparser/output/multi_output_test.rb +6 -12
  30. data/test/lib/docparser/output/nil_output_test.rb +4 -9
  31. data/test/lib/docparser/output/xlsx_output_test.rb +5 -10
  32. data/test/lib/docparser/output/yaml_output_test.rb +22 -27
  33. data/test/lib/docparser/output_test.rb +3 -8
  34. data/test/lib/docparser/parser_test.rb +2 -22
  35. data/test/lib/docparser/version_test.rb +2 -0
  36. data/test/support/hackaday/dl.rb +2 -0
  37. data/test/test_helper.rb +2 -3
  38. metadata +20 -35
  39. data/test/lib/docparser/logging_test.rb +0 -19
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module DocParser
2
4
  # This Output is used for testing purposes.
3
5
 
@@ -8,13 +10,10 @@ module DocParser
8
10
  @rowcount = 0
9
11
  end
10
12
 
11
- def close
12
- end
13
+ def close; end
13
14
 
14
- def write_row(*)
15
- end
15
+ def write_row(*); end
16
16
 
17
- def add_row(*)
18
- end
17
+ def add_row(*); end
19
18
  end
20
19
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'axlsx'
2
4
  module DocParser
3
5
  # The XLSXOutput class generates Microsoft Excel compatible .xlsx files
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'yaml'
2
4
  module DocParser
3
5
  # The YAMLOutput class generates a YAML file containing all rows as separate
@@ -6,7 +8,8 @@ module DocParser
6
8
  class YAMLOutput < Output
7
9
  # @!visibility private
8
10
  def write_row(row)
9
- fail MissingHeaderException if @header.nil? || @header.length == 0
11
+ raise MissingHeaderException if @header.nil? || @header.empty?
12
+
10
13
  @doc ||= {}
11
14
 
12
15
  0.upto(@header.length - 1) do |counter|
@@ -1,9 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'bundler/setup'
3
5
  require 'parallel'
6
+ require 'logger'
4
7
  require 'set'
5
- require 'log4r'
6
- require 'log4r/formatter/patternformatter'
7
8
  require 'docparser/version'
8
9
  require 'docparser/output'
9
10
  require 'docparser/document'
@@ -15,13 +16,6 @@ require 'docparser/output/json_output.rb'
15
16
  require 'docparser/output/multi_output.rb'
16
17
  require 'docparser/output/nil_output.rb'
17
18
 
18
- Log4r.define_levels(*Log4r::Log4rConfig::LogLevels)
19
- logger = Log4r::Logger.new('docparser')
20
- output = Log4r::StdoutOutputter.new('docparser')
21
- output.formatter = Log4r::PatternFormatter.new(pattern: '[%l %C] %d :: %m')
22
- logger.outputters = output
23
- logger.level = Log4r::INFO
24
-
25
19
  # The DocParser namespace
26
20
  # See README.md for information on using DocParser
27
21
  module DocParser
@@ -48,11 +42,11 @@ module DocParser
48
42
  @files = range ? files[range] : files
49
43
  @encoding = encoding
50
44
 
51
- Log4r::Logger['docparser'].level = quiet ? Log4r::ERROR : Log4r::INFO
45
+ @logger = Logger.new(STDERR)
46
+ @logger.level = quiet ? Logger::ERROR : Logger::INFO
52
47
 
53
48
  initialize_outputs output
54
49
 
55
- @logger = Log4r::Logger.new('docparser::parser')
56
50
  @logger.info "DocParser v#{VERSION} loaded"
57
51
  end
58
52
 
@@ -88,7 +82,7 @@ module DocParser
88
82
  elsif output.is_a?(Array) && output.all? { |o| o.is_a? Output }
89
83
  @outputs = output
90
84
  elsif output
91
- fail ArgumentError, 'Invalid outputs specified'
85
+ raise ArgumentError, 'Invalid outputs specified'
92
86
  end
93
87
 
94
88
  @resultsets = Array.new(@outputs.length) { Set.new }
@@ -102,9 +96,11 @@ module DocParser
102
96
  parse_doc(file, &block)
103
97
  # :nocov: #
104
98
  end.each do |result|
99
+ next unless @outputs
100
+
105
101
  result.each_with_index do |set, index|
106
102
  @resultsets[index].merge(set)
107
- end if @outputs
103
+ end
108
104
  end
109
105
  end
110
106
 
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # DocParser
2
4
  # See README.md for information on using DocParser
3
5
  module DocParser
4
6
  # The current version of DocParser
5
- VERSION = '0.2.3'
7
+ VERSION = '0.3.0'
6
8
  end
@@ -1,3 +1,7 @@
1
+ inherit_from:
2
+ - ../.rubocop.yml
3
+ - .rubocop_todo.yml
4
+
1
5
  # Do not introduce global variables.
2
- GlobalVars:
3
- Enabled: false
6
+ Style/GlobalVars:
7
+ Enabled: false
@@ -0,0 +1,23 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2020-04-13 17:56:31 +0200 using RuboCop version 0.81.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 9
10
+ # Configuration parameters: CountComments, ExcludedMethods.
11
+ # ExcludedMethods: refine
12
+ Metrics/BlockLength:
13
+ Max: 173
14
+
15
+ # Offense count: 14
16
+ Security/Open:
17
+ Exclude:
18
+ - 'lib/docparser/document_test.rb'
19
+ - 'lib/docparser/output/csv_output_test.rb'
20
+ - 'lib/docparser/output/html_output_test.rb'
21
+ - 'lib/docparser/output/json_output_test.rb'
22
+ - 'lib/docparser/output/multi_output_test.rb'
23
+ - 'lib/docparser/output/yaml_output_test.rb'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../test_helper'
2
4
  require 'open3'
3
5
  require 'shellwords'
@@ -14,15 +16,14 @@ describe DocParser do
14
16
  Dir.mktmpdir do |dir|
15
17
  Dir.chdir(dir)
16
18
  example_file = Shellwords.escape(File.join($ROOT_DIR, 'example.rb'))
17
- out, err = cmd_to_sys '/usr/bin/env ruby ' + example_file
18
- err.must_be_empty
19
- rows = out.scan(/(\d+) rows/).flatten
19
+ _, err = cmd_to_sys '/usr/bin/env ruby ' + example_file
20
+ rows = err.scan(/(\d+) rows/).flatten
20
21
  rows.length.must_equal 5
21
22
  row_lengths = rows.group_by(&:to_i)
22
23
  row_lengths.length.must_equal 1
23
24
  # HaD: 40 pages of 7 articles
24
25
  row_lengths.keys.first.must_equal(7 * 40)
25
- out.must_match(/Done processing/)
26
+ err.must_match(/Done processing/)
26
27
  end
27
28
  Dir.chdir(curwd)
28
29
  end
@@ -1,7 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../test_helper'
4
+
2
5
  describe DocParser::Document do
3
6
  before do
4
- Log4r::Logger['docparser'].level = Log4r::INFO
5
7
  $output = DocParser::NilOutput.new
6
8
  @parser = Class.new do
7
9
  define_method(:outputs) { [$output] }
@@ -54,15 +56,15 @@ describe DocParser::Document do
54
56
  it 'should be possible to use css queries' do
55
57
  css = 'article > h1 + p'
56
58
  css_content = @test_doc.css_content(css)
57
- css_element = @test_doc.css(css)
59
+ css_element = @test_doc.elements(css)
58
60
  css_content.must_equal('Great article it is')
59
61
  css_content.must_equal(css_element.first.content)
60
62
  end
61
63
 
62
64
  it 'should be possible to use xpath queries' do
63
65
  xpath = '//li/ancestor::article/h1'
64
- xpath_content = @test_doc.xpath_content(xpath)
65
- xpath_element = @test_doc.xpath(xpath)
66
+ xpath_content = @test_doc.element_content(xpath)
67
+ xpath_element = @test_doc.elements(xpath)
66
68
  xpath_content.must_equal('This is an article')
67
69
  xpath_content.must_equal(xpath_element.first.content)
68
70
  end
@@ -83,6 +85,11 @@ describe DocParser::Document do
83
85
  array2 << element.content
84
86
  end
85
87
  array2.must_equal(array)
88
+ array2 = []
89
+ @test_doc.each_element('//p') do |element|
90
+ array2 << element.content
91
+ end
92
+ array2.must_equal(array)
86
93
  end
87
94
 
88
95
  it 'should warn when providing an empty file' do
@@ -91,20 +98,19 @@ describe DocParser::Document do
91
98
  file.close
92
99
 
93
100
  open(file.path).read.empty?.must_equal true
94
- _, err = capture_io do
95
- # Switch to hijacked IO
96
- Log4r::Outputter['docparser'].instance_variable_set(:@out, $stderr)
97
- DocParser::Document.new(filename: file.path, parser: @parser)
98
- end
99
- # Restore IO
100
- Log4r::Outputter['docparser'].instance_variable_set(:@out, $stderr)
101
- err.must_include "#{file.path} is empty"
101
+ err = StringIO.new
102
+
103
+ DocParser::Document.new(filename: file.path,
104
+ parser: @parser,
105
+ logger: Logger.new(err))
106
+
107
+ err.string.must_include "#{file.path} is empty"
102
108
  end
103
109
 
104
110
  it 'should add the row to the results' do
105
111
  @test_doc.add_row ['test']
106
112
  @test_doc.add_row 'test', 'test2'
107
- @test_doc.results.must_equal [[%w(test), %w(test test2)]]
113
+ @test_doc.results.must_equal [[%w[test], %w[test test2]]]
108
114
  end
109
115
 
110
116
  it 'should be possible to not use outputs' do
@@ -133,5 +139,4 @@ describe DocParser::Document do
133
139
  test_doc.add_row ['b'], output: 0
134
140
  test_doc.results.must_equal [[['b']], [['a']]]
135
141
  end
136
-
137
142
  end
@@ -1,13 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../../test_helper'
2
4
 
3
5
  describe DocParser::CSVOutput do
4
- before do
5
- Log4r::Logger['docparser'].level = Log4r::ERROR
6
- end
7
- after do
8
- Log4r::Logger['docparser'].level = Log4r::INFO
9
- end
10
-
11
6
  it 'must create a file' do
12
7
  Dir.mktmpdir do |dir|
13
8
  filename = File.join(dir, 'test.csv')
@@ -30,7 +25,7 @@ describe DocParser::CSVOutput do
30
25
  Dir.mktmpdir do |dir|
31
26
  filename = File.join(dir, 'test.csv')
32
27
  output = DocParser::CSVOutput.new(filename: filename)
33
- output.add_row %w(aap noot mies)
28
+ output.add_row %w[aap noot mies]
34
29
  output.add_row ['aap', 'noot', 'mies;']
35
30
  output.close
36
31
  open(filename).read.must_equal "aap;noot;mies\naap;noot;\"mies;\"\n"
@@ -43,8 +38,8 @@ describe DocParser::CSVOutput do
43
38
  output = DocParser::CSVOutput.new(filename: filename)
44
39
  output.header = 'test', 'the', 'header'
45
40
  output.rowcount.must_equal 0
46
- output.add_row %w(aap noot mies)
47
- output.add_row %w(aap noot mies)
41
+ output.add_row %w[aap noot mies]
42
+ output.add_row %w[aap noot mies]
48
43
  output.rowcount.must_equal 2
49
44
  end
50
45
  end
@@ -1,13 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../../test_helper'
2
4
 
3
5
  describe DocParser::HTMLOutput do
4
- before do
5
- Log4r::Logger['docparser'].level = Log4r::ERROR
6
- end
7
- after do
8
- Log4r::Logger['docparser'].level = Log4r::INFO
9
- end
10
-
11
6
  it 'must create a file' do
12
7
  Dir.mktmpdir do |dir|
13
8
  filename = File.join(dir, 'test.html')
@@ -31,7 +26,7 @@ describe DocParser::HTMLOutput do
31
26
  Dir.mktmpdir do |dir|
32
27
  filename = File.join(dir, 'test.html')
33
28
  output = DocParser::HTMLOutput.new(filename: filename)
34
- output.add_row %w(aap noot mies)
29
+ output.add_row %w[aap noot mies]
35
30
  output.add_row ['aap', 'noot', 'mies;']
36
31
  output.close
37
32
  html = open(filename).read
@@ -47,8 +42,8 @@ describe DocParser::HTMLOutput do
47
42
  output = DocParser::HTMLOutput.new(filename: filename)
48
43
  output.header = 'test', 'the', 'header'
49
44
  output.rowcount.must_equal 0
50
- output.add_row %w(aap noot mies)
51
- output.add_row %w(aap noot mies)
45
+ output.add_row %w[aap noot mies]
46
+ output.add_row %w[aap noot mies]
52
47
  output.rowcount.must_equal 2
53
48
  output.close
54
49
  open(filename).read.must_include('<p>2 rows</p>')
@@ -1,13 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../../test_helper'
2
4
 
3
5
  describe DocParser::JSONOutput do
4
- before do
5
- Log4r::Logger['docparser'].level = Log4r::ERROR
6
- end
7
- after do
8
- Log4r::Logger['docparser'].level = Log4r::INFO
9
- end
10
-
11
6
  it 'must create a file' do
12
7
  Dir.mktmpdir do |dir|
13
8
  filename = File.join(dir, 'test.json')
@@ -31,7 +26,7 @@ describe DocParser::JSONOutput do
31
26
  filename = File.join(dir, 'test.json')
32
27
  output = DocParser::JSONOutput.new(filename: filename)
33
28
  lambda do
34
- output.add_row %w(aap noot mies)
29
+ output.add_row %w[aap noot mies]
35
30
  end.must_raise(DocParser::MissingHeaderException)
36
31
  end
37
32
  end
@@ -41,9 +36,9 @@ describe DocParser::JSONOutput do
41
36
  filename = File.join(dir, 'test.json')
42
37
  output = DocParser::JSONOutput.new(filename: filename)
43
38
  output.header = 'test', 'the', 'header'
44
- output.add_row %w(a b c)
45
- output.add_row %w(aap noot mies")
46
- output.add_row %w(aap noot) # testing empty column
39
+ output.add_row %w[a b c]
40
+ output.add_row %w[aap noot mies"]
41
+ output.add_row %w[aap noot] # testing empty column
47
42
  output.close
48
43
  expected = '[{"test":"a","the":"b","header":"c"},
49
44
  {"test":"aap","the":"noot","header":"mies\""},
@@ -58,8 +53,8 @@ describe DocParser::JSONOutput do
58
53
  output = DocParser::JSONOutput.new(filename: filename)
59
54
  output.header = 'test', 'the', 'header'
60
55
  output.rowcount.must_equal 0
61
- output.add_row %w(aap noot mies)
62
- output.add_row %w(aap noot mies)
56
+ output.add_row %w[aap noot mies]
57
+ output.add_row %w[aap noot mies]
63
58
  output.rowcount.must_equal 2
64
59
  end
65
60
  end
@@ -1,13 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../../test_helper'
2
4
 
3
5
  describe DocParser::MultiOutput do
4
- before do
5
- Log4r::Logger['docparser'].level = Log4r::ERROR
6
- end
7
- after do
8
- Log4r::Logger['docparser'].level = Log4r::INFO
9
- end
10
-
11
6
  it 'must create files' do
12
7
  Dir.mktmpdir do |dir|
13
8
  filename = File.join(dir, 'test')
@@ -36,7 +31,7 @@ describe DocParser::MultiOutput do
36
31
  filename = File.join(dir, 'test')
37
32
  output = DocParser::MultiOutput.new(filename: filename)
38
33
  lambda do
39
- output.add_row %w(aap noot mies)
34
+ output.add_row %w[aap noot mies]
40
35
  end.must_raise(DocParser::MissingHeaderException)
41
36
  end
42
37
  end
@@ -47,8 +42,8 @@ describe DocParser::MultiOutput do
47
42
  output = DocParser::MultiOutput.new(filename: filename)
48
43
  output.header = 'test', 'the', 'header'
49
44
  output.rowcount.must_equal 0
50
- output.add_row %w(aap noot mies)
51
- output.add_row %w(aap noot mies)
45
+ output.add_row %w[aap noot mies]
46
+ output.add_row %w[aap noot mies]
52
47
  output.rowcount.must_equal 2
53
48
  end
54
49
  end
@@ -57,7 +52,7 @@ describe DocParser::MultiOutput do
57
52
  Dir.mktmpdir do |dir|
58
53
  filename = File.join(dir, 'test')
59
54
  output = DocParser::MultiOutput.new(filename: filename)
60
- methods = %i(add_row header= close)
55
+ methods = %i[add_row header= close]
61
56
  outputs = output.instance_variable_get(:@outputs)
62
57
  outputs.map! do |o|
63
58
  SimpleMock.new o
@@ -76,5 +71,4 @@ describe DocParser::MultiOutput do
76
71
  end
77
72
  end
78
73
  end
79
-
80
74
  end
@@ -1,13 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../../../test_helper'
2
4
 
3
5
  describe DocParser::NilOutput do
4
- before do
5
- Log4r::Logger['docparser'].level = Log4r::ERROR
6
- end
7
- after do
8
- Log4r::Logger['docparser'].level = Log4r::INFO
9
- end
10
-
11
6
  it 'should not create a file' do
12
7
  Dir.mktmpdir do |dir|
13
8
  filename = File.join(dir, '*')
@@ -20,8 +15,8 @@ describe DocParser::NilOutput do
20
15
  output = DocParser::NilOutput.new
21
16
  output.header = 'test', 'the', 'header'
22
17
  output.rowcount.must_equal 0
23
- output.add_row %w(aap noot mies)
24
- output.add_row %w(aap noot mies)
18
+ output.add_row %w[aap noot mies]
19
+ output.add_row %w[aap noot mies]
25
20
  output.rowcount.must_equal 0
26
21
  end
27
22
  end