docparser 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rubocop.yml +15 -3
- data/.rubocop_todo.yml +45 -0
- data/.travis.yml +1 -1
- data/Gemfile +5 -4
- data/README.md +2 -2
- data/Rakefile +3 -1
- data/docparser.gemspec +9 -9
- data/example.rb +2 -0
- data/lib/docparser.rb +2 -0
- data/lib/docparser/document.rb +20 -10
- data/lib/docparser/output.rb +9 -6
- data/lib/docparser/output/csv_output.rb +2 -0
- data/lib/docparser/output/html_output.rb +52 -49
- data/lib/docparser/output/json_output.rb +3 -1
- data/lib/docparser/output/multi_output.rb +3 -1
- data/lib/docparser/output/nil_output.rb +5 -6
- data/lib/docparser/output/xlsx_output.rb +2 -0
- data/lib/docparser/output/yaml_output.rb +4 -1
- data/lib/docparser/parser.rb +9 -13
- data/lib/docparser/version.rb +3 -1
- data/test/.rubocop.yml +6 -2
- data/test/.rubocop_todo.yml +23 -0
- data/test/lib/docparser/blackbox_test.rb +5 -4
- data/test/lib/docparser/document_test.rb +19 -14
- data/test/lib/docparser/output/csv_output_test.rb +5 -10
- data/test/lib/docparser/output/html_output_test.rb +5 -10
- data/test/lib/docparser/output/json_output_test.rb +8 -13
- data/test/lib/docparser/output/multi_output_test.rb +6 -12
- data/test/lib/docparser/output/nil_output_test.rb +4 -9
- data/test/lib/docparser/output/xlsx_output_test.rb +5 -10
- data/test/lib/docparser/output/yaml_output_test.rb +22 -27
- data/test/lib/docparser/output_test.rb +3 -8
- data/test/lib/docparser/parser_test.rb +2 -22
- data/test/lib/docparser/version_test.rb +2 -0
- data/test/support/hackaday/dl.rb +2 -0
- data/test/test_helper.rb +2 -3
- metadata +20 -35
- data/test/lib/docparser/logging_test.rb +0 -19
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module DocParser
|
2
4
|
# This Output is used for testing purposes.
|
3
5
|
|
@@ -8,13 +10,10 @@ module DocParser
|
|
8
10
|
@rowcount = 0
|
9
11
|
end
|
10
12
|
|
11
|
-
def close
|
12
|
-
end
|
13
|
+
def close; end
|
13
14
|
|
14
|
-
def write_row(*)
|
15
|
-
end
|
15
|
+
def write_row(*); end
|
16
16
|
|
17
|
-
def add_row(*)
|
18
|
-
end
|
17
|
+
def add_row(*); end
|
19
18
|
end
|
20
19
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'yaml'
|
2
4
|
module DocParser
|
3
5
|
# The YAMLOutput class generates a YAML file containing all rows as separate
|
@@ -6,7 +8,8 @@ module DocParser
|
|
6
8
|
class YAMLOutput < Output
|
7
9
|
# @!visibility private
|
8
10
|
def write_row(row)
|
9
|
-
|
11
|
+
raise MissingHeaderException if @header.nil? || @header.empty?
|
12
|
+
|
10
13
|
@doc ||= {}
|
11
14
|
|
12
15
|
0.upto(@header.length - 1) do |counter|
|
data/lib/docparser/parser.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
require 'bundler/setup'
|
3
5
|
require 'parallel'
|
6
|
+
require 'logger'
|
4
7
|
require 'set'
|
5
|
-
require 'log4r'
|
6
|
-
require 'log4r/formatter/patternformatter'
|
7
8
|
require 'docparser/version'
|
8
9
|
require 'docparser/output'
|
9
10
|
require 'docparser/document'
|
@@ -15,13 +16,6 @@ require 'docparser/output/json_output.rb'
|
|
15
16
|
require 'docparser/output/multi_output.rb'
|
16
17
|
require 'docparser/output/nil_output.rb'
|
17
18
|
|
18
|
-
Log4r.define_levels(*Log4r::Log4rConfig::LogLevels)
|
19
|
-
logger = Log4r::Logger.new('docparser')
|
20
|
-
output = Log4r::StdoutOutputter.new('docparser')
|
21
|
-
output.formatter = Log4r::PatternFormatter.new(pattern: '[%l %C] %d :: %m')
|
22
|
-
logger.outputters = output
|
23
|
-
logger.level = Log4r::INFO
|
24
|
-
|
25
19
|
# The DocParser namespace
|
26
20
|
# See README.md for information on using DocParser
|
27
21
|
module DocParser
|
@@ -48,11 +42,11 @@ module DocParser
|
|
48
42
|
@files = range ? files[range] : files
|
49
43
|
@encoding = encoding
|
50
44
|
|
51
|
-
|
45
|
+
@logger = Logger.new(STDERR)
|
46
|
+
@logger.level = quiet ? Logger::ERROR : Logger::INFO
|
52
47
|
|
53
48
|
initialize_outputs output
|
54
49
|
|
55
|
-
@logger = Log4r::Logger.new('docparser::parser')
|
56
50
|
@logger.info "DocParser v#{VERSION} loaded"
|
57
51
|
end
|
58
52
|
|
@@ -88,7 +82,7 @@ module DocParser
|
|
88
82
|
elsif output.is_a?(Array) && output.all? { |o| o.is_a? Output }
|
89
83
|
@outputs = output
|
90
84
|
elsif output
|
91
|
-
|
85
|
+
raise ArgumentError, 'Invalid outputs specified'
|
92
86
|
end
|
93
87
|
|
94
88
|
@resultsets = Array.new(@outputs.length) { Set.new }
|
@@ -102,9 +96,11 @@ module DocParser
|
|
102
96
|
parse_doc(file, &block)
|
103
97
|
# :nocov: #
|
104
98
|
end.each do |result|
|
99
|
+
next unless @outputs
|
100
|
+
|
105
101
|
result.each_with_index do |set, index|
|
106
102
|
@resultsets[index].merge(set)
|
107
|
-
end
|
103
|
+
end
|
108
104
|
end
|
109
105
|
end
|
110
106
|
|
data/lib/docparser/version.rb
CHANGED
data/test/.rubocop.yml
CHANGED
@@ -0,0 +1,23 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2020-04-13 17:56:31 +0200 using RuboCop version 0.81.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 9
|
10
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
11
|
+
# ExcludedMethods: refine
|
12
|
+
Metrics/BlockLength:
|
13
|
+
Max: 173
|
14
|
+
|
15
|
+
# Offense count: 14
|
16
|
+
Security/Open:
|
17
|
+
Exclude:
|
18
|
+
- 'lib/docparser/document_test.rb'
|
19
|
+
- 'lib/docparser/output/csv_output_test.rb'
|
20
|
+
- 'lib/docparser/output/html_output_test.rb'
|
21
|
+
- 'lib/docparser/output/json_output_test.rb'
|
22
|
+
- 'lib/docparser/output/multi_output_test.rb'
|
23
|
+
- 'lib/docparser/output/yaml_output_test.rb'
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../test_helper'
|
2
4
|
require 'open3'
|
3
5
|
require 'shellwords'
|
@@ -14,15 +16,14 @@ describe DocParser do
|
|
14
16
|
Dir.mktmpdir do |dir|
|
15
17
|
Dir.chdir(dir)
|
16
18
|
example_file = Shellwords.escape(File.join($ROOT_DIR, 'example.rb'))
|
17
|
-
|
18
|
-
err.
|
19
|
-
rows = out.scan(/(\d+) rows/).flatten
|
19
|
+
_, err = cmd_to_sys '/usr/bin/env ruby ' + example_file
|
20
|
+
rows = err.scan(/(\d+) rows/).flatten
|
20
21
|
rows.length.must_equal 5
|
21
22
|
row_lengths = rows.group_by(&:to_i)
|
22
23
|
row_lengths.length.must_equal 1
|
23
24
|
# HaD: 40 pages of 7 articles
|
24
25
|
row_lengths.keys.first.must_equal(7 * 40)
|
25
|
-
|
26
|
+
err.must_match(/Done processing/)
|
26
27
|
end
|
27
28
|
Dir.chdir(curwd)
|
28
29
|
end
|
@@ -1,7 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../test_helper'
|
4
|
+
|
2
5
|
describe DocParser::Document do
|
3
6
|
before do
|
4
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
5
7
|
$output = DocParser::NilOutput.new
|
6
8
|
@parser = Class.new do
|
7
9
|
define_method(:outputs) { [$output] }
|
@@ -54,15 +56,15 @@ describe DocParser::Document do
|
|
54
56
|
it 'should be possible to use css queries' do
|
55
57
|
css = 'article > h1 + p'
|
56
58
|
css_content = @test_doc.css_content(css)
|
57
|
-
css_element = @test_doc.
|
59
|
+
css_element = @test_doc.elements(css)
|
58
60
|
css_content.must_equal('Great article it is')
|
59
61
|
css_content.must_equal(css_element.first.content)
|
60
62
|
end
|
61
63
|
|
62
64
|
it 'should be possible to use xpath queries' do
|
63
65
|
xpath = '//li/ancestor::article/h1'
|
64
|
-
xpath_content = @test_doc.
|
65
|
-
xpath_element = @test_doc.
|
66
|
+
xpath_content = @test_doc.element_content(xpath)
|
67
|
+
xpath_element = @test_doc.elements(xpath)
|
66
68
|
xpath_content.must_equal('This is an article')
|
67
69
|
xpath_content.must_equal(xpath_element.first.content)
|
68
70
|
end
|
@@ -83,6 +85,11 @@ describe DocParser::Document do
|
|
83
85
|
array2 << element.content
|
84
86
|
end
|
85
87
|
array2.must_equal(array)
|
88
|
+
array2 = []
|
89
|
+
@test_doc.each_element('//p') do |element|
|
90
|
+
array2 << element.content
|
91
|
+
end
|
92
|
+
array2.must_equal(array)
|
86
93
|
end
|
87
94
|
|
88
95
|
it 'should warn when providing an empty file' do
|
@@ -91,20 +98,19 @@ describe DocParser::Document do
|
|
91
98
|
file.close
|
92
99
|
|
93
100
|
open(file.path).read.empty?.must_equal true
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
err.must_include "#{file.path} is empty"
|
101
|
+
err = StringIO.new
|
102
|
+
|
103
|
+
DocParser::Document.new(filename: file.path,
|
104
|
+
parser: @parser,
|
105
|
+
logger: Logger.new(err))
|
106
|
+
|
107
|
+
err.string.must_include "#{file.path} is empty"
|
102
108
|
end
|
103
109
|
|
104
110
|
it 'should add the row to the results' do
|
105
111
|
@test_doc.add_row ['test']
|
106
112
|
@test_doc.add_row 'test', 'test2'
|
107
|
-
@test_doc.results.must_equal [[%w
|
113
|
+
@test_doc.results.must_equal [[%w[test], %w[test test2]]]
|
108
114
|
end
|
109
115
|
|
110
116
|
it 'should be possible to not use outputs' do
|
@@ -133,5 +139,4 @@ describe DocParser::Document do
|
|
133
139
|
test_doc.add_row ['b'], output: 0
|
134
140
|
test_doc.results.must_equal [[['b']], [['a']]]
|
135
141
|
end
|
136
|
-
|
137
142
|
end
|
@@ -1,13 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../../test_helper'
|
2
4
|
|
3
5
|
describe DocParser::CSVOutput do
|
4
|
-
before do
|
5
|
-
Log4r::Logger['docparser'].level = Log4r::ERROR
|
6
|
-
end
|
7
|
-
after do
|
8
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
9
|
-
end
|
10
|
-
|
11
6
|
it 'must create a file' do
|
12
7
|
Dir.mktmpdir do |dir|
|
13
8
|
filename = File.join(dir, 'test.csv')
|
@@ -30,7 +25,7 @@ describe DocParser::CSVOutput do
|
|
30
25
|
Dir.mktmpdir do |dir|
|
31
26
|
filename = File.join(dir, 'test.csv')
|
32
27
|
output = DocParser::CSVOutput.new(filename: filename)
|
33
|
-
output.add_row %w
|
28
|
+
output.add_row %w[aap noot mies]
|
34
29
|
output.add_row ['aap', 'noot', 'mies;']
|
35
30
|
output.close
|
36
31
|
open(filename).read.must_equal "aap;noot;mies\naap;noot;\"mies;\"\n"
|
@@ -43,8 +38,8 @@ describe DocParser::CSVOutput do
|
|
43
38
|
output = DocParser::CSVOutput.new(filename: filename)
|
44
39
|
output.header = 'test', 'the', 'header'
|
45
40
|
output.rowcount.must_equal 0
|
46
|
-
output.add_row %w
|
47
|
-
output.add_row %w
|
41
|
+
output.add_row %w[aap noot mies]
|
42
|
+
output.add_row %w[aap noot mies]
|
48
43
|
output.rowcount.must_equal 2
|
49
44
|
end
|
50
45
|
end
|
@@ -1,13 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../../test_helper'
|
2
4
|
|
3
5
|
describe DocParser::HTMLOutput do
|
4
|
-
before do
|
5
|
-
Log4r::Logger['docparser'].level = Log4r::ERROR
|
6
|
-
end
|
7
|
-
after do
|
8
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
9
|
-
end
|
10
|
-
|
11
6
|
it 'must create a file' do
|
12
7
|
Dir.mktmpdir do |dir|
|
13
8
|
filename = File.join(dir, 'test.html')
|
@@ -31,7 +26,7 @@ describe DocParser::HTMLOutput do
|
|
31
26
|
Dir.mktmpdir do |dir|
|
32
27
|
filename = File.join(dir, 'test.html')
|
33
28
|
output = DocParser::HTMLOutput.new(filename: filename)
|
34
|
-
output.add_row %w
|
29
|
+
output.add_row %w[aap noot mies]
|
35
30
|
output.add_row ['aap', 'noot', 'mies;']
|
36
31
|
output.close
|
37
32
|
html = open(filename).read
|
@@ -47,8 +42,8 @@ describe DocParser::HTMLOutput do
|
|
47
42
|
output = DocParser::HTMLOutput.new(filename: filename)
|
48
43
|
output.header = 'test', 'the', 'header'
|
49
44
|
output.rowcount.must_equal 0
|
50
|
-
output.add_row %w
|
51
|
-
output.add_row %w
|
45
|
+
output.add_row %w[aap noot mies]
|
46
|
+
output.add_row %w[aap noot mies]
|
52
47
|
output.rowcount.must_equal 2
|
53
48
|
output.close
|
54
49
|
open(filename).read.must_include('<p>2 rows</p>')
|
@@ -1,13 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../../test_helper'
|
2
4
|
|
3
5
|
describe DocParser::JSONOutput do
|
4
|
-
before do
|
5
|
-
Log4r::Logger['docparser'].level = Log4r::ERROR
|
6
|
-
end
|
7
|
-
after do
|
8
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
9
|
-
end
|
10
|
-
|
11
6
|
it 'must create a file' do
|
12
7
|
Dir.mktmpdir do |dir|
|
13
8
|
filename = File.join(dir, 'test.json')
|
@@ -31,7 +26,7 @@ describe DocParser::JSONOutput do
|
|
31
26
|
filename = File.join(dir, 'test.json')
|
32
27
|
output = DocParser::JSONOutput.new(filename: filename)
|
33
28
|
lambda do
|
34
|
-
output.add_row %w
|
29
|
+
output.add_row %w[aap noot mies]
|
35
30
|
end.must_raise(DocParser::MissingHeaderException)
|
36
31
|
end
|
37
32
|
end
|
@@ -41,9 +36,9 @@ describe DocParser::JSONOutput do
|
|
41
36
|
filename = File.join(dir, 'test.json')
|
42
37
|
output = DocParser::JSONOutput.new(filename: filename)
|
43
38
|
output.header = 'test', 'the', 'header'
|
44
|
-
output.add_row %w
|
45
|
-
output.add_row %w
|
46
|
-
output.add_row %w
|
39
|
+
output.add_row %w[a b c]
|
40
|
+
output.add_row %w[aap noot mies"]
|
41
|
+
output.add_row %w[aap noot] # testing empty column
|
47
42
|
output.close
|
48
43
|
expected = '[{"test":"a","the":"b","header":"c"},
|
49
44
|
{"test":"aap","the":"noot","header":"mies\""},
|
@@ -58,8 +53,8 @@ describe DocParser::JSONOutput do
|
|
58
53
|
output = DocParser::JSONOutput.new(filename: filename)
|
59
54
|
output.header = 'test', 'the', 'header'
|
60
55
|
output.rowcount.must_equal 0
|
61
|
-
output.add_row %w
|
62
|
-
output.add_row %w
|
56
|
+
output.add_row %w[aap noot mies]
|
57
|
+
output.add_row %w[aap noot mies]
|
63
58
|
output.rowcount.must_equal 2
|
64
59
|
end
|
65
60
|
end
|
@@ -1,13 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../../test_helper'
|
2
4
|
|
3
5
|
describe DocParser::MultiOutput do
|
4
|
-
before do
|
5
|
-
Log4r::Logger['docparser'].level = Log4r::ERROR
|
6
|
-
end
|
7
|
-
after do
|
8
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
9
|
-
end
|
10
|
-
|
11
6
|
it 'must create files' do
|
12
7
|
Dir.mktmpdir do |dir|
|
13
8
|
filename = File.join(dir, 'test')
|
@@ -36,7 +31,7 @@ describe DocParser::MultiOutput do
|
|
36
31
|
filename = File.join(dir, 'test')
|
37
32
|
output = DocParser::MultiOutput.new(filename: filename)
|
38
33
|
lambda do
|
39
|
-
output.add_row %w
|
34
|
+
output.add_row %w[aap noot mies]
|
40
35
|
end.must_raise(DocParser::MissingHeaderException)
|
41
36
|
end
|
42
37
|
end
|
@@ -47,8 +42,8 @@ describe DocParser::MultiOutput do
|
|
47
42
|
output = DocParser::MultiOutput.new(filename: filename)
|
48
43
|
output.header = 'test', 'the', 'header'
|
49
44
|
output.rowcount.must_equal 0
|
50
|
-
output.add_row %w
|
51
|
-
output.add_row %w
|
45
|
+
output.add_row %w[aap noot mies]
|
46
|
+
output.add_row %w[aap noot mies]
|
52
47
|
output.rowcount.must_equal 2
|
53
48
|
end
|
54
49
|
end
|
@@ -57,7 +52,7 @@ describe DocParser::MultiOutput do
|
|
57
52
|
Dir.mktmpdir do |dir|
|
58
53
|
filename = File.join(dir, 'test')
|
59
54
|
output = DocParser::MultiOutput.new(filename: filename)
|
60
|
-
methods = %i
|
55
|
+
methods = %i[add_row header= close]
|
61
56
|
outputs = output.instance_variable_get(:@outputs)
|
62
57
|
outputs.map! do |o|
|
63
58
|
SimpleMock.new o
|
@@ -76,5 +71,4 @@ describe DocParser::MultiOutput do
|
|
76
71
|
end
|
77
72
|
end
|
78
73
|
end
|
79
|
-
|
80
74
|
end
|
@@ -1,13 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../../test_helper'
|
2
4
|
|
3
5
|
describe DocParser::NilOutput do
|
4
|
-
before do
|
5
|
-
Log4r::Logger['docparser'].level = Log4r::ERROR
|
6
|
-
end
|
7
|
-
after do
|
8
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
9
|
-
end
|
10
|
-
|
11
6
|
it 'should not create a file' do
|
12
7
|
Dir.mktmpdir do |dir|
|
13
8
|
filename = File.join(dir, '*')
|
@@ -20,8 +15,8 @@ describe DocParser::NilOutput do
|
|
20
15
|
output = DocParser::NilOutput.new
|
21
16
|
output.header = 'test', 'the', 'header'
|
22
17
|
output.rowcount.must_equal 0
|
23
|
-
output.add_row %w
|
24
|
-
output.add_row %w
|
18
|
+
output.add_row %w[aap noot mies]
|
19
|
+
output.add_row %w[aap noot mies]
|
25
20
|
output.rowcount.must_equal 0
|
26
21
|
end
|
27
22
|
end
|