docparser 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rubocop.yml +15 -3
- data/.rubocop_todo.yml +45 -0
- data/.travis.yml +1 -1
- data/Gemfile +5 -4
- data/README.md +2 -2
- data/Rakefile +3 -1
- data/docparser.gemspec +9 -9
- data/example.rb +2 -0
- data/lib/docparser.rb +2 -0
- data/lib/docparser/document.rb +20 -10
- data/lib/docparser/output.rb +9 -6
- data/lib/docparser/output/csv_output.rb +2 -0
- data/lib/docparser/output/html_output.rb +52 -49
- data/lib/docparser/output/json_output.rb +3 -1
- data/lib/docparser/output/multi_output.rb +3 -1
- data/lib/docparser/output/nil_output.rb +5 -6
- data/lib/docparser/output/xlsx_output.rb +2 -0
- data/lib/docparser/output/yaml_output.rb +4 -1
- data/lib/docparser/parser.rb +9 -13
- data/lib/docparser/version.rb +3 -1
- data/test/.rubocop.yml +6 -2
- data/test/.rubocop_todo.yml +23 -0
- data/test/lib/docparser/blackbox_test.rb +5 -4
- data/test/lib/docparser/document_test.rb +19 -14
- data/test/lib/docparser/output/csv_output_test.rb +5 -10
- data/test/lib/docparser/output/html_output_test.rb +5 -10
- data/test/lib/docparser/output/json_output_test.rb +8 -13
- data/test/lib/docparser/output/multi_output_test.rb +6 -12
- data/test/lib/docparser/output/nil_output_test.rb +4 -9
- data/test/lib/docparser/output/xlsx_output_test.rb +5 -10
- data/test/lib/docparser/output/yaml_output_test.rb +22 -27
- data/test/lib/docparser/output_test.rb +3 -8
- data/test/lib/docparser/parser_test.rb +2 -22
- data/test/lib/docparser/version_test.rb +2 -0
- data/test/support/hackaday/dl.rb +2 -0
- data/test/test_helper.rb +2 -3
- metadata +20 -35
- data/test/lib/docparser/logging_test.rb +0 -19
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module DocParser
|
2
4
|
# This Output is used for testing purposes.
|
3
5
|
|
@@ -8,13 +10,10 @@ module DocParser
|
|
8
10
|
@rowcount = 0
|
9
11
|
end
|
10
12
|
|
11
|
-
def close
|
12
|
-
end
|
13
|
+
def close; end
|
13
14
|
|
14
|
-
def write_row(*)
|
15
|
-
end
|
15
|
+
def write_row(*); end
|
16
16
|
|
17
|
-
def add_row(*)
|
18
|
-
end
|
17
|
+
def add_row(*); end
|
19
18
|
end
|
20
19
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'yaml'
|
2
4
|
module DocParser
|
3
5
|
# The YAMLOutput class generates a YAML file containing all rows as separate
|
@@ -6,7 +8,8 @@ module DocParser
|
|
6
8
|
class YAMLOutput < Output
|
7
9
|
# @!visibility private
|
8
10
|
def write_row(row)
|
9
|
-
|
11
|
+
raise MissingHeaderException if @header.nil? || @header.empty?
|
12
|
+
|
10
13
|
@doc ||= {}
|
11
14
|
|
12
15
|
0.upto(@header.length - 1) do |counter|
|
data/lib/docparser/parser.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
require 'bundler/setup'
|
3
5
|
require 'parallel'
|
6
|
+
require 'logger'
|
4
7
|
require 'set'
|
5
|
-
require 'log4r'
|
6
|
-
require 'log4r/formatter/patternformatter'
|
7
8
|
require 'docparser/version'
|
8
9
|
require 'docparser/output'
|
9
10
|
require 'docparser/document'
|
@@ -15,13 +16,6 @@ require 'docparser/output/json_output.rb'
|
|
15
16
|
require 'docparser/output/multi_output.rb'
|
16
17
|
require 'docparser/output/nil_output.rb'
|
17
18
|
|
18
|
-
Log4r.define_levels(*Log4r::Log4rConfig::LogLevels)
|
19
|
-
logger = Log4r::Logger.new('docparser')
|
20
|
-
output = Log4r::StdoutOutputter.new('docparser')
|
21
|
-
output.formatter = Log4r::PatternFormatter.new(pattern: '[%l %C] %d :: %m')
|
22
|
-
logger.outputters = output
|
23
|
-
logger.level = Log4r::INFO
|
24
|
-
|
25
19
|
# The DocParser namespace
|
26
20
|
# See README.md for information on using DocParser
|
27
21
|
module DocParser
|
@@ -48,11 +42,11 @@ module DocParser
|
|
48
42
|
@files = range ? files[range] : files
|
49
43
|
@encoding = encoding
|
50
44
|
|
51
|
-
|
45
|
+
@logger = Logger.new(STDERR)
|
46
|
+
@logger.level = quiet ? Logger::ERROR : Logger::INFO
|
52
47
|
|
53
48
|
initialize_outputs output
|
54
49
|
|
55
|
-
@logger = Log4r::Logger.new('docparser::parser')
|
56
50
|
@logger.info "DocParser v#{VERSION} loaded"
|
57
51
|
end
|
58
52
|
|
@@ -88,7 +82,7 @@ module DocParser
|
|
88
82
|
elsif output.is_a?(Array) && output.all? { |o| o.is_a? Output }
|
89
83
|
@outputs = output
|
90
84
|
elsif output
|
91
|
-
|
85
|
+
raise ArgumentError, 'Invalid outputs specified'
|
92
86
|
end
|
93
87
|
|
94
88
|
@resultsets = Array.new(@outputs.length) { Set.new }
|
@@ -102,9 +96,11 @@ module DocParser
|
|
102
96
|
parse_doc(file, &block)
|
103
97
|
# :nocov: #
|
104
98
|
end.each do |result|
|
99
|
+
next unless @outputs
|
100
|
+
|
105
101
|
result.each_with_index do |set, index|
|
106
102
|
@resultsets[index].merge(set)
|
107
|
-
end
|
103
|
+
end
|
108
104
|
end
|
109
105
|
end
|
110
106
|
|
data/lib/docparser/version.rb
CHANGED
data/test/.rubocop.yml
CHANGED
@@ -0,0 +1,23 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2020-04-13 17:56:31 +0200 using RuboCop version 0.81.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 9
|
10
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
11
|
+
# ExcludedMethods: refine
|
12
|
+
Metrics/BlockLength:
|
13
|
+
Max: 173
|
14
|
+
|
15
|
+
# Offense count: 14
|
16
|
+
Security/Open:
|
17
|
+
Exclude:
|
18
|
+
- 'lib/docparser/document_test.rb'
|
19
|
+
- 'lib/docparser/output/csv_output_test.rb'
|
20
|
+
- 'lib/docparser/output/html_output_test.rb'
|
21
|
+
- 'lib/docparser/output/json_output_test.rb'
|
22
|
+
- 'lib/docparser/output/multi_output_test.rb'
|
23
|
+
- 'lib/docparser/output/yaml_output_test.rb'
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../test_helper'
|
2
4
|
require 'open3'
|
3
5
|
require 'shellwords'
|
@@ -14,15 +16,14 @@ describe DocParser do
|
|
14
16
|
Dir.mktmpdir do |dir|
|
15
17
|
Dir.chdir(dir)
|
16
18
|
example_file = Shellwords.escape(File.join($ROOT_DIR, 'example.rb'))
|
17
|
-
|
18
|
-
err.
|
19
|
-
rows = out.scan(/(\d+) rows/).flatten
|
19
|
+
_, err = cmd_to_sys '/usr/bin/env ruby ' + example_file
|
20
|
+
rows = err.scan(/(\d+) rows/).flatten
|
20
21
|
rows.length.must_equal 5
|
21
22
|
row_lengths = rows.group_by(&:to_i)
|
22
23
|
row_lengths.length.must_equal 1
|
23
24
|
# HaD: 40 pages of 7 articles
|
24
25
|
row_lengths.keys.first.must_equal(7 * 40)
|
25
|
-
|
26
|
+
err.must_match(/Done processing/)
|
26
27
|
end
|
27
28
|
Dir.chdir(curwd)
|
28
29
|
end
|
@@ -1,7 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../test_helper'
|
4
|
+
|
2
5
|
describe DocParser::Document do
|
3
6
|
before do
|
4
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
5
7
|
$output = DocParser::NilOutput.new
|
6
8
|
@parser = Class.new do
|
7
9
|
define_method(:outputs) { [$output] }
|
@@ -54,15 +56,15 @@ describe DocParser::Document do
|
|
54
56
|
it 'should be possible to use css queries' do
|
55
57
|
css = 'article > h1 + p'
|
56
58
|
css_content = @test_doc.css_content(css)
|
57
|
-
css_element = @test_doc.
|
59
|
+
css_element = @test_doc.elements(css)
|
58
60
|
css_content.must_equal('Great article it is')
|
59
61
|
css_content.must_equal(css_element.first.content)
|
60
62
|
end
|
61
63
|
|
62
64
|
it 'should be possible to use xpath queries' do
|
63
65
|
xpath = '//li/ancestor::article/h1'
|
64
|
-
xpath_content = @test_doc.
|
65
|
-
xpath_element = @test_doc.
|
66
|
+
xpath_content = @test_doc.element_content(xpath)
|
67
|
+
xpath_element = @test_doc.elements(xpath)
|
66
68
|
xpath_content.must_equal('This is an article')
|
67
69
|
xpath_content.must_equal(xpath_element.first.content)
|
68
70
|
end
|
@@ -83,6 +85,11 @@ describe DocParser::Document do
|
|
83
85
|
array2 << element.content
|
84
86
|
end
|
85
87
|
array2.must_equal(array)
|
88
|
+
array2 = []
|
89
|
+
@test_doc.each_element('//p') do |element|
|
90
|
+
array2 << element.content
|
91
|
+
end
|
92
|
+
array2.must_equal(array)
|
86
93
|
end
|
87
94
|
|
88
95
|
it 'should warn when providing an empty file' do
|
@@ -91,20 +98,19 @@ describe DocParser::Document do
|
|
91
98
|
file.close
|
92
99
|
|
93
100
|
open(file.path).read.empty?.must_equal true
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
err.must_include "#{file.path} is empty"
|
101
|
+
err = StringIO.new
|
102
|
+
|
103
|
+
DocParser::Document.new(filename: file.path,
|
104
|
+
parser: @parser,
|
105
|
+
logger: Logger.new(err))
|
106
|
+
|
107
|
+
err.string.must_include "#{file.path} is empty"
|
102
108
|
end
|
103
109
|
|
104
110
|
it 'should add the row to the results' do
|
105
111
|
@test_doc.add_row ['test']
|
106
112
|
@test_doc.add_row 'test', 'test2'
|
107
|
-
@test_doc.results.must_equal [[%w
|
113
|
+
@test_doc.results.must_equal [[%w[test], %w[test test2]]]
|
108
114
|
end
|
109
115
|
|
110
116
|
it 'should be possible to not use outputs' do
|
@@ -133,5 +139,4 @@ describe DocParser::Document do
|
|
133
139
|
test_doc.add_row ['b'], output: 0
|
134
140
|
test_doc.results.must_equal [[['b']], [['a']]]
|
135
141
|
end
|
136
|
-
|
137
142
|
end
|
@@ -1,13 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../../test_helper'
|
2
4
|
|
3
5
|
describe DocParser::CSVOutput do
|
4
|
-
before do
|
5
|
-
Log4r::Logger['docparser'].level = Log4r::ERROR
|
6
|
-
end
|
7
|
-
after do
|
8
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
9
|
-
end
|
10
|
-
|
11
6
|
it 'must create a file' do
|
12
7
|
Dir.mktmpdir do |dir|
|
13
8
|
filename = File.join(dir, 'test.csv')
|
@@ -30,7 +25,7 @@ describe DocParser::CSVOutput do
|
|
30
25
|
Dir.mktmpdir do |dir|
|
31
26
|
filename = File.join(dir, 'test.csv')
|
32
27
|
output = DocParser::CSVOutput.new(filename: filename)
|
33
|
-
output.add_row %w
|
28
|
+
output.add_row %w[aap noot mies]
|
34
29
|
output.add_row ['aap', 'noot', 'mies;']
|
35
30
|
output.close
|
36
31
|
open(filename).read.must_equal "aap;noot;mies\naap;noot;\"mies;\"\n"
|
@@ -43,8 +38,8 @@ describe DocParser::CSVOutput do
|
|
43
38
|
output = DocParser::CSVOutput.new(filename: filename)
|
44
39
|
output.header = 'test', 'the', 'header'
|
45
40
|
output.rowcount.must_equal 0
|
46
|
-
output.add_row %w
|
47
|
-
output.add_row %w
|
41
|
+
output.add_row %w[aap noot mies]
|
42
|
+
output.add_row %w[aap noot mies]
|
48
43
|
output.rowcount.must_equal 2
|
49
44
|
end
|
50
45
|
end
|
@@ -1,13 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../../test_helper'
|
2
4
|
|
3
5
|
describe DocParser::HTMLOutput do
|
4
|
-
before do
|
5
|
-
Log4r::Logger['docparser'].level = Log4r::ERROR
|
6
|
-
end
|
7
|
-
after do
|
8
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
9
|
-
end
|
10
|
-
|
11
6
|
it 'must create a file' do
|
12
7
|
Dir.mktmpdir do |dir|
|
13
8
|
filename = File.join(dir, 'test.html')
|
@@ -31,7 +26,7 @@ describe DocParser::HTMLOutput do
|
|
31
26
|
Dir.mktmpdir do |dir|
|
32
27
|
filename = File.join(dir, 'test.html')
|
33
28
|
output = DocParser::HTMLOutput.new(filename: filename)
|
34
|
-
output.add_row %w
|
29
|
+
output.add_row %w[aap noot mies]
|
35
30
|
output.add_row ['aap', 'noot', 'mies;']
|
36
31
|
output.close
|
37
32
|
html = open(filename).read
|
@@ -47,8 +42,8 @@ describe DocParser::HTMLOutput do
|
|
47
42
|
output = DocParser::HTMLOutput.new(filename: filename)
|
48
43
|
output.header = 'test', 'the', 'header'
|
49
44
|
output.rowcount.must_equal 0
|
50
|
-
output.add_row %w
|
51
|
-
output.add_row %w
|
45
|
+
output.add_row %w[aap noot mies]
|
46
|
+
output.add_row %w[aap noot mies]
|
52
47
|
output.rowcount.must_equal 2
|
53
48
|
output.close
|
54
49
|
open(filename).read.must_include('<p>2 rows</p>')
|
@@ -1,13 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../../test_helper'
|
2
4
|
|
3
5
|
describe DocParser::JSONOutput do
|
4
|
-
before do
|
5
|
-
Log4r::Logger['docparser'].level = Log4r::ERROR
|
6
|
-
end
|
7
|
-
after do
|
8
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
9
|
-
end
|
10
|
-
|
11
6
|
it 'must create a file' do
|
12
7
|
Dir.mktmpdir do |dir|
|
13
8
|
filename = File.join(dir, 'test.json')
|
@@ -31,7 +26,7 @@ describe DocParser::JSONOutput do
|
|
31
26
|
filename = File.join(dir, 'test.json')
|
32
27
|
output = DocParser::JSONOutput.new(filename: filename)
|
33
28
|
lambda do
|
34
|
-
output.add_row %w
|
29
|
+
output.add_row %w[aap noot mies]
|
35
30
|
end.must_raise(DocParser::MissingHeaderException)
|
36
31
|
end
|
37
32
|
end
|
@@ -41,9 +36,9 @@ describe DocParser::JSONOutput do
|
|
41
36
|
filename = File.join(dir, 'test.json')
|
42
37
|
output = DocParser::JSONOutput.new(filename: filename)
|
43
38
|
output.header = 'test', 'the', 'header'
|
44
|
-
output.add_row %w
|
45
|
-
output.add_row %w
|
46
|
-
output.add_row %w
|
39
|
+
output.add_row %w[a b c]
|
40
|
+
output.add_row %w[aap noot mies"]
|
41
|
+
output.add_row %w[aap noot] # testing empty column
|
47
42
|
output.close
|
48
43
|
expected = '[{"test":"a","the":"b","header":"c"},
|
49
44
|
{"test":"aap","the":"noot","header":"mies\""},
|
@@ -58,8 +53,8 @@ describe DocParser::JSONOutput do
|
|
58
53
|
output = DocParser::JSONOutput.new(filename: filename)
|
59
54
|
output.header = 'test', 'the', 'header'
|
60
55
|
output.rowcount.must_equal 0
|
61
|
-
output.add_row %w
|
62
|
-
output.add_row %w
|
56
|
+
output.add_row %w[aap noot mies]
|
57
|
+
output.add_row %w[aap noot mies]
|
63
58
|
output.rowcount.must_equal 2
|
64
59
|
end
|
65
60
|
end
|
@@ -1,13 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../../test_helper'
|
2
4
|
|
3
5
|
describe DocParser::MultiOutput do
|
4
|
-
before do
|
5
|
-
Log4r::Logger['docparser'].level = Log4r::ERROR
|
6
|
-
end
|
7
|
-
after do
|
8
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
9
|
-
end
|
10
|
-
|
11
6
|
it 'must create files' do
|
12
7
|
Dir.mktmpdir do |dir|
|
13
8
|
filename = File.join(dir, 'test')
|
@@ -36,7 +31,7 @@ describe DocParser::MultiOutput do
|
|
36
31
|
filename = File.join(dir, 'test')
|
37
32
|
output = DocParser::MultiOutput.new(filename: filename)
|
38
33
|
lambda do
|
39
|
-
output.add_row %w
|
34
|
+
output.add_row %w[aap noot mies]
|
40
35
|
end.must_raise(DocParser::MissingHeaderException)
|
41
36
|
end
|
42
37
|
end
|
@@ -47,8 +42,8 @@ describe DocParser::MultiOutput do
|
|
47
42
|
output = DocParser::MultiOutput.new(filename: filename)
|
48
43
|
output.header = 'test', 'the', 'header'
|
49
44
|
output.rowcount.must_equal 0
|
50
|
-
output.add_row %w
|
51
|
-
output.add_row %w
|
45
|
+
output.add_row %w[aap noot mies]
|
46
|
+
output.add_row %w[aap noot mies]
|
52
47
|
output.rowcount.must_equal 2
|
53
48
|
end
|
54
49
|
end
|
@@ -57,7 +52,7 @@ describe DocParser::MultiOutput do
|
|
57
52
|
Dir.mktmpdir do |dir|
|
58
53
|
filename = File.join(dir, 'test')
|
59
54
|
output = DocParser::MultiOutput.new(filename: filename)
|
60
|
-
methods = %i
|
55
|
+
methods = %i[add_row header= close]
|
61
56
|
outputs = output.instance_variable_get(:@outputs)
|
62
57
|
outputs.map! do |o|
|
63
58
|
SimpleMock.new o
|
@@ -76,5 +71,4 @@ describe DocParser::MultiOutput do
|
|
76
71
|
end
|
77
72
|
end
|
78
73
|
end
|
79
|
-
|
80
74
|
end
|
@@ -1,13 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../../test_helper'
|
2
4
|
|
3
5
|
describe DocParser::NilOutput do
|
4
|
-
before do
|
5
|
-
Log4r::Logger['docparser'].level = Log4r::ERROR
|
6
|
-
end
|
7
|
-
after do
|
8
|
-
Log4r::Logger['docparser'].level = Log4r::INFO
|
9
|
-
end
|
10
|
-
|
11
6
|
it 'should not create a file' do
|
12
7
|
Dir.mktmpdir do |dir|
|
13
8
|
filename = File.join(dir, '*')
|
@@ -20,8 +15,8 @@ describe DocParser::NilOutput do
|
|
20
15
|
output = DocParser::NilOutput.new
|
21
16
|
output.header = 'test', 'the', 'header'
|
22
17
|
output.rowcount.must_equal 0
|
23
|
-
output.add_row %w
|
24
|
-
output.add_row %w
|
18
|
+
output.add_row %w[aap noot mies]
|
19
|
+
output.add_row %w[aap noot mies]
|
25
20
|
output.rowcount.must_equal 0
|
26
21
|
end
|
27
22
|
end
|