docparser 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -1
- data/Gemfile +2 -2
- data/docparser.gemspec +2 -2
- data/lib/docparser/document.rb +1 -1
- data/lib/docparser/output.rb +14 -1
- data/lib/docparser/output/xlsx_output.rb +1 -0
- data/lib/docparser/parser.rb +8 -4
- data/lib/docparser/version.rb +1 -1
- data/test/lib/docparser/output/csv_output_test.rb +1 -1
- data/test/lib/docparser/output/html_output_test.rb +1 -1
- data/test/lib/docparser/output/json_output_test.rb +1 -1
- data/test/lib/docparser/output/multi_output_test.rb +2 -2
- data/test/lib/docparser/output/xlsx_output_test.rb +1 -1
- data/test/lib/docparser/output/yaml_output_test.rb +1 -1
- data/test/lib/docparser/output_test.rb +1 -1
- data/test/test_helper.rb +1 -0
- metadata +9 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9cd285e165c839e427e95d54298b8e8ab5a6056d
|
4
|
+
data.tar.gz: c18bf5d8e4f3227587152211014eafcc7fbf0f39
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bed20d4093001e9147f90573ce596e17350a5defae63d76ab2a486d9272555fafafdc27dfa155a06c76144aea49b94251b3b7be4052053ff7034cf750ca8ae0c
|
7
|
+
data.tar.gz: 3ef3f2360e53f98ed508651ea2f363d78808fb41b50605d99a6f3086262053081678d30e5882f3d87d86d4b685d0ea05347069f407c41f2f738c180810d2f93f
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -3,10 +3,10 @@ gemspec
|
|
3
3
|
source 'https://rubygems.org'
|
4
4
|
|
5
5
|
group :test do
|
6
|
-
gem 'minitest', '~> 5.3.
|
6
|
+
gem 'minitest', '~> 5.3.2'
|
7
7
|
gem 'coveralls', require: false
|
8
8
|
gem 'rake'
|
9
|
-
gem 'rubocop', '~> 0.
|
9
|
+
gem 'rubocop', '~> 0.20.1'
|
10
10
|
gem 'simplecov', require: false
|
11
11
|
gem 'simple_mock'
|
12
12
|
end
|
data/docparser.gemspec
CHANGED
@@ -20,12 +20,12 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.extra_rdoc_files = ['README.md', 'LICENSE']
|
21
21
|
|
22
22
|
spec.add_runtime_dependency 'nokogiri', '~> 1.6.1'
|
23
|
-
spec.add_runtime_dependency 'parallel', '~> 0.
|
23
|
+
spec.add_runtime_dependency 'parallel', '~> 1.0.0'
|
24
24
|
spec.add_runtime_dependency 'axlsx', '~> 2.0.1'
|
25
25
|
spec.add_runtime_dependency 'log4r', '~> 1.1.10'
|
26
26
|
|
27
27
|
spec.add_development_dependency 'yard'
|
28
|
-
spec.add_development_dependency '
|
28
|
+
spec.add_development_dependency 'kramdown', '~> 1.3.2'
|
29
29
|
spec.add_development_dependency 'github-markup'
|
30
30
|
spec.required_ruby_version = '>= 2.0.0'
|
31
31
|
end
|
data/lib/docparser/document.rb
CHANGED
@@ -32,7 +32,7 @@ module DocParser
|
|
32
32
|
# Adds a row to an output
|
33
33
|
def add_row(*row, output: 0)
|
34
34
|
output = @parser.outputs.index(output) if output.is_a? Output
|
35
|
-
@logger.debug { "#{filename}: Adding row #{row.flatten
|
35
|
+
@logger.debug { "#{filename}: Adding row #{row.flatten}" }
|
36
36
|
results[output] << row.flatten
|
37
37
|
end
|
38
38
|
|
data/lib/docparser/output.rb
CHANGED
@@ -5,7 +5,20 @@ module DocParser
|
|
5
5
|
attr_reader :rowcount
|
6
6
|
|
7
7
|
# Creates a new output
|
8
|
+
#
|
9
|
+
# You can assign the output to the Parser so it automatically writes all
|
10
|
+
# data to the file you want.
|
11
|
+
#
|
12
|
+
# Do not use this class as an output, instead use one of the classes that
|
13
|
+
# inherit from it
|
14
|
+
#
|
8
15
|
# @param filename [String] Output filename
|
16
|
+
# @see Parser
|
17
|
+
# @see CSVOutput
|
18
|
+
# @see HTMLOutput
|
19
|
+
# @see YAMLOutput
|
20
|
+
# @see XLSXOutput
|
21
|
+
# @see MultiOutput
|
9
22
|
def initialize(filename: filename)
|
10
23
|
@rowcount = 0
|
11
24
|
@filename = filename
|
@@ -34,7 +47,7 @@ module DocParser
|
|
34
47
|
@file.close unless @file.closed?
|
35
48
|
@logger.info 'Finished writing'
|
36
49
|
size = File.size(@filename) / 1024.0
|
37
|
-
@logger.info
|
50
|
+
@logger.info format('%s: %d rows, %.2f KiB', @filename, rowcount, size)
|
38
51
|
end
|
39
52
|
|
40
53
|
# Called after the file is opened
|
data/lib/docparser/parser.rb
CHANGED
@@ -32,7 +32,8 @@ module DocParser
|
|
32
32
|
# @!visibility private
|
33
33
|
attr_reader :outputs, :files, :num_processes, :encoding
|
34
34
|
|
35
|
-
# Creates a new
|
35
|
+
# Creates a new Parser instance
|
36
|
+
#
|
36
37
|
# @param files [Array] An array containing URLs or paths to files
|
37
38
|
# @param quiet [Boolean] Be quiet
|
38
39
|
# @param encoding [String] The encoding to use for opening the files
|
@@ -55,9 +56,12 @@ module DocParser
|
|
55
56
|
@logger.info "DocParser v#{VERSION} loaded"
|
56
57
|
end
|
57
58
|
|
58
|
-
#
|
59
59
|
# Parses the `files`
|
60
60
|
#
|
61
|
+
# Accepts a block which is executed for each document in the Document
|
62
|
+
# context where you can access the content using Nokogiri.
|
63
|
+
#
|
64
|
+
# @see Document
|
61
65
|
def parse!(&block)
|
62
66
|
@logger.info "Parsing #{@files.length} files (encoding: #{@encoding})."
|
63
67
|
start_time = Time.now
|
@@ -72,7 +76,7 @@ module DocParser
|
|
72
76
|
|
73
77
|
write_to_outputs
|
74
78
|
|
75
|
-
@logger.info
|
79
|
+
@logger.info format('Done processing in %.2fs.', Time.now - start_time)
|
76
80
|
end
|
77
81
|
|
78
82
|
private
|
@@ -83,7 +87,7 @@ module DocParser
|
|
83
87
|
@outputs << output
|
84
88
|
elsif output.is_a?(Array) && output.all? { |o| o.is_a? Output }
|
85
89
|
@outputs = output
|
86
|
-
elsif
|
90
|
+
elsif output
|
87
91
|
fail ArgumentError, 'Invalid outputs specified'
|
88
92
|
end
|
89
93
|
|
data/lib/docparser/version.rb
CHANGED
@@ -13,9 +13,9 @@ describe DocParser::MultiOutput do
|
|
13
13
|
filename = File.join(dir, 'test')
|
14
14
|
DocParser::MultiOutput.new(filename: filename)
|
15
15
|
|
16
|
-
File.
|
16
|
+
File.exist?(filename).must_equal false
|
17
17
|
['.csv', '.html', '.yml', '.xlsx', '.json'].each do |ext|
|
18
|
-
File.
|
18
|
+
File.exist?(filename + ext).must_equal true
|
19
19
|
end
|
20
20
|
end
|
21
21
|
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: docparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jurriaan Pruis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
33
|
+
version: 1.0.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.
|
40
|
+
version: 1.0.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: axlsx
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,19 +81,19 @@ dependencies:
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: kramdown
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
89
|
+
version: 1.3.2
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - "
|
94
|
+
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
96
|
+
version: 1.3.2
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: github-markup
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|