docparser 0.2.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -1
- data/Gemfile +2 -2
- data/docparser.gemspec +2 -2
- data/lib/docparser/document.rb +1 -1
- data/lib/docparser/output.rb +14 -1
- data/lib/docparser/output/xlsx_output.rb +1 -0
- data/lib/docparser/parser.rb +8 -4
- data/lib/docparser/version.rb +1 -1
- data/test/lib/docparser/output/csv_output_test.rb +1 -1
- data/test/lib/docparser/output/html_output_test.rb +1 -1
- data/test/lib/docparser/output/json_output_test.rb +1 -1
- data/test/lib/docparser/output/multi_output_test.rb +2 -2
- data/test/lib/docparser/output/xlsx_output_test.rb +1 -1
- data/test/lib/docparser/output/yaml_output_test.rb +1 -1
- data/test/lib/docparser/output_test.rb +1 -1
- data/test/test_helper.rb +1 -0
- metadata +9 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9cd285e165c839e427e95d54298b8e8ab5a6056d
|
4
|
+
data.tar.gz: c18bf5d8e4f3227587152211014eafcc7fbf0f39
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bed20d4093001e9147f90573ce596e17350a5defae63d76ab2a486d9272555fafafdc27dfa155a06c76144aea49b94251b3b7be4052053ff7034cf750ca8ae0c
|
7
|
+
data.tar.gz: 3ef3f2360e53f98ed508651ea2f363d78808fb41b50605d99a6f3086262053081678d30e5882f3d87d86d4b685d0ea05347069f407c41f2f738c180810d2f93f
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -3,10 +3,10 @@ gemspec
|
|
3
3
|
source 'https://rubygems.org'
|
4
4
|
|
5
5
|
group :test do
|
6
|
-
gem 'minitest', '~> 5.3.
|
6
|
+
gem 'minitest', '~> 5.3.2'
|
7
7
|
gem 'coveralls', require: false
|
8
8
|
gem 'rake'
|
9
|
-
gem 'rubocop', '~> 0.
|
9
|
+
gem 'rubocop', '~> 0.20.1'
|
10
10
|
gem 'simplecov', require: false
|
11
11
|
gem 'simple_mock'
|
12
12
|
end
|
data/docparser.gemspec
CHANGED
@@ -20,12 +20,12 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.extra_rdoc_files = ['README.md', 'LICENSE']
|
21
21
|
|
22
22
|
spec.add_runtime_dependency 'nokogiri', '~> 1.6.1'
|
23
|
-
spec.add_runtime_dependency 'parallel', '~> 0.
|
23
|
+
spec.add_runtime_dependency 'parallel', '~> 1.0.0'
|
24
24
|
spec.add_runtime_dependency 'axlsx', '~> 2.0.1'
|
25
25
|
spec.add_runtime_dependency 'log4r', '~> 1.1.10'
|
26
26
|
|
27
27
|
spec.add_development_dependency 'yard'
|
28
|
-
spec.add_development_dependency '
|
28
|
+
spec.add_development_dependency 'kramdown', '~> 1.3.2'
|
29
29
|
spec.add_development_dependency 'github-markup'
|
30
30
|
spec.required_ruby_version = '>= 2.0.0'
|
31
31
|
end
|
data/lib/docparser/document.rb
CHANGED
@@ -32,7 +32,7 @@ module DocParser
|
|
32
32
|
# Adds a row to an output
|
33
33
|
def add_row(*row, output: 0)
|
34
34
|
output = @parser.outputs.index(output) if output.is_a? Output
|
35
|
-
@logger.debug { "#{filename}: Adding row #{row.flatten
|
35
|
+
@logger.debug { "#{filename}: Adding row #{row.flatten}" }
|
36
36
|
results[output] << row.flatten
|
37
37
|
end
|
38
38
|
|
data/lib/docparser/output.rb
CHANGED
@@ -5,7 +5,20 @@ module DocParser
|
|
5
5
|
attr_reader :rowcount
|
6
6
|
|
7
7
|
# Creates a new output
|
8
|
+
#
|
9
|
+
# You can assign the output to the Parser so it automatically writes all
|
10
|
+
# data to the file you want.
|
11
|
+
#
|
12
|
+
# Do not use this class as an output, instead use one of the classes that
|
13
|
+
# inherit from it
|
14
|
+
#
|
8
15
|
# @param filename [String] Output filename
|
16
|
+
# @see Parser
|
17
|
+
# @see CSVOutput
|
18
|
+
# @see HTMLOutput
|
19
|
+
# @see YAMLOutput
|
20
|
+
# @see XLSXOutput
|
21
|
+
# @see MultiOutput
|
9
22
|
def initialize(filename: filename)
|
10
23
|
@rowcount = 0
|
11
24
|
@filename = filename
|
@@ -34,7 +47,7 @@ module DocParser
|
|
34
47
|
@file.close unless @file.closed?
|
35
48
|
@logger.info 'Finished writing'
|
36
49
|
size = File.size(@filename) / 1024.0
|
37
|
-
@logger.info
|
50
|
+
@logger.info format('%s: %d rows, %.2f KiB', @filename, rowcount, size)
|
38
51
|
end
|
39
52
|
|
40
53
|
# Called after the file is opened
|
data/lib/docparser/parser.rb
CHANGED
@@ -32,7 +32,8 @@ module DocParser
|
|
32
32
|
# @!visibility private
|
33
33
|
attr_reader :outputs, :files, :num_processes, :encoding
|
34
34
|
|
35
|
-
# Creates a new
|
35
|
+
# Creates a new Parser instance
|
36
|
+
#
|
36
37
|
# @param files [Array] An array containing URLs or paths to files
|
37
38
|
# @param quiet [Boolean] Be quiet
|
38
39
|
# @param encoding [String] The encoding to use for opening the files
|
@@ -55,9 +56,12 @@ module DocParser
|
|
55
56
|
@logger.info "DocParser v#{VERSION} loaded"
|
56
57
|
end
|
57
58
|
|
58
|
-
#
|
59
59
|
# Parses the `files`
|
60
60
|
#
|
61
|
+
# Accepts a block which is executed for each document in the Document
|
62
|
+
# context where you can access the content using Nokogiri.
|
63
|
+
#
|
64
|
+
# @see Document
|
61
65
|
def parse!(&block)
|
62
66
|
@logger.info "Parsing #{@files.length} files (encoding: #{@encoding})."
|
63
67
|
start_time = Time.now
|
@@ -72,7 +76,7 @@ module DocParser
|
|
72
76
|
|
73
77
|
write_to_outputs
|
74
78
|
|
75
|
-
@logger.info
|
79
|
+
@logger.info format('Done processing in %.2fs.', Time.now - start_time)
|
76
80
|
end
|
77
81
|
|
78
82
|
private
|
@@ -83,7 +87,7 @@ module DocParser
|
|
83
87
|
@outputs << output
|
84
88
|
elsif output.is_a?(Array) && output.all? { |o| o.is_a? Output }
|
85
89
|
@outputs = output
|
86
|
-
elsif
|
90
|
+
elsif output
|
87
91
|
fail ArgumentError, 'Invalid outputs specified'
|
88
92
|
end
|
89
93
|
|
data/lib/docparser/version.rb
CHANGED
@@ -13,9 +13,9 @@ describe DocParser::MultiOutput do
|
|
13
13
|
filename = File.join(dir, 'test')
|
14
14
|
DocParser::MultiOutput.new(filename: filename)
|
15
15
|
|
16
|
-
File.
|
16
|
+
File.exist?(filename).must_equal false
|
17
17
|
['.csv', '.html', '.yml', '.xlsx', '.json'].each do |ext|
|
18
|
-
File.
|
18
|
+
File.exist?(filename + ext).must_equal true
|
19
19
|
end
|
20
20
|
end
|
21
21
|
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: docparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jurriaan Pruis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
33
|
+
version: 1.0.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.
|
40
|
+
version: 1.0.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: axlsx
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,19 +81,19 @@ dependencies:
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: kramdown
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
89
|
+
version: 1.3.2
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - "
|
94
|
+
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
96
|
+
version: 1.3.2
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: github-markup
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|