docparser 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: edbb546c57582191dd0dbb7acc77a4451bdfd9a0
4
- data.tar.gz: 6f5e932a6cbaaafc93d9d7abb35ed647055efd46
3
+ metadata.gz: 907927469491015367a9b5ba12ff4a8122495428
4
+ data.tar.gz: 5c842a24a58026c8296d61ca95d921f9ab20ccf9
5
5
  SHA512:
6
- metadata.gz: 433c99cae004c59432928aceb5d3f75e3fc5a9a33eadbd9fb6c7ae7cb359678eaa938304dfce8d4627e56f159ede28af2671e46d33c5357e5e33e7d51a045157
7
- data.tar.gz: 89748f88e7ac14f99f75fdccbfcae36e439bdcd1a92e57baab761b1c27856503077a437ee30ea7844b755d3d4afb3e113ca24dd83915779c97124cd808e03bef
6
+ metadata.gz: 126b0563186b5f7dc9b94a55ee576d3f07818119056c99bd8dd938f940cb5c19b942cdb380ad9f2dc0367383b4e30cf42b8a2468cb9cad734f5cd716e92ce192
7
+ data.tar.gz: 7abef08de7561f3e8486141c311655bf8f13e1d4c6a658b9a9919c56f0d23fc48c071b6df14211f19f0d3987018d97b739065c01bb5fed267c38f3e86292071e
data/Gemfile CHANGED
@@ -6,7 +6,7 @@ source 'https://rubygems.org'
6
6
  group :test do
7
7
  gem 'coveralls', require: false
8
8
  gem 'rake'
9
- gem 'rubocop'
9
+ gem 'rubocop', git: 'git://github.com/bbatsov/rubocop.git', ref: '011ca3536c0a'
10
10
  gem 'simplecov', require: false
11
11
  gem 'simple_mock'
12
12
  end
data/Rakefile CHANGED
@@ -1,5 +1,8 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
2
2
  require 'rake/testtask'
3
+ require 'rubocop'
4
+ require 'yard'
5
+ YARD::Rake::YardocTask.new
3
6
 
4
7
  Rake::TestTask.new do |t|
5
8
  t.libs << 'lib/docparser'
@@ -10,7 +13,10 @@ end
10
13
  task test: :rubocop
11
14
 
12
15
  task :rubocop do
13
- sh 'rubocop'
16
+ puts "Running Rubocop #{Rubocop::Version::STRING}"
17
+ args = FileList['**/*.rb', 'Rakefile', 'docparser.gemspec']
18
+ cli = Rubocop::CLI.new
19
+ fail unless cli.run(args) == 0
14
20
  end
15
21
 
16
- task :default => :test
22
+ task default: :test
data/docparser.gemspec CHANGED
@@ -3,27 +3,28 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
  require 'docparser/version'
4
4
 
5
5
  Gem::Specification.new do |spec|
6
- spec.name = "docparser"
6
+ spec.name = 'docparser'
7
7
  spec.version = DocParser::VERSION
8
- spec.authors = ["Jurriaan Pruis"]
9
- spec.email = ["email@jurriaanpruis.nl"]
10
- spec.description = %q{DocParser is a Ruby Gem for webscraping}
11
- spec.summary = %q{DocParser is a Ruby Gem for webscraping}
12
- spec.homepage = "https://github.com/jurriaan/docparser"
13
- spec.license = "MIT"
8
+ spec.authors = ['Jurriaan Pruis']
9
+ spec.email = ['email@jurriaanpruis.nl']
10
+ spec.description = 'DocParser is a Ruby Gem for webscraping'
11
+ spec.summary = 'DocParser is a Ruby Gem for webscraping'
12
+ spec.homepage = 'https://github.com/jurriaan/docparser'
13
+ spec.license = 'MIT'
14
14
  spec.platform = Gem::Platform::RUBY
15
15
 
16
- spec.files = `git ls-files`.split($/)
17
- spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
- spec.require_paths = ["lib"]
16
+ spec.files = `git ls-files`.split($RS)
17
+ spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(/^(test|spec|features)\//)
19
+ spec.require_paths = ['lib']
20
+ spec.extra_rdoc_files = ['README.md', 'LICENSE']
20
21
 
21
22
  spec.add_runtime_dependency 'nokogiri', '~> 1.5.9'
22
23
  spec.add_runtime_dependency 'parallel', '~> 0.6.4'
23
24
  spec.add_runtime_dependency 'axlsx', '~> 1.3.6'
24
25
  spec.add_runtime_dependency 'terminal-table', '~> 1.4.5'
25
26
  spec.add_runtime_dependency 'pageme', '~> 0.0.3'
26
- spec.add_runtime_dependency 'json', '~> 1.7.7'
27
+ spec.add_runtime_dependency 'multi_json', '~> 1.7'
27
28
  spec.add_runtime_dependency 'log4r', '~> 1.1.10'
28
29
 
29
30
  spec.add_development_dependency 'yard'
data/example.rb CHANGED
@@ -4,7 +4,6 @@
4
4
  #
5
5
  $LOAD_PATH.unshift __dir__
6
6
  require File.expand_path('lib/docparser.rb', __dir__)
7
- require 'tmpdir'
8
7
 
9
8
  include DocParser
10
9
  output = MultiOutput.new(filename: 'hackaday')
@@ -5,6 +5,10 @@ module DocParser
5
5
  # @see Output
6
6
  class Document
7
7
  attr_reader :filename, :doc, :encoding, :results
8
+
9
+ # @return [String] the source of the document
10
+ attr_reader :html
11
+
8
12
  def initialize(filename: nil, encoding: 'utf-8', parser: nil)
9
13
  if encoding == 'utf-8'
10
14
  encodingstring = 'r:utf-8'
@@ -37,11 +41,6 @@ module DocParser
37
41
  @title ||= xpath_content('//head/title')
38
42
  end
39
43
 
40
- # @return [String] the source of the document
41
- def html
42
- @html
43
- end
44
-
45
44
  # Executes a xpath query
46
45
  def xpath(query)
47
46
  res = @doc.search(query)
@@ -57,6 +57,6 @@ module DocParser
57
57
  end
58
58
  end
59
59
 
60
- class MissingHeaderException < StandardError
61
- end
60
+ # MissingHeaderException gets thrown if a required header is missing.
61
+ class MissingHeaderException < StandardError; end
62
62
  end
@@ -1,4 +1,4 @@
1
- require 'json'
1
+ require 'multi_json'
2
2
  module DocParser
3
3
  # The JSONOutput class generates a JSON file containing all rows as seperate
4
4
  # Array elements
@@ -25,7 +25,7 @@ module DocParser
25
25
  @doc[@header[counter]] = ''
26
26
  end
27
27
  end
28
- @file << JSON.dump(@doc)
28
+ @file << MultiJson.dump(@doc)
29
29
  end
30
30
 
31
31
  def footer
@@ -1,4 +1,4 @@
1
1
  module DocParser
2
2
  # The current version of DocParser
3
- VERSION = '0.1.2'
3
+ VERSION = '0.1.3'
4
4
  end
data/test/.rubocop.yml ADDED
@@ -0,0 +1,3 @@
1
+ # Do not introduce global variables.
2
+ AvoidGlobalVars:
3
+ Enabled: false
@@ -110,7 +110,7 @@ describe DocParser::Document do
110
110
  it 'should add the row to the results' do
111
111
  @test_doc.add_row ['test']
112
112
  @test_doc.add_row 'test', 'test2'
113
- @test_doc.results.must_equal [[['test'], ['test', 'test2']]]
113
+ @test_doc.results.must_equal [[%w(test), %w(test test2)]]
114
114
  end
115
115
 
116
116
  it 'should be possible to not use outputs' do
@@ -30,7 +30,7 @@ describe DocParser::CSVOutput do
30
30
  Dir.mktmpdir do |dir|
31
31
  filename = File.join(dir, 'test.csv')
32
32
  output = DocParser::CSVOutput.new(filename: filename)
33
- output.add_row ['aap', 'noot', 'mies']
33
+ output.add_row %w(aap noot mies)
34
34
  output.add_row ['aap', 'noot', 'mies;']
35
35
  output.close
36
36
  open(filename).read.must_equal "aap;noot;mies\naap;noot;\"mies;\"\n"
@@ -43,8 +43,8 @@ describe DocParser::CSVOutput do
43
43
  output = DocParser::CSVOutput.new(filename: filename)
44
44
  output.header = 'test', 'the', 'header'
45
45
  output.rowcount.must_equal 0
46
- output.add_row ['aap', 'noot', 'mies']
47
- output.add_row ['aap', 'noot', 'mies']
46
+ output.add_row %w(aap noot mies)
47
+ output.add_row %w(aap noot mies)
48
48
  output.rowcount.must_equal 2
49
49
  end
50
50
  end
@@ -22,8 +22,8 @@ describe DocParser::HTMLOutput do
22
22
  output = DocParser::HTMLOutput.new(filename: filename)
23
23
  output.header = 'test', 'the', 'header'
24
24
  output.close
25
- open(filename).read.must_include '<thead><tr><th>test</th><th>the</th>'\
26
- '<th>header</th></tr></thead>'
25
+ open(filename).read.must_include '<thead><tr><th>test</th><th>the</th>
26
+ <th>header</th></tr></thead>'.gsub(/\s+/, '')
27
27
  end
28
28
  end
29
29
 
@@ -31,7 +31,7 @@ describe DocParser::HTMLOutput do
31
31
  Dir.mktmpdir do |dir|
32
32
  filename = File.join(dir, 'test.html')
33
33
  output = DocParser::HTMLOutput.new(filename: filename)
34
- output.add_row ['aap', 'noot', 'mies']
34
+ output.add_row %w(aap noot mies)
35
35
  output.add_row ['aap', 'noot', 'mies;']
36
36
  output.close
37
37
  html = open(filename).read
@@ -47,8 +47,8 @@ describe DocParser::HTMLOutput do
47
47
  output = DocParser::HTMLOutput.new(filename: filename)
48
48
  output.header = 'test', 'the', 'header'
49
49
  output.rowcount.must_equal 0
50
- output.add_row ['aap', 'noot', 'mies']
51
- output.add_row ['aap', 'noot', 'mies']
50
+ output.add_row %w(aap noot mies)
51
+ output.add_row %w(aap noot mies)
52
52
  output.rowcount.must_equal 2
53
53
  output.close
54
54
  open(filename).read.must_include('<p>2 rows</p>')
@@ -31,7 +31,7 @@ describe DocParser::JSONOutput do
31
31
  filename = File.join(dir, 'test.json')
32
32
  output = DocParser::JSONOutput.new(filename: filename)
33
33
  -> do
34
- output.add_row ['aap', 'noot', 'mies']
34
+ output.add_row %w(aap noot mies)
35
35
  end.must_raise(DocParser::MissingHeaderException)
36
36
  end
37
37
  end
@@ -41,13 +41,14 @@ describe DocParser::JSONOutput do
41
41
  filename = File.join(dir, 'test.json')
42
42
  output = DocParser::JSONOutput.new(filename: filename)
43
43
  output.header = 'test', 'the', 'header'
44
- output.add_row ['a', 'b', 'c']
45
- output.add_row ['aap', 'noot', 'mies"']
46
- output.add_row ['aap', 'noot'] # testing empty column
44
+ output.add_row %w(a b c)
45
+ output.add_row %w(aap noot mies")
46
+ output.add_row %w(aap noot) # testing empty column
47
47
  output.close
48
- open(filename).read.must_equal '[{"test":"a","the":"b","header":"c"}'\
49
- ',{"test":"aap","the":"noot","header":"mies\""}'\
50
- ',{"test":"aap","the":"noot","header":""}]'
48
+ expected = '[{"test":"a","the":"b","header":"c"},
49
+ {"test":"aap","the":"noot","header":"mies\""},
50
+ {"test":"aap","the":"noot","header":""}]'.gsub(/\s+/, '')
51
+ open(filename).read.must_equal expected
51
52
  end
52
53
  end
53
54
 
@@ -57,8 +58,8 @@ describe DocParser::JSONOutput do
57
58
  output = DocParser::JSONOutput.new(filename: filename)
58
59
  output.header = 'test', 'the', 'header'
59
60
  output.rowcount.must_equal 0
60
- output.add_row ['aap', 'noot', 'mies']
61
- output.add_row ['aap', 'noot', 'mies']
61
+ output.add_row %w(aap noot mies)
62
+ output.add_row %w(aap noot mies)
62
63
  output.rowcount.must_equal 2
63
64
  end
64
65
  end
@@ -36,7 +36,7 @@ describe DocParser::MultiOutput do
36
36
  filename = File.join(dir, 'test')
37
37
  output = DocParser::MultiOutput.new(filename: filename)
38
38
  -> do
39
- output.add_row ['aap', 'noot', 'mies']
39
+ output.add_row %w(aap noot mies)
40
40
  end.must_raise(DocParser::MissingHeaderException)
41
41
  end
42
42
  end
@@ -47,8 +47,8 @@ describe DocParser::MultiOutput do
47
47
  output = DocParser::MultiOutput.new(filename: filename)
48
48
  output.header = 'test', 'the', 'header'
49
49
  output.rowcount.must_equal 0
50
- output.add_row ['aap', 'noot', 'mies']
51
- output.add_row ['aap', 'noot', 'mies']
50
+ output.add_row %w(aap noot mies)
51
+ output.add_row %w(aap noot mies)
52
52
  output.rowcount.must_equal 2
53
53
  end
54
54
  end
@@ -57,7 +57,7 @@ describe DocParser::MultiOutput do
57
57
  Dir.mktmpdir do |dir|
58
58
  filename = File.join(dir, 'test')
59
59
  output = DocParser::MultiOutput.new(filename: filename)
60
- methods = [:add_row, :header=, :close]
60
+ methods = %i(add_row header= close)
61
61
  outputs = output.instance_variable_get(:@outputs)
62
62
  outputs.map! do |o|
63
63
  SimpleMock.new o
@@ -20,8 +20,8 @@ describe DocParser::NilOutput do
20
20
  output = DocParser::NilOutput.new
21
21
  output.header = 'test', 'the', 'header'
22
22
  output.rowcount.must_equal 0
23
- output.add_row ['aap', 'noot', 'mies']
24
- output.add_row ['aap', 'noot', 'mies']
23
+ output.add_row %w(aap noot mies)
24
+ output.add_row %w(aap noot mies)
25
25
  output.rowcount.must_equal 0
26
26
  end
27
27
  end
@@ -20,19 +20,18 @@ describe DocParser::ScreenOutput do
20
20
  output = DocParser::ScreenOutput.new
21
21
  output.header = 'test', 'the', 'header'
22
22
  output.rowcount.must_equal 0
23
- output.add_row ['aap', 'noot', 'mies']
24
- output.add_row ['aap', 'noot', 'mies']
23
+ output.add_row %w(aap noot mies)
24
+ output.add_row %w(aap noot mies)
25
25
  output.rowcount.must_equal 2
26
26
  end
27
27
 
28
28
  it 'must have a header' do
29
29
  output = DocParser::ScreenOutput.new
30
30
  -> do
31
- output.add_row ['aap', 'noot', 'mies']
31
+ output.add_row %w(aap noot mies)
32
32
  end.must_raise(DocParser::MissingHeaderException)
33
33
  end
34
34
 
35
-
36
35
  it 'must output the data after close' do
37
36
  $out = StringIO.new
38
37
  output = Class.new DocParser::ScreenOutput do
@@ -43,8 +42,8 @@ describe DocParser::ScreenOutput do
43
42
  end
44
43
  end.new
45
44
  output.header = 'test', 'the', 'header'
46
- output.add_row ['aap1', '', 'mies']
47
- output.add_row ['aap2', 'mies1']
45
+ output.add_row ['aap1' , '', 'mies']
46
+ output.add_row %w(aap2 mies1)
48
47
  output.close
49
48
  out = $out.string
50
49
  out.must_include 'header'
@@ -31,7 +31,7 @@ describe DocParser::XLSXOutput do
31
31
  Dir.mktmpdir do |dir|
32
32
  filename = File.join(dir, 'test.xlsx')
33
33
  output = DocParser::XLSXOutput.new(filename: filename)
34
- output.add_row ['aap', 'noot', 'mies']
34
+ output.add_row %w(aap noot mies)
35
35
  output.add_row ['aap', 'noot', 'mies;']
36
36
  output.close
37
37
  sheet = output.instance_variable_get(:@sheet)
@@ -45,8 +45,8 @@ describe DocParser::XLSXOutput do
45
45
  output = DocParser::XLSXOutput.new(filename: filename)
46
46
  output.header = 'test', 'the', 'header'
47
47
  output.rowcount.must_equal 0
48
- output.add_row ['aap', 'noot', 'mies']
49
- output.add_row ['aap', 'noot', 'mies']
48
+ output.add_row %w(aap noot mies)
49
+ output.add_row %w(aap noot mies)
50
50
  output.rowcount.must_equal 2
51
51
  end
52
52
  end
@@ -31,7 +31,7 @@ describe DocParser::YAMLOutput do
31
31
  filename = File.join(dir, 'test.yml')
32
32
  output = DocParser::YAMLOutput.new(filename: filename)
33
33
  -> do
34
- output.add_row ['aap', 'noot', 'mies']
34
+ output.add_row %w(aap noot mies)
35
35
  end.must_raise(DocParser::MissingHeaderException)
36
36
  end
37
37
  end
@@ -41,9 +41,9 @@ describe DocParser::YAMLOutput do
41
41
  filename = File.join(dir, 'test.csv')
42
42
  output = DocParser::YAMLOutput.new(filename: filename)
43
43
  output.header = 'test', 'the', 'header'
44
- output.add_row ['a', 'b', 'c']
45
- output.add_row ['aap', 'noot', 'mies"']
46
- output.add_row ['aap', 'noot'] # testing empty column
44
+ output.add_row %w(a b c)
45
+ output.add_row %w(aap noot mies")
46
+ output.add_row %w(aap noot) # testing empty column
47
47
  output.close
48
48
  open(filename).read.must_equal <<-YAMLEND
49
49
  ---
@@ -68,8 +68,8 @@ YAMLEND
68
68
  output = DocParser::YAMLOutput.new(filename: filename)
69
69
  output.header = 'test', 'the', 'header'
70
70
  output.rowcount.must_equal 0
71
- output.add_row ['aap', 'noot', 'mies']
72
- output.add_row ['aap', 'noot', 'mies']
71
+ output.add_row %w(aap noot mies)
72
+ output.add_row %w(aap noot mies)
73
73
  output.rowcount.must_equal 2
74
74
  end
75
75
  end
@@ -32,7 +32,7 @@ describe DocParser::Output do
32
32
  output.header = 'test', 'the', 'header'
33
33
  end
34
34
  header = output.instance_variable_get(:@header)
35
- header.must_equal ['test', 'the', 'header']
35
+ header.must_equal %w(test the header)
36
36
  $method_id.must_equal :header
37
37
  end
38
38
  end
@@ -75,7 +75,6 @@ describe DocParser::Output do
75
75
  end
76
76
 
77
77
  it 'should raise a NotImplementedError on write_row' do
78
-
79
78
  Dir.mktmpdir do |dir|
80
79
  filename = File.join(dir, 'test.csv')
81
80
  output = DocParser::Output.new(filename: filename)
@@ -162,7 +162,6 @@ describe DocParser::Parser do
162
162
  mock_output2.verify.must_equal true
163
163
  end
164
164
 
165
-
166
165
  it 'should support parallel processing' do
167
166
  mock_output = SimpleMock.new DocParser::NilOutput.new
168
167
  mock_output.expect :close, nil
@@ -193,5 +192,4 @@ describe DocParser::Parser do
193
192
  $method_id.must_equal :fork
194
193
  mock_output.verify.must_equal true
195
194
  end
196
-
197
195
  end
data/test/test_helper.rb CHANGED
@@ -5,7 +5,7 @@ SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
5
5
  Coveralls::SimpleCov::Formatter
6
6
  ]
7
7
  SimpleCov.start do
8
- #add_filter '/test/'
8
+ # add_filter '/test/'
9
9
  end
10
10
  require 'minitest/autorun'
11
11
  require 'minitest/pride'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: docparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jurriaan Pruis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-28 00:00:00.000000000 Z
11
+ date: 2013-05-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -81,19 +81,19 @@ dependencies:
81
81
  - !ruby/object:Gem::Version
82
82
  version: 0.0.3
83
83
  - !ruby/object:Gem::Dependency
84
- name: json
84
+ name: multi_json
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - ~>
88
88
  - !ruby/object:Gem::Version
89
- version: 1.7.7
89
+ version: '1.7'
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - ~>
95
95
  - !ruby/object:Gem::Version
96
- version: 1.7.7
96
+ version: '1.7'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: log4r
99
99
  requirement: !ruby/object:Gem::Requirement
@@ -127,7 +127,9 @@ email:
127
127
  - email@jurriaanpruis.nl
128
128
  executables: []
129
129
  extensions: []
130
- extra_rdoc_files: []
130
+ extra_rdoc_files:
131
+ - README.md
132
+ - LICENSE
131
133
  files:
132
134
  - .coveralls.yml
133
135
  - .gitignore
@@ -153,6 +155,7 @@ files:
153
155
  - lib/docparser/output/yaml_output.rb
154
156
  - lib/docparser/parser.rb
155
157
  - lib/docparser/version.rb
158
+ - test/.rubocop.yml
156
159
  - test/lib/docparser/blackbox_test.rb
157
160
  - test/lib/docparser/document_test.rb
158
161
  - test/lib/docparser/logging_test.rb
@@ -238,6 +241,7 @@ signing_key:
238
241
  specification_version: 4
239
242
  summary: DocParser is a Ruby Gem for webscraping
240
243
  test_files:
244
+ - test/.rubocop.yml
241
245
  - test/lib/docparser/blackbox_test.rb
242
246
  - test/lib/docparser/document_test.rb
243
247
  - test/lib/docparser/logging_test.rb