docparser 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: edbb546c57582191dd0dbb7acc77a4451bdfd9a0
4
- data.tar.gz: 6f5e932a6cbaaafc93d9d7abb35ed647055efd46
3
+ metadata.gz: 907927469491015367a9b5ba12ff4a8122495428
4
+ data.tar.gz: 5c842a24a58026c8296d61ca95d921f9ab20ccf9
5
5
  SHA512:
6
- metadata.gz: 433c99cae004c59432928aceb5d3f75e3fc5a9a33eadbd9fb6c7ae7cb359678eaa938304dfce8d4627e56f159ede28af2671e46d33c5357e5e33e7d51a045157
7
- data.tar.gz: 89748f88e7ac14f99f75fdccbfcae36e439bdcd1a92e57baab761b1c27856503077a437ee30ea7844b755d3d4afb3e113ca24dd83915779c97124cd808e03bef
6
+ metadata.gz: 126b0563186b5f7dc9b94a55ee576d3f07818119056c99bd8dd938f940cb5c19b942cdb380ad9f2dc0367383b4e30cf42b8a2468cb9cad734f5cd716e92ce192
7
+ data.tar.gz: 7abef08de7561f3e8486141c311655bf8f13e1d4c6a658b9a9919c56f0d23fc48c071b6df14211f19f0d3987018d97b739065c01bb5fed267c38f3e86292071e
data/Gemfile CHANGED
@@ -6,7 +6,7 @@ source 'https://rubygems.org'
6
6
  group :test do
7
7
  gem 'coveralls', require: false
8
8
  gem 'rake'
9
- gem 'rubocop'
9
+ gem 'rubocop', git: 'git://github.com/bbatsov/rubocop.git', ref: '011ca3536c0a'
10
10
  gem 'simplecov', require: false
11
11
  gem 'simple_mock'
12
12
  end
data/Rakefile CHANGED
@@ -1,5 +1,8 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
2
2
  require 'rake/testtask'
3
+ require 'rubocop'
4
+ require 'yard'
5
+ YARD::Rake::YardocTask.new
3
6
 
4
7
  Rake::TestTask.new do |t|
5
8
  t.libs << 'lib/docparser'
@@ -10,7 +13,10 @@ end
10
13
  task test: :rubocop
11
14
 
12
15
  task :rubocop do
13
- sh 'rubocop'
16
+ puts "Running Rubocop #{Rubocop::Version::STRING}"
17
+ args = FileList['**/*.rb', 'Rakefile', 'docparser.gemspec']
18
+ cli = Rubocop::CLI.new
19
+ fail unless cli.run(args) == 0
14
20
  end
15
21
 
16
- task :default => :test
22
+ task default: :test
data/docparser.gemspec CHANGED
@@ -3,27 +3,28 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
  require 'docparser/version'
4
4
 
5
5
  Gem::Specification.new do |spec|
6
- spec.name = "docparser"
6
+ spec.name = 'docparser'
7
7
  spec.version = DocParser::VERSION
8
- spec.authors = ["Jurriaan Pruis"]
9
- spec.email = ["email@jurriaanpruis.nl"]
10
- spec.description = %q{DocParser is a Ruby Gem for webscraping}
11
- spec.summary = %q{DocParser is a Ruby Gem for webscraping}
12
- spec.homepage = "https://github.com/jurriaan/docparser"
13
- spec.license = "MIT"
8
+ spec.authors = ['Jurriaan Pruis']
9
+ spec.email = ['email@jurriaanpruis.nl']
10
+ spec.description = 'DocParser is a Ruby Gem for webscraping'
11
+ spec.summary = 'DocParser is a Ruby Gem for webscraping'
12
+ spec.homepage = 'https://github.com/jurriaan/docparser'
13
+ spec.license = 'MIT'
14
14
  spec.platform = Gem::Platform::RUBY
15
15
 
16
- spec.files = `git ls-files`.split($/)
17
- spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
- spec.require_paths = ["lib"]
16
+ spec.files = `git ls-files`.split($RS)
17
+ spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(/^(test|spec|features)\//)
19
+ spec.require_paths = ['lib']
20
+ spec.extra_rdoc_files = ['README.md', 'LICENSE']
20
21
 
21
22
  spec.add_runtime_dependency 'nokogiri', '~> 1.5.9'
22
23
  spec.add_runtime_dependency 'parallel', '~> 0.6.4'
23
24
  spec.add_runtime_dependency 'axlsx', '~> 1.3.6'
24
25
  spec.add_runtime_dependency 'terminal-table', '~> 1.4.5'
25
26
  spec.add_runtime_dependency 'pageme', '~> 0.0.3'
26
- spec.add_runtime_dependency 'json', '~> 1.7.7'
27
+ spec.add_runtime_dependency 'multi_json', '~> 1.7'
27
28
  spec.add_runtime_dependency 'log4r', '~> 1.1.10'
28
29
 
29
30
  spec.add_development_dependency 'yard'
data/example.rb CHANGED
@@ -4,7 +4,6 @@
4
4
  #
5
5
  $LOAD_PATH.unshift __dir__
6
6
  require File.expand_path('lib/docparser.rb', __dir__)
7
- require 'tmpdir'
8
7
 
9
8
  include DocParser
10
9
  output = MultiOutput.new(filename: 'hackaday')
@@ -5,6 +5,10 @@ module DocParser
5
5
  # @see Output
6
6
  class Document
7
7
  attr_reader :filename, :doc, :encoding, :results
8
+
9
+ # @return [String] the source of the document
10
+ attr_reader :html
11
+
8
12
  def initialize(filename: nil, encoding: 'utf-8', parser: nil)
9
13
  if encoding == 'utf-8'
10
14
  encodingstring = 'r:utf-8'
@@ -37,11 +41,6 @@ module DocParser
37
41
  @title ||= xpath_content('//head/title')
38
42
  end
39
43
 
40
- # @return [String] the source of the document
41
- def html
42
- @html
43
- end
44
-
45
44
  # Executes a xpath query
46
45
  def xpath(query)
47
46
  res = @doc.search(query)
@@ -57,6 +57,6 @@ module DocParser
57
57
  end
58
58
  end
59
59
 
60
- class MissingHeaderException < StandardError
61
- end
60
+ # MissingHeaderException gets thrown if a required header is missing.
61
+ class MissingHeaderException < StandardError; end
62
62
  end
@@ -1,4 +1,4 @@
1
- require 'json'
1
+ require 'multi_json'
2
2
  module DocParser
3
3
  # The JSONOutput class generates a JSON file containing all rows as separate
4
4
  # Array elements
@@ -25,7 +25,7 @@ module DocParser
25
25
  @doc[@header[counter]] = ''
26
26
  end
27
27
  end
28
- @file << JSON.dump(@doc)
28
+ @file << MultiJson.dump(@doc)
29
29
  end
30
30
 
31
31
  def footer
@@ -1,4 +1,4 @@
1
1
  module DocParser
2
2
  # The current version of DocParser
3
- VERSION = '0.1.2'
3
+ VERSION = '0.1.3'
4
4
  end
data/test/.rubocop.yml ADDED
@@ -0,0 +1,3 @@
1
+ # Do not introduce global variables.
2
+ AvoidGlobalVars:
3
+ Enabled: false
@@ -110,7 +110,7 @@ describe DocParser::Document do
110
110
  it 'should add the row to the results' do
111
111
  @test_doc.add_row ['test']
112
112
  @test_doc.add_row 'test', 'test2'
113
- @test_doc.results.must_equal [[['test'], ['test', 'test2']]]
113
+ @test_doc.results.must_equal [[%w(test), %w(test test2)]]
114
114
  end
115
115
 
116
116
  it 'should be possible to not use outputs' do
@@ -30,7 +30,7 @@ describe DocParser::CSVOutput do
30
30
  Dir.mktmpdir do |dir|
31
31
  filename = File.join(dir, 'test.csv')
32
32
  output = DocParser::CSVOutput.new(filename: filename)
33
- output.add_row ['aap', 'noot', 'mies']
33
+ output.add_row %w(aap noot mies)
34
34
  output.add_row ['aap', 'noot', 'mies;']
35
35
  output.close
36
36
  open(filename).read.must_equal "aap;noot;mies\naap;noot;\"mies;\"\n"
@@ -43,8 +43,8 @@ describe DocParser::CSVOutput do
43
43
  output = DocParser::CSVOutput.new(filename: filename)
44
44
  output.header = 'test', 'the', 'header'
45
45
  output.rowcount.must_equal 0
46
- output.add_row ['aap', 'noot', 'mies']
47
- output.add_row ['aap', 'noot', 'mies']
46
+ output.add_row %w(aap noot mies)
47
+ output.add_row %w(aap noot mies)
48
48
  output.rowcount.must_equal 2
49
49
  end
50
50
  end
@@ -22,8 +22,8 @@ describe DocParser::HTMLOutput do
22
22
  output = DocParser::HTMLOutput.new(filename: filename)
23
23
  output.header = 'test', 'the', 'header'
24
24
  output.close
25
- open(filename).read.must_include '<thead><tr><th>test</th><th>the</th>'\
26
- '<th>header</th></tr></thead>'
25
+ open(filename).read.must_include '<thead><tr><th>test</th><th>the</th>
26
+ <th>header</th></tr></thead>'.gsub(/\s+/, '')
27
27
  end
28
28
  end
29
29
 
@@ -31,7 +31,7 @@ describe DocParser::HTMLOutput do
31
31
  Dir.mktmpdir do |dir|
32
32
  filename = File.join(dir, 'test.html')
33
33
  output = DocParser::HTMLOutput.new(filename: filename)
34
- output.add_row ['aap', 'noot', 'mies']
34
+ output.add_row %w(aap noot mies)
35
35
  output.add_row ['aap', 'noot', 'mies;']
36
36
  output.close
37
37
  html = open(filename).read
@@ -47,8 +47,8 @@ describe DocParser::HTMLOutput do
47
47
  output = DocParser::HTMLOutput.new(filename: filename)
48
48
  output.header = 'test', 'the', 'header'
49
49
  output.rowcount.must_equal 0
50
- output.add_row ['aap', 'noot', 'mies']
51
- output.add_row ['aap', 'noot', 'mies']
50
+ output.add_row %w(aap noot mies)
51
+ output.add_row %w(aap noot mies)
52
52
  output.rowcount.must_equal 2
53
53
  output.close
54
54
  open(filename).read.must_include('<p>2 rows</p>')
@@ -31,7 +31,7 @@ describe DocParser::JSONOutput do
31
31
  filename = File.join(dir, 'test.json')
32
32
  output = DocParser::JSONOutput.new(filename: filename)
33
33
  -> do
34
- output.add_row ['aap', 'noot', 'mies']
34
+ output.add_row %w(aap noot mies)
35
35
  end.must_raise(DocParser::MissingHeaderException)
36
36
  end
37
37
  end
@@ -41,13 +41,14 @@ describe DocParser::JSONOutput do
41
41
  filename = File.join(dir, 'test.json')
42
42
  output = DocParser::JSONOutput.new(filename: filename)
43
43
  output.header = 'test', 'the', 'header'
44
- output.add_row ['a', 'b', 'c']
45
- output.add_row ['aap', 'noot', 'mies"']
46
- output.add_row ['aap', 'noot'] # testing empty column
44
+ output.add_row %w(a b c)
45
+ output.add_row %w(aap noot mies")
46
+ output.add_row %w(aap noot) # testing empty column
47
47
  output.close
48
- open(filename).read.must_equal '[{"test":"a","the":"b","header":"c"}'\
49
- ',{"test":"aap","the":"noot","header":"mies\""}'\
50
- ',{"test":"aap","the":"noot","header":""}]'
48
+ expected = '[{"test":"a","the":"b","header":"c"},
49
+ {"test":"aap","the":"noot","header":"mies\""},
50
+ {"test":"aap","the":"noot","header":""}]'.gsub(/\s+/, '')
51
+ open(filename).read.must_equal expected
51
52
  end
52
53
  end
53
54
 
@@ -57,8 +58,8 @@ describe DocParser::JSONOutput do
57
58
  output = DocParser::JSONOutput.new(filename: filename)
58
59
  output.header = 'test', 'the', 'header'
59
60
  output.rowcount.must_equal 0
60
- output.add_row ['aap', 'noot', 'mies']
61
- output.add_row ['aap', 'noot', 'mies']
61
+ output.add_row %w(aap noot mies)
62
+ output.add_row %w(aap noot mies)
62
63
  output.rowcount.must_equal 2
63
64
  end
64
65
  end
@@ -36,7 +36,7 @@ describe DocParser::MultiOutput do
36
36
  filename = File.join(dir, 'test')
37
37
  output = DocParser::MultiOutput.new(filename: filename)
38
38
  -> do
39
- output.add_row ['aap', 'noot', 'mies']
39
+ output.add_row %w(aap noot mies)
40
40
  end.must_raise(DocParser::MissingHeaderException)
41
41
  end
42
42
  end
@@ -47,8 +47,8 @@ describe DocParser::MultiOutput do
47
47
  output = DocParser::MultiOutput.new(filename: filename)
48
48
  output.header = 'test', 'the', 'header'
49
49
  output.rowcount.must_equal 0
50
- output.add_row ['aap', 'noot', 'mies']
51
- output.add_row ['aap', 'noot', 'mies']
50
+ output.add_row %w(aap noot mies)
51
+ output.add_row %w(aap noot mies)
52
52
  output.rowcount.must_equal 2
53
53
  end
54
54
  end
@@ -57,7 +57,7 @@ describe DocParser::MultiOutput do
57
57
  Dir.mktmpdir do |dir|
58
58
  filename = File.join(dir, 'test')
59
59
  output = DocParser::MultiOutput.new(filename: filename)
60
- methods = [:add_row, :header=, :close]
60
+ methods = %i(add_row header= close)
61
61
  outputs = output.instance_variable_get(:@outputs)
62
62
  outputs.map! do |o|
63
63
  SimpleMock.new o
@@ -20,8 +20,8 @@ describe DocParser::NilOutput do
20
20
  output = DocParser::NilOutput.new
21
21
  output.header = 'test', 'the', 'header'
22
22
  output.rowcount.must_equal 0
23
- output.add_row ['aap', 'noot', 'mies']
24
- output.add_row ['aap', 'noot', 'mies']
23
+ output.add_row %w(aap noot mies)
24
+ output.add_row %w(aap noot mies)
25
25
  output.rowcount.must_equal 0
26
26
  end
27
27
  end
@@ -20,19 +20,18 @@ describe DocParser::ScreenOutput do
20
20
  output = DocParser::ScreenOutput.new
21
21
  output.header = 'test', 'the', 'header'
22
22
  output.rowcount.must_equal 0
23
- output.add_row ['aap', 'noot', 'mies']
24
- output.add_row ['aap', 'noot', 'mies']
23
+ output.add_row %w(aap noot mies)
24
+ output.add_row %w(aap noot mies)
25
25
  output.rowcount.must_equal 2
26
26
  end
27
27
 
28
28
  it 'must have a header' do
29
29
  output = DocParser::ScreenOutput.new
30
30
  -> do
31
- output.add_row ['aap', 'noot', 'mies']
31
+ output.add_row %w(aap noot mies)
32
32
  end.must_raise(DocParser::MissingHeaderException)
33
33
  end
34
34
 
35
-
36
35
  it 'must output the data after close' do
37
36
  $out = StringIO.new
38
37
  output = Class.new DocParser::ScreenOutput do
@@ -43,8 +42,8 @@ describe DocParser::ScreenOutput do
43
42
  end
44
43
  end.new
45
44
  output.header = 'test', 'the', 'header'
46
- output.add_row ['aap1', '', 'mies']
47
- output.add_row ['aap2', 'mies1']
45
+ output.add_row ['aap1' , '', 'mies']
46
+ output.add_row %w(aap2 mies1)
48
47
  output.close
49
48
  out = $out.string
50
49
  out.must_include 'header'
@@ -31,7 +31,7 @@ describe DocParser::XLSXOutput do
31
31
  Dir.mktmpdir do |dir|
32
32
  filename = File.join(dir, 'test.xlsx')
33
33
  output = DocParser::XLSXOutput.new(filename: filename)
34
- output.add_row ['aap', 'noot', 'mies']
34
+ output.add_row %w(aap noot mies)
35
35
  output.add_row ['aap', 'noot', 'mies;']
36
36
  output.close
37
37
  sheet = output.instance_variable_get(:@sheet)
@@ -45,8 +45,8 @@ describe DocParser::XLSXOutput do
45
45
  output = DocParser::XLSXOutput.new(filename: filename)
46
46
  output.header = 'test', 'the', 'header'
47
47
  output.rowcount.must_equal 0
48
- output.add_row ['aap', 'noot', 'mies']
49
- output.add_row ['aap', 'noot', 'mies']
48
+ output.add_row %w(aap noot mies)
49
+ output.add_row %w(aap noot mies)
50
50
  output.rowcount.must_equal 2
51
51
  end
52
52
  end
@@ -31,7 +31,7 @@ describe DocParser::YAMLOutput do
31
31
  filename = File.join(dir, 'test.yml')
32
32
  output = DocParser::YAMLOutput.new(filename: filename)
33
33
  -> do
34
- output.add_row ['aap', 'noot', 'mies']
34
+ output.add_row %w(aap noot mies)
35
35
  end.must_raise(DocParser::MissingHeaderException)
36
36
  end
37
37
  end
@@ -41,9 +41,9 @@ describe DocParser::YAMLOutput do
41
41
  filename = File.join(dir, 'test.csv')
42
42
  output = DocParser::YAMLOutput.new(filename: filename)
43
43
  output.header = 'test', 'the', 'header'
44
- output.add_row ['a', 'b', 'c']
45
- output.add_row ['aap', 'noot', 'mies"']
46
- output.add_row ['aap', 'noot'] # testing empty column
44
+ output.add_row %w(a b c)
45
+ output.add_row %w(aap noot mies")
46
+ output.add_row %w(aap noot) # testing empty column
47
47
  output.close
48
48
  open(filename).read.must_equal <<-YAMLEND
49
49
  ---
@@ -68,8 +68,8 @@ YAMLEND
68
68
  output = DocParser::YAMLOutput.new(filename: filename)
69
69
  output.header = 'test', 'the', 'header'
70
70
  output.rowcount.must_equal 0
71
- output.add_row ['aap', 'noot', 'mies']
72
- output.add_row ['aap', 'noot', 'mies']
71
+ output.add_row %w(aap noot mies)
72
+ output.add_row %w(aap noot mies)
73
73
  output.rowcount.must_equal 2
74
74
  end
75
75
  end
@@ -32,7 +32,7 @@ describe DocParser::Output do
32
32
  output.header = 'test', 'the', 'header'
33
33
  end
34
34
  header = output.instance_variable_get(:@header)
35
- header.must_equal ['test', 'the', 'header']
35
+ header.must_equal %w(test the header)
36
36
  $method_id.must_equal :header
37
37
  end
38
38
  end
@@ -75,7 +75,6 @@ describe DocParser::Output do
75
75
  end
76
76
 
77
77
  it 'should raise a NotImplementedError on write_row' do
78
-
79
78
  Dir.mktmpdir do |dir|
80
79
  filename = File.join(dir, 'test.csv')
81
80
  output = DocParser::Output.new(filename: filename)
@@ -162,7 +162,6 @@ describe DocParser::Parser do
162
162
  mock_output2.verify.must_equal true
163
163
  end
164
164
 
165
-
166
165
  it 'should support parallel processing' do
167
166
  mock_output = SimpleMock.new DocParser::NilOutput.new
168
167
  mock_output.expect :close, nil
@@ -193,5 +192,4 @@ describe DocParser::Parser do
193
192
  $method_id.must_equal :fork
194
193
  mock_output.verify.must_equal true
195
194
  end
196
-
197
195
  end
data/test/test_helper.rb CHANGED
@@ -5,7 +5,7 @@ SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
5
5
  Coveralls::SimpleCov::Formatter
6
6
  ]
7
7
  SimpleCov.start do
8
- #add_filter '/test/'
8
+ # add_filter '/test/'
9
9
  end
10
10
  require 'minitest/autorun'
11
11
  require 'minitest/pride'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: docparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jurriaan Pruis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-28 00:00:00.000000000 Z
11
+ date: 2013-05-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -81,19 +81,19 @@ dependencies:
81
81
  - !ruby/object:Gem::Version
82
82
  version: 0.0.3
83
83
  - !ruby/object:Gem::Dependency
84
- name: json
84
+ name: multi_json
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - ~>
88
88
  - !ruby/object:Gem::Version
89
- version: 1.7.7
89
+ version: '1.7'
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - ~>
95
95
  - !ruby/object:Gem::Version
96
- version: 1.7.7
96
+ version: '1.7'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: log4r
99
99
  requirement: !ruby/object:Gem::Requirement
@@ -127,7 +127,9 @@ email:
127
127
  - email@jurriaanpruis.nl
128
128
  executables: []
129
129
  extensions: []
130
- extra_rdoc_files: []
130
+ extra_rdoc_files:
131
+ - README.md
132
+ - LICENSE
131
133
  files:
132
134
  - .coveralls.yml
133
135
  - .gitignore
@@ -153,6 +155,7 @@ files:
153
155
  - lib/docparser/output/yaml_output.rb
154
156
  - lib/docparser/parser.rb
155
157
  - lib/docparser/version.rb
158
+ - test/.rubocop.yml
156
159
  - test/lib/docparser/blackbox_test.rb
157
160
  - test/lib/docparser/document_test.rb
158
161
  - test/lib/docparser/logging_test.rb
@@ -238,6 +241,7 @@ signing_key:
238
241
  specification_version: 4
239
242
  summary: DocParser is a Ruby Gem for webscraping
240
243
  test_files:
244
+ - test/.rubocop.yml
241
245
  - test/lib/docparser/blackbox_test.rb
242
246
  - test/lib/docparser/document_test.rb
243
247
  - test/lib/docparser/logging_test.rb