docparser 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/Gemfile +1 -1
 - data/Rakefile +9 -3
 - data/docparser.gemspec +13 -12
 - data/example.rb +0 -1
 - data/lib/docparser/document.rb +4 -5
 - data/lib/docparser/output.rb +2 -2
 - data/lib/docparser/output/json_output.rb +2 -2
 - data/lib/docparser/version.rb +1 -1
 - data/test/.rubocop.yml +3 -0
 - data/test/lib/docparser/document_test.rb +1 -1
 - data/test/lib/docparser/output/csv_output_test.rb +3 -3
 - data/test/lib/docparser/output/html_output_test.rb +5 -5
 - data/test/lib/docparser/output/json_output_test.rb +10 -9
 - data/test/lib/docparser/output/multi_output_test.rb +4 -4
 - data/test/lib/docparser/output/nil_output_test.rb +2 -2
 - data/test/lib/docparser/output/screen_output_test.rb +5 -6
 - data/test/lib/docparser/output/xlsx_output_test.rb +3 -3
 - data/test/lib/docparser/output/yaml_output_test.rb +6 -6
 - data/test/lib/docparser/output_test.rb +1 -2
 - data/test/lib/docparser/parser_test.rb +0 -2
 - data/test/test_helper.rb +1 -1
 - metadata +10 -6
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 907927469491015367a9b5ba12ff4a8122495428
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 5c842a24a58026c8296d61ca95d921f9ab20ccf9
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 126b0563186b5f7dc9b94a55ee576d3f07818119056c99bd8dd938f940cb5c19b942cdb380ad9f2dc0367383b4e30cf42b8a2468cb9cad734f5cd716e92ce192
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 7abef08de7561f3e8486141c311655bf8f13e1d4c6a658b9a9919c56f0d23fc48c071b6df14211f19f0d3987018d97b739065c01bb5fed267c38f3e86292071e
         
     | 
    
        data/Gemfile
    CHANGED
    
    
    
        data/Rakefile
    CHANGED
    
    | 
         @@ -1,5 +1,8 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require  
     | 
| 
      
 1 
     | 
    
         
            +
            require 'bundler/gem_tasks'
         
     | 
| 
       2 
2 
     | 
    
         
             
            require 'rake/testtask'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'rubocop'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'yard'
         
     | 
| 
      
 5 
     | 
    
         
            +
            YARD::Rake::YardocTask.new
         
     | 
| 
       3 
6 
     | 
    
         | 
| 
       4 
7 
     | 
    
         
             
            Rake::TestTask.new do |t|
         
     | 
| 
       5 
8 
     | 
    
         
             
              t.libs << 'lib/docparser'
         
     | 
| 
         @@ -10,7 +13,10 @@ end 
     | 
|
| 
       10 
13 
     | 
    
         
             
            task test: :rubocop
         
     | 
| 
       11 
14 
     | 
    
         | 
| 
       12 
15 
     | 
    
         
             
            task :rubocop do
         
     | 
| 
       13 
     | 
    
         
            -
               
     | 
| 
      
 16 
     | 
    
         
            +
              puts "Running Rubocop #{Rubocop::Version::STRING}"
         
     | 
| 
      
 17 
     | 
    
         
            +
              args = FileList['**/*.rb', 'Rakefile', 'docparser.gemspec']
         
     | 
| 
      
 18 
     | 
    
         
            +
              cli = Rubocop::CLI.new
         
     | 
| 
      
 19 
     | 
    
         
            +
              fail unless cli.run(args) == 0
         
     | 
| 
       14 
20 
     | 
    
         
             
            end
         
     | 
| 
       15 
21 
     | 
    
         | 
| 
       16 
     | 
    
         
            -
            task : 
     | 
| 
      
 22 
     | 
    
         
            +
            task default: :test
         
     | 
    
        data/docparser.gemspec
    CHANGED
    
    | 
         @@ -3,27 +3,28 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 
     | 
|
| 
       3 
3 
     | 
    
         
             
            require 'docparser/version'
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
            Gem::Specification.new do |spec|
         
     | 
| 
       6 
     | 
    
         
            -
              spec.name          =  
     | 
| 
      
 6 
     | 
    
         
            +
              spec.name          = 'docparser'
         
     | 
| 
       7 
7 
     | 
    
         
             
              spec.version       = DocParser::VERSION
         
     | 
| 
       8 
     | 
    
         
            -
              spec.authors       = [ 
     | 
| 
       9 
     | 
    
         
            -
              spec.email         = [ 
     | 
| 
       10 
     | 
    
         
            -
              spec.description   =  
     | 
| 
       11 
     | 
    
         
            -
              spec.summary       =  
     | 
| 
       12 
     | 
    
         
            -
              spec.homepage      =  
     | 
| 
       13 
     | 
    
         
            -
              spec.license       =  
     | 
| 
      
 8 
     | 
    
         
            +
              spec.authors       = ['Jurriaan Pruis']
         
     | 
| 
      
 9 
     | 
    
         
            +
              spec.email         = ['email@jurriaanpruis.nl']
         
     | 
| 
      
 10 
     | 
    
         
            +
              spec.description   = 'DocParser is a Ruby Gem for webscraping'
         
     | 
| 
      
 11 
     | 
    
         
            +
              spec.summary       = 'DocParser is a Ruby Gem for webscraping'
         
     | 
| 
      
 12 
     | 
    
         
            +
              spec.homepage      = 'https://github.com/jurriaan/docparser'
         
     | 
| 
      
 13 
     | 
    
         
            +
              spec.license       = 'MIT'
         
     | 
| 
       14 
14 
     | 
    
         
             
              spec.platform      = Gem::Platform::RUBY
         
     | 
| 
       15 
15 
     | 
    
         | 
| 
       16 
     | 
    
         
            -
              spec.files         = `git ls-files`.split( 
     | 
| 
       17 
     | 
    
         
            -
              spec.executables   = spec.files.grep( 
     | 
| 
       18 
     | 
    
         
            -
              spec.test_files    = spec.files.grep( 
     | 
| 
       19 
     | 
    
         
            -
              spec.require_paths = [ 
     | 
| 
      
 16 
     | 
    
         
            +
              spec.files         = `git ls-files`.split($RS)
         
     | 
| 
      
 17 
     | 
    
         
            +
              spec.executables   = spec.files.grep(/^bin\//) { |f| File.basename(f) }
         
     | 
| 
      
 18 
     | 
    
         
            +
              spec.test_files    = spec.files.grep(/^(test|spec|features)\//)
         
     | 
| 
      
 19 
     | 
    
         
            +
              spec.require_paths = ['lib']
         
     | 
| 
      
 20 
     | 
    
         
            +
              spec.extra_rdoc_files = ['README.md', 'LICENSE']
         
     | 
| 
       20 
21 
     | 
    
         | 
| 
       21 
22 
     | 
    
         
             
              spec.add_runtime_dependency 'nokogiri', '~> 1.5.9'
         
     | 
| 
       22 
23 
     | 
    
         
             
              spec.add_runtime_dependency 'parallel', '~> 0.6.4'
         
     | 
| 
       23 
24 
     | 
    
         
             
              spec.add_runtime_dependency 'axlsx', '~> 1.3.6'
         
     | 
| 
       24 
25 
     | 
    
         
             
              spec.add_runtime_dependency 'terminal-table', '~> 1.4.5'
         
     | 
| 
       25 
26 
     | 
    
         
             
              spec.add_runtime_dependency 'pageme', '~> 0.0.3'
         
     | 
| 
       26 
     | 
    
         
            -
              spec.add_runtime_dependency ' 
     | 
| 
      
 27 
     | 
    
         
            +
              spec.add_runtime_dependency 'multi_json', '~> 1.7'
         
     | 
| 
       27 
28 
     | 
    
         
             
              spec.add_runtime_dependency 'log4r', '~> 1.1.10'
         
     | 
| 
       28 
29 
     | 
    
         | 
| 
       29 
30 
     | 
    
         
             
              spec.add_development_dependency 'yard'
         
     | 
    
        data/example.rb
    CHANGED
    
    
    
        data/lib/docparser/document.rb
    CHANGED
    
    | 
         @@ -5,6 +5,10 @@ module DocParser 
     | 
|
| 
       5 
5 
     | 
    
         
             
              # @see Output
         
     | 
| 
       6 
6 
     | 
    
         
             
              class Document
         
     | 
| 
       7 
7 
     | 
    
         
             
                attr_reader :filename, :doc, :encoding, :results
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
                # @return [String] the source of the document
         
     | 
| 
      
 10 
     | 
    
         
            +
                attr_reader :html
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
       8 
12 
     | 
    
         
             
                def initialize(filename: nil, encoding: 'utf-8', parser: nil)
         
     | 
| 
       9 
13 
     | 
    
         
             
                  if encoding == 'utf-8'
         
     | 
| 
       10 
14 
     | 
    
         
             
                    encodingstring = 'r:utf-8'
         
     | 
| 
         @@ -37,11 +41,6 @@ module DocParser 
     | 
|
| 
       37 
41 
     | 
    
         
             
                  @title ||= xpath_content('//head/title')
         
     | 
| 
       38 
42 
     | 
    
         
             
                end
         
     | 
| 
       39 
43 
     | 
    
         | 
| 
       40 
     | 
    
         
            -
                # @return [String] the source of the document
         
     | 
| 
       41 
     | 
    
         
            -
                def html
         
     | 
| 
       42 
     | 
    
         
            -
                  @html
         
     | 
| 
       43 
     | 
    
         
            -
                end
         
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
44 
     | 
    
         
             
                # Executes a xpath query
         
     | 
| 
       46 
45 
     | 
    
         
             
                def xpath(query)
         
     | 
| 
       47 
46 
     | 
    
         
             
                  res = @doc.search(query)
         
     | 
    
        data/lib/docparser/output.rb
    CHANGED
    
    
| 
         @@ -1,4 +1,4 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require ' 
     | 
| 
      
 1 
     | 
    
         
            +
            require 'multi_json'
         
     | 
| 
       2 
2 
     | 
    
         
             
            module DocParser
         
     | 
| 
       3 
3 
     | 
    
         
             
              # The JSONOutput class generates a JSON file containing all rows as seperate
         
     | 
| 
       4 
4 
     | 
    
         
             
              # Array elements
         
     | 
| 
         @@ -25,7 +25,7 @@ module DocParser 
     | 
|
| 
       25 
25 
     | 
    
         
             
                      @doc[@header[counter]] = ''
         
     | 
| 
       26 
26 
     | 
    
         
             
                    end
         
     | 
| 
       27 
27 
     | 
    
         
             
                  end
         
     | 
| 
       28 
     | 
    
         
            -
                  @file <<  
     | 
| 
      
 28 
     | 
    
         
            +
                  @file << MultiJson.dump(@doc)
         
     | 
| 
       29 
29 
     | 
    
         
             
                end
         
     | 
| 
       30 
30 
     | 
    
         | 
| 
       31 
31 
     | 
    
         
             
                def footer
         
     | 
    
        data/lib/docparser/version.rb
    CHANGED
    
    
    
        data/test/.rubocop.yml
    ADDED
    
    
| 
         @@ -110,7 +110,7 @@ describe DocParser::Document do 
     | 
|
| 
       110 
110 
     | 
    
         
             
              it 'should add the row to the results' do
         
     | 
| 
       111 
111 
     | 
    
         
             
                @test_doc.add_row ['test']
         
     | 
| 
       112 
112 
     | 
    
         
             
                @test_doc.add_row 'test', 'test2'
         
     | 
| 
       113 
     | 
    
         
            -
                @test_doc.results.must_equal [[ 
     | 
| 
      
 113 
     | 
    
         
            +
                @test_doc.results.must_equal [[%w(test), %w(test test2)]]
         
     | 
| 
       114 
114 
     | 
    
         
             
              end
         
     | 
| 
       115 
115 
     | 
    
         | 
| 
       116 
116 
     | 
    
         
             
              it 'should be possible to not use outputs' do
         
     | 
| 
         @@ -30,7 +30,7 @@ describe DocParser::CSVOutput do 
     | 
|
| 
       30 
30 
     | 
    
         
             
                Dir.mktmpdir do |dir|
         
     | 
| 
       31 
31 
     | 
    
         
             
                  filename = File.join(dir, 'test.csv')
         
     | 
| 
       32 
32 
     | 
    
         
             
                  output = DocParser::CSVOutput.new(filename: filename)
         
     | 
| 
       33 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 33 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
       34 
34 
     | 
    
         
             
                  output.add_row ['aap', 'noot', 'mies;']
         
     | 
| 
       35 
35 
     | 
    
         
             
                  output.close
         
     | 
| 
       36 
36 
     | 
    
         
             
                  open(filename).read.must_equal "aap;noot;mies\naap;noot;\"mies;\"\n"
         
     | 
| 
         @@ -43,8 +43,8 @@ describe DocParser::CSVOutput do 
     | 
|
| 
       43 
43 
     | 
    
         
             
                  output = DocParser::CSVOutput.new(filename: filename)
         
     | 
| 
       44 
44 
     | 
    
         
             
                  output.header = 'test', 'the', 'header'
         
     | 
| 
       45 
45 
     | 
    
         
             
                  output.rowcount.must_equal 0
         
     | 
| 
       46 
     | 
    
         
            -
                  output.add_row  
     | 
| 
       47 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 46 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
      
 47 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
       48 
48 
     | 
    
         
             
                  output.rowcount.must_equal 2
         
     | 
| 
       49 
49 
     | 
    
         
             
                end
         
     | 
| 
       50 
50 
     | 
    
         
             
              end
         
     | 
| 
         @@ -22,8 +22,8 @@ describe DocParser::HTMLOutput do 
     | 
|
| 
       22 
22 
     | 
    
         
             
                  output = DocParser::HTMLOutput.new(filename: filename)
         
     | 
| 
       23 
23 
     | 
    
         
             
                  output.header = 'test', 'the', 'header'
         
     | 
| 
       24 
24 
     | 
    
         
             
                  output.close
         
     | 
| 
       25 
     | 
    
         
            -
                  open(filename).read.must_include '<thead><tr><th>test</th><th>the</th> 
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
      
 25 
     | 
    
         
            +
                  open(filename).read.must_include '<thead><tr><th>test</th><th>the</th>
         
     | 
| 
      
 26 
     | 
    
         
            +
                  <th>header</th></tr></thead>'.gsub(/\s+/, '')
         
     | 
| 
       27 
27 
     | 
    
         
             
                end
         
     | 
| 
       28 
28 
     | 
    
         
             
              end
         
     | 
| 
       29 
29 
     | 
    
         | 
| 
         @@ -31,7 +31,7 @@ describe DocParser::HTMLOutput do 
     | 
|
| 
       31 
31 
     | 
    
         
             
                Dir.mktmpdir do |dir|
         
     | 
| 
       32 
32 
     | 
    
         
             
                  filename = File.join(dir, 'test.html')
         
     | 
| 
       33 
33 
     | 
    
         
             
                  output = DocParser::HTMLOutput.new(filename: filename)
         
     | 
| 
       34 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 34 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
       35 
35 
     | 
    
         
             
                  output.add_row ['aap', 'noot', 'mies;']
         
     | 
| 
       36 
36 
     | 
    
         
             
                  output.close
         
     | 
| 
       37 
37 
     | 
    
         
             
                  html = open(filename).read
         
     | 
| 
         @@ -47,8 +47,8 @@ describe DocParser::HTMLOutput do 
     | 
|
| 
       47 
47 
     | 
    
         
             
                  output = DocParser::HTMLOutput.new(filename: filename)
         
     | 
| 
       48 
48 
     | 
    
         
             
                  output.header = 'test', 'the', 'header'
         
     | 
| 
       49 
49 
     | 
    
         
             
                  output.rowcount.must_equal 0
         
     | 
| 
       50 
     | 
    
         
            -
                  output.add_row  
     | 
| 
       51 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 50 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
      
 51 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
       52 
52 
     | 
    
         
             
                  output.rowcount.must_equal 2
         
     | 
| 
       53 
53 
     | 
    
         
             
                  output.close
         
     | 
| 
       54 
54 
     | 
    
         
             
                  open(filename).read.must_include('<p>2 rows</p>')
         
     | 
| 
         @@ -31,7 +31,7 @@ describe DocParser::JSONOutput do 
     | 
|
| 
       31 
31 
     | 
    
         
             
                  filename = File.join(dir, 'test.json')
         
     | 
| 
       32 
32 
     | 
    
         
             
                  output = DocParser::JSONOutput.new(filename: filename)
         
     | 
| 
       33 
33 
     | 
    
         
             
                  -> do
         
     | 
| 
       34 
     | 
    
         
            -
                    output.add_row  
     | 
| 
      
 34 
     | 
    
         
            +
                    output.add_row %w(aap noot mies)
         
     | 
| 
       35 
35 
     | 
    
         
             
                  end.must_raise(DocParser::MissingHeaderException)
         
     | 
| 
       36 
36 
     | 
    
         
             
                end
         
     | 
| 
       37 
37 
     | 
    
         
             
              end
         
     | 
| 
         @@ -41,13 +41,14 @@ describe DocParser::JSONOutput do 
     | 
|
| 
       41 
41 
     | 
    
         
             
                  filename = File.join(dir, 'test.json')
         
     | 
| 
       42 
42 
     | 
    
         
             
                  output = DocParser::JSONOutput.new(filename: filename)
         
     | 
| 
       43 
43 
     | 
    
         
             
                  output.header = 'test', 'the', 'header'
         
     | 
| 
       44 
     | 
    
         
            -
                  output.add_row  
     | 
| 
       45 
     | 
    
         
            -
                  output.add_row  
     | 
| 
       46 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 44 
     | 
    
         
            +
                  output.add_row %w(a b c)
         
     | 
| 
      
 45 
     | 
    
         
            +
                  output.add_row %w(aap noot mies")
         
     | 
| 
      
 46 
     | 
    
         
            +
                  output.add_row %w(aap noot) # testing empty column
         
     | 
| 
       47 
47 
     | 
    
         
             
                  output.close
         
     | 
| 
       48 
     | 
    
         
            -
                   
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
      
 48 
     | 
    
         
            +
                  expected = '[{"test":"a","the":"b","header":"c"},
         
     | 
| 
      
 49 
     | 
    
         
            +
                    {"test":"aap","the":"noot","header":"mies\""},
         
     | 
| 
      
 50 
     | 
    
         
            +
                    {"test":"aap","the":"noot","header":""}]'.gsub(/\s+/, '')
         
     | 
| 
      
 51 
     | 
    
         
            +
                  open(filename).read.must_equal expected
         
     | 
| 
       51 
52 
     | 
    
         
             
                end
         
     | 
| 
       52 
53 
     | 
    
         
             
              end
         
     | 
| 
       53 
54 
     | 
    
         | 
| 
         @@ -57,8 +58,8 @@ describe DocParser::JSONOutput do 
     | 
|
| 
       57 
58 
     | 
    
         
             
                  output = DocParser::JSONOutput.new(filename: filename)
         
     | 
| 
       58 
59 
     | 
    
         
             
                  output.header = 'test', 'the', 'header'
         
     | 
| 
       59 
60 
     | 
    
         
             
                  output.rowcount.must_equal 0
         
     | 
| 
       60 
     | 
    
         
            -
                  output.add_row  
     | 
| 
       61 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 61 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
      
 62 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
       62 
63 
     | 
    
         
             
                  output.rowcount.must_equal 2
         
     | 
| 
       63 
64 
     | 
    
         
             
                end
         
     | 
| 
       64 
65 
     | 
    
         
             
              end
         
     | 
| 
         @@ -36,7 +36,7 @@ describe DocParser::MultiOutput do 
     | 
|
| 
       36 
36 
     | 
    
         
             
                  filename = File.join(dir, 'test')
         
     | 
| 
       37 
37 
     | 
    
         
             
                  output = DocParser::MultiOutput.new(filename: filename)
         
     | 
| 
       38 
38 
     | 
    
         
             
                  -> do
         
     | 
| 
       39 
     | 
    
         
            -
                    output.add_row  
     | 
| 
      
 39 
     | 
    
         
            +
                    output.add_row %w(aap noot mies)
         
     | 
| 
       40 
40 
     | 
    
         
             
                  end.must_raise(DocParser::MissingHeaderException)
         
     | 
| 
       41 
41 
     | 
    
         
             
                end
         
     | 
| 
       42 
42 
     | 
    
         
             
              end
         
     | 
| 
         @@ -47,8 +47,8 @@ describe DocParser::MultiOutput do 
     | 
|
| 
       47 
47 
     | 
    
         
             
                  output = DocParser::MultiOutput.new(filename: filename)
         
     | 
| 
       48 
48 
     | 
    
         
             
                  output.header = 'test', 'the', 'header'
         
     | 
| 
       49 
49 
     | 
    
         
             
                  output.rowcount.must_equal 0
         
     | 
| 
       50 
     | 
    
         
            -
                  output.add_row  
     | 
| 
       51 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 50 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
      
 51 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
       52 
52 
     | 
    
         
             
                  output.rowcount.must_equal 2
         
     | 
| 
       53 
53 
     | 
    
         
             
                end
         
     | 
| 
       54 
54 
     | 
    
         
             
              end
         
     | 
| 
         @@ -57,7 +57,7 @@ describe DocParser::MultiOutput do 
     | 
|
| 
       57 
57 
     | 
    
         
             
                Dir.mktmpdir do |dir|
         
     | 
| 
       58 
58 
     | 
    
         
             
                  filename = File.join(dir, 'test')
         
     | 
| 
       59 
59 
     | 
    
         
             
                  output = DocParser::MultiOutput.new(filename: filename)
         
     | 
| 
       60 
     | 
    
         
            -
                  methods =  
     | 
| 
      
 60 
     | 
    
         
            +
                  methods = %i(add_row header= close)
         
     | 
| 
       61 
61 
     | 
    
         
             
                  outputs = output.instance_variable_get(:@outputs)
         
     | 
| 
       62 
62 
     | 
    
         
             
                  outputs.map! do |o|
         
     | 
| 
       63 
63 
     | 
    
         
             
                    SimpleMock.new o
         
     | 
| 
         @@ -20,8 +20,8 @@ describe DocParser::NilOutput do 
     | 
|
| 
       20 
20 
     | 
    
         
             
                output = DocParser::NilOutput.new
         
     | 
| 
       21 
21 
     | 
    
         
             
                output.header = 'test', 'the', 'header'
         
     | 
| 
       22 
22 
     | 
    
         
             
                output.rowcount.must_equal 0
         
     | 
| 
       23 
     | 
    
         
            -
                output.add_row  
     | 
| 
       24 
     | 
    
         
            -
                output.add_row  
     | 
| 
      
 23 
     | 
    
         
            +
                output.add_row %w(aap noot mies)
         
     | 
| 
      
 24 
     | 
    
         
            +
                output.add_row %w(aap noot mies)
         
     | 
| 
       25 
25 
     | 
    
         
             
                output.rowcount.must_equal 0
         
     | 
| 
       26 
26 
     | 
    
         
             
              end
         
     | 
| 
       27 
27 
     | 
    
         
             
            end
         
     | 
| 
         @@ -20,19 +20,18 @@ describe DocParser::ScreenOutput do 
     | 
|
| 
       20 
20 
     | 
    
         
             
                output = DocParser::ScreenOutput.new
         
     | 
| 
       21 
21 
     | 
    
         
             
                output.header = 'test', 'the', 'header'
         
     | 
| 
       22 
22 
     | 
    
         
             
                output.rowcount.must_equal 0
         
     | 
| 
       23 
     | 
    
         
            -
                output.add_row  
     | 
| 
       24 
     | 
    
         
            -
                output.add_row  
     | 
| 
      
 23 
     | 
    
         
            +
                output.add_row %w(aap noot mies)
         
     | 
| 
      
 24 
     | 
    
         
            +
                output.add_row %w(aap noot mies)
         
     | 
| 
       25 
25 
     | 
    
         
             
                output.rowcount.must_equal 2
         
     | 
| 
       26 
26 
     | 
    
         
             
              end
         
     | 
| 
       27 
27 
     | 
    
         | 
| 
       28 
28 
     | 
    
         
             
              it 'must have a header' do
         
     | 
| 
       29 
29 
     | 
    
         
             
                output = DocParser::ScreenOutput.new
         
     | 
| 
       30 
30 
     | 
    
         
             
                -> do
         
     | 
| 
       31 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 31 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
       32 
32 
     | 
    
         
             
                end.must_raise(DocParser::MissingHeaderException)
         
     | 
| 
       33 
33 
     | 
    
         
             
              end
         
     | 
| 
       34 
34 
     | 
    
         | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
35 
     | 
    
         
             
              it 'must output the data after close' do
         
     | 
| 
       37 
36 
     | 
    
         
             
                $out = StringIO.new
         
     | 
| 
       38 
37 
     | 
    
         
             
                output = Class.new DocParser::ScreenOutput do
         
     | 
| 
         @@ -43,8 +42,8 @@ describe DocParser::ScreenOutput do 
     | 
|
| 
       43 
42 
     | 
    
         
             
                  end
         
     | 
| 
       44 
43 
     | 
    
         
             
                end.new
         
     | 
| 
       45 
44 
     | 
    
         
             
                output.header = 'test', 'the', 'header'
         
     | 
| 
       46 
     | 
    
         
            -
                output.add_row ['aap1', '', 'mies']
         
     | 
| 
       47 
     | 
    
         
            -
                output.add_row  
     | 
| 
      
 45 
     | 
    
         
            +
                output.add_row ['aap1' , '', 'mies']
         
     | 
| 
      
 46 
     | 
    
         
            +
                output.add_row %w(aap2 mies1)
         
     | 
| 
       48 
47 
     | 
    
         
             
                output.close
         
     | 
| 
       49 
48 
     | 
    
         
             
                out = $out.string
         
     | 
| 
       50 
49 
     | 
    
         
             
                out.must_include 'header'
         
     | 
| 
         @@ -31,7 +31,7 @@ describe DocParser::XLSXOutput do 
     | 
|
| 
       31 
31 
     | 
    
         
             
                Dir.mktmpdir do |dir|
         
     | 
| 
       32 
32 
     | 
    
         
             
                  filename = File.join(dir, 'test.xlsx')
         
     | 
| 
       33 
33 
     | 
    
         
             
                  output = DocParser::XLSXOutput.new(filename: filename)
         
     | 
| 
       34 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 34 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
       35 
35 
     | 
    
         
             
                  output.add_row ['aap', 'noot', 'mies;']
         
     | 
| 
       36 
36 
     | 
    
         
             
                  output.close
         
     | 
| 
       37 
37 
     | 
    
         
             
                  sheet = output.instance_variable_get(:@sheet)
         
     | 
| 
         @@ -45,8 +45,8 @@ describe DocParser::XLSXOutput do 
     | 
|
| 
       45 
45 
     | 
    
         
             
                  output = DocParser::XLSXOutput.new(filename: filename)
         
     | 
| 
       46 
46 
     | 
    
         
             
                  output.header = 'test', 'the', 'header'
         
     | 
| 
       47 
47 
     | 
    
         
             
                  output.rowcount.must_equal 0
         
     | 
| 
       48 
     | 
    
         
            -
                  output.add_row  
     | 
| 
       49 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 48 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
      
 49 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
       50 
50 
     | 
    
         
             
                  output.rowcount.must_equal 2
         
     | 
| 
       51 
51 
     | 
    
         
             
                end
         
     | 
| 
       52 
52 
     | 
    
         
             
              end
         
     | 
| 
         @@ -31,7 +31,7 @@ describe DocParser::YAMLOutput do 
     | 
|
| 
       31 
31 
     | 
    
         
             
                  filename = File.join(dir, 'test.yml')
         
     | 
| 
       32 
32 
     | 
    
         
             
                  output = DocParser::YAMLOutput.new(filename: filename)
         
     | 
| 
       33 
33 
     | 
    
         
             
                  -> do
         
     | 
| 
       34 
     | 
    
         
            -
                    output.add_row  
     | 
| 
      
 34 
     | 
    
         
            +
                    output.add_row %w(aap noot mies)
         
     | 
| 
       35 
35 
     | 
    
         
             
                  end.must_raise(DocParser::MissingHeaderException)
         
     | 
| 
       36 
36 
     | 
    
         
             
                end
         
     | 
| 
       37 
37 
     | 
    
         
             
              end
         
     | 
| 
         @@ -41,9 +41,9 @@ describe DocParser::YAMLOutput do 
     | 
|
| 
       41 
41 
     | 
    
         
             
                  filename = File.join(dir, 'test.csv')
         
     | 
| 
       42 
42 
     | 
    
         
             
                  output = DocParser::YAMLOutput.new(filename: filename)
         
     | 
| 
       43 
43 
     | 
    
         
             
                  output.header = 'test', 'the', 'header'
         
     | 
| 
       44 
     | 
    
         
            -
                  output.add_row  
     | 
| 
       45 
     | 
    
         
            -
                  output.add_row  
     | 
| 
       46 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 44 
     | 
    
         
            +
                  output.add_row %w(a b c)
         
     | 
| 
      
 45 
     | 
    
         
            +
                  output.add_row %w(aap noot mies")
         
     | 
| 
      
 46 
     | 
    
         
            +
                  output.add_row %w(aap noot) # testing empty column
         
     | 
| 
       47 
47 
     | 
    
         
             
                  output.close
         
     | 
| 
       48 
48 
     | 
    
         
             
                  open(filename).read.must_equal <<-YAMLEND
         
     | 
| 
       49 
49 
     | 
    
         
             
            ---
         
     | 
| 
         @@ -68,8 +68,8 @@ YAMLEND 
     | 
|
| 
       68 
68 
     | 
    
         
             
                  output = DocParser::YAMLOutput.new(filename: filename)
         
     | 
| 
       69 
69 
     | 
    
         
             
                  output.header = 'test', 'the', 'header'
         
     | 
| 
       70 
70 
     | 
    
         
             
                  output.rowcount.must_equal 0
         
     | 
| 
       71 
     | 
    
         
            -
                  output.add_row  
     | 
| 
       72 
     | 
    
         
            -
                  output.add_row  
     | 
| 
      
 71 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
      
 72 
     | 
    
         
            +
                  output.add_row %w(aap noot mies)
         
     | 
| 
       73 
73 
     | 
    
         
             
                  output.rowcount.must_equal 2
         
     | 
| 
       74 
74 
     | 
    
         
             
                end
         
     | 
| 
       75 
75 
     | 
    
         
             
              end
         
     | 
| 
         @@ -32,7 +32,7 @@ describe DocParser::Output do 
     | 
|
| 
       32 
32 
     | 
    
         
             
                    output.header = 'test', 'the', 'header'
         
     | 
| 
       33 
33 
     | 
    
         
             
                  end
         
     | 
| 
       34 
34 
     | 
    
         
             
                  header = output.instance_variable_get(:@header)
         
     | 
| 
       35 
     | 
    
         
            -
                  header.must_equal  
     | 
| 
      
 35 
     | 
    
         
            +
                  header.must_equal %w(test the header)
         
     | 
| 
       36 
36 
     | 
    
         
             
                  $method_id.must_equal :header
         
     | 
| 
       37 
37 
     | 
    
         
             
                end
         
     | 
| 
       38 
38 
     | 
    
         
             
              end
         
     | 
| 
         @@ -75,7 +75,6 @@ describe DocParser::Output do 
     | 
|
| 
       75 
75 
     | 
    
         
             
              end
         
     | 
| 
       76 
76 
     | 
    
         | 
| 
       77 
77 
     | 
    
         
             
              it 'should raise a NotImplementedError on write_row' do
         
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
78 
     | 
    
         
             
                Dir.mktmpdir do |dir|
         
     | 
| 
       80 
79 
     | 
    
         
             
                  filename = File.join(dir, 'test.csv')
         
     | 
| 
       81 
80 
     | 
    
         
             
                  output = DocParser::Output.new(filename: filename)
         
     | 
| 
         @@ -162,7 +162,6 @@ describe DocParser::Parser do 
     | 
|
| 
       162 
162 
     | 
    
         
             
                mock_output2.verify.must_equal true
         
     | 
| 
       163 
163 
     | 
    
         
             
              end
         
     | 
| 
       164 
164 
     | 
    
         | 
| 
       165 
     | 
    
         
            -
             
     | 
| 
       166 
165 
     | 
    
         
             
              it 'should support parallel processing' do
         
     | 
| 
       167 
166 
     | 
    
         
             
                mock_output = SimpleMock.new DocParser::NilOutput.new
         
     | 
| 
       168 
167 
     | 
    
         
             
                mock_output.expect :close, nil
         
     | 
| 
         @@ -193,5 +192,4 @@ describe DocParser::Parser do 
     | 
|
| 
       193 
192 
     | 
    
         
             
                $method_id.must_equal :fork
         
     | 
| 
       194 
193 
     | 
    
         
             
                mock_output.verify.must_equal true
         
     | 
| 
       195 
194 
     | 
    
         
             
              end
         
     | 
| 
       196 
     | 
    
         
            -
             
     | 
| 
       197 
195 
     | 
    
         
             
            end
         
     | 
    
        data/test/test_helper.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: docparser
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.1. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.1.3
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Jurriaan Pruis
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2013- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2013-05-12 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: nokogiri
         
     | 
| 
         @@ -81,19 +81,19 @@ dependencies: 
     | 
|
| 
       81 
81 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       82 
82 
     | 
    
         
             
                    version: 0.0.3
         
     | 
| 
       83 
83 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       84 
     | 
    
         
            -
              name:  
     | 
| 
      
 84 
     | 
    
         
            +
              name: multi_json
         
     | 
| 
       85 
85 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
       86 
86 
     | 
    
         
             
                requirements:
         
     | 
| 
       87 
87 
     | 
    
         
             
                - - ~>
         
     | 
| 
       88 
88 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       89 
     | 
    
         
            -
                    version: 1.7 
     | 
| 
      
 89 
     | 
    
         
            +
                    version: '1.7'
         
     | 
| 
       90 
90 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       91 
91 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       92 
92 
     | 
    
         
             
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
       93 
93 
     | 
    
         
             
                requirements:
         
     | 
| 
       94 
94 
     | 
    
         
             
                - - ~>
         
     | 
| 
       95 
95 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       96 
     | 
    
         
            -
                    version: 1.7 
     | 
| 
      
 96 
     | 
    
         
            +
                    version: '1.7'
         
     | 
| 
       97 
97 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       98 
98 
     | 
    
         
             
              name: log4r
         
     | 
| 
       99 
99 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
         @@ -127,7 +127,9 @@ email: 
     | 
|
| 
       127 
127 
     | 
    
         
             
            - email@jurriaanpruis.nl
         
     | 
| 
       128 
128 
     | 
    
         
             
            executables: []
         
     | 
| 
       129 
129 
     | 
    
         
             
            extensions: []
         
     | 
| 
       130 
     | 
    
         
            -
            extra_rdoc_files: 
     | 
| 
      
 130 
     | 
    
         
            +
            extra_rdoc_files:
         
     | 
| 
      
 131 
     | 
    
         
            +
            - README.md
         
     | 
| 
      
 132 
     | 
    
         
            +
            - LICENSE
         
     | 
| 
       131 
133 
     | 
    
         
             
            files:
         
     | 
| 
       132 
134 
     | 
    
         
             
            - .coveralls.yml
         
     | 
| 
       133 
135 
     | 
    
         
             
            - .gitignore
         
     | 
| 
         @@ -153,6 +155,7 @@ files: 
     | 
|
| 
       153 
155 
     | 
    
         
             
            - lib/docparser/output/yaml_output.rb
         
     | 
| 
       154 
156 
     | 
    
         
             
            - lib/docparser/parser.rb
         
     | 
| 
       155 
157 
     | 
    
         
             
            - lib/docparser/version.rb
         
     | 
| 
      
 158 
     | 
    
         
            +
            - test/.rubocop.yml
         
     | 
| 
       156 
159 
     | 
    
         
             
            - test/lib/docparser/blackbox_test.rb
         
     | 
| 
       157 
160 
     | 
    
         
             
            - test/lib/docparser/document_test.rb
         
     | 
| 
       158 
161 
     | 
    
         
             
            - test/lib/docparser/logging_test.rb
         
     | 
| 
         @@ -238,6 +241,7 @@ signing_key: 
     | 
|
| 
       238 
241 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       239 
242 
     | 
    
         
             
            summary: DocParser is a Ruby Gem for webscraping
         
     | 
| 
       240 
243 
     | 
    
         
             
            test_files:
         
     | 
| 
      
 244 
     | 
    
         
            +
            - test/.rubocop.yml
         
     | 
| 
       241 
245 
     | 
    
         
             
            - test/lib/docparser/blackbox_test.rb
         
     | 
| 
       242 
246 
     | 
    
         
             
            - test/lib/docparser/document_test.rb
         
     | 
| 
       243 
247 
     | 
    
         
             
            - test/lib/docparser/logging_test.rb
         
     |